I/O bound vs. CPU bound

Hi.

I use Node.js's child_process to create bash processes. I am trying to figure out whether my workload is I/O bound, CPU bound, or both.

I am using pdftotext to extract the text of 10k+ files. To control concurrency, I use async.

The code:

let spawn = require('child_process').spawn;
let async = require('async');
// PDFs to convert: one object per file, with the file's location in `path`.
let files = [
  {
    path: 'path_for_file'
    ...
  },
  ...
];
// Concurrency limit handed to async.mapLimit.
let maxNumber = 5;

// Extract the text of every PDF in `files`, running at most `maxNumber`
// pdftotext processes at the same time. The final callback receives the
// extracted texts in the same order as `files`.
async.mapLimit(files, maxNumber, (file, callback) => {
  // Use the imported `spawn` directly (no `child_process` binding exists), and
  // name the handle `child` so the Node.js global `process` is not shadowed.
  const child = spawn('pdftotext', [
    "-layout",
    "-enc",
    "UTF-8",
    file.path,
    "-" // write the extracted text to stdout instead of a file
  ]);
  let text = '';
  let stderrText = '';
  let finished = false;

  // A failed spawn emits 'error' and may or may not be followed by 'close';
  // make sure the mapLimit callback is invoked exactly once either way.
  const finish = (err, value) => {
    if (finished) {
      return;
    }
    finished = true;
    callback(err, value);
  };

  // Decode at the stream level so a multi-byte UTF-8 character that is split
  // across two chunks is not corrupted by per-chunk toString().
  child.stdout.setEncoding('utf8');
  child.stderr.setEncoding('utf8');

  child.stdout.on('data', (chunk) => {
    text += chunk;
  });

  // Output written to stderr arrives as 'data' events, not 'error' events.
  child.stderr.on('data', (chunk) => {
    stderrText += chunk;
  });

  // Emitted when the process could not be spawned (e.g. pdftotext is not
  // installed). Without a listener this would be an unhandled 'error' event.
  child.on('error', (err) => {
    finish(err, null);
  });

  child.on('close', (code, signal) => {
    // As before, any stderr output counts as a failure; additionally a
    // non-zero exit code (or termination by a signal, where code is null)
    // is reported even when stderr is empty.
    if (code !== 0 || stderrText) {
      const reason = stderrText.trim() || 'no error output';
      return finish(
        new Error(
          `pdftotext failed for ${file.path} (exit code ${code}, signal ${signal}): ${reason}`
        ),
        null
      );
    }
    finish(null, text);
  });
}, (error, texts) => {
  if (error) {
    // `error` is already an Error instance; rethrow it without re-wrapping.
    throw error;
  }

  console.log(texts);
});

I monitor resource usage on Ubuntu, and my CPU and memory usage are very high when I run the program; sometimes I also see only one file being processed at a time. Is this normal? What could be the problem?

child_process. pdftotext Node.JS? ? , ?

:

enter image description here


Node.JS - child_process's

enter image description here

.

+4
2

, ( , ). , 4- , , , , 4 .

. . , .

, , - , .

: .

, 10k , 100 , , . , -. , .

# Write the path of every regular file under pdfdir to the file "files".
find pdfdir -type f > files
# mytest N: pick 100 random paths from "files" and convert them with
# N pdftotext jobs running in parallel; all extracted text goes to "out".
mytest() {
  shuf files | head -n 100 |
    parallel -j $1 pdftotext -layout -enc UTF-8 {} - > out;
}
# Export the function so the shells started by parallel can call it.
export -f mytest
# Test with 1..10 parallel jobs. Sort by JobRuntime.
# -j1 runs the ten tests one after another so they do not compete with each
# other; the job log is piped to sort, which orders it numerically by column 4.
seq 10 | parallel -j1 --joblog - mytest | sort -nk 4
, 100%. , , .

RAM - , ( 754M . < 100M, ), , , .

+6

Node.js I/O. . , . . CPU Node.js, pdftotext , , CPU .

0

All Articles