Hi.
I use Node.js's child_process to create bash processes. I am trying to figure out whether I am I/O bound, CPU bound, or both.
I am using pdftotext to extract the text of 10k+ files. To control concurrency, I use async.
The code:
let spawn = require('child_process').spawn;
let async = require('async');
// Input list: one descriptor object per file to convert. The only field the
// code below reads is `path` (passed to pdftotext as the input file). The
// `...` lines are placeholders for elided fields and entries, not real syntax.
let files = [
{
path: 'path_for_file'
...
},
...
];
// Upper bound on how many pdftotext child processes run at the same time.
const maxNumber = 5;

/*
 * Extract the text of every entry in `files` by running one `pdftotext`
 * process per file, with at most `maxNumber` processes alive at once.
 *
 * Per-file worker: (file, callback)
 *   file.path - path handed to pdftotext as the input file.
 *   callback  - Node-style (err, text); called exactly once per file.
 *
 * Final callback: receives the first error (and rethrows it) or the array of
 * extracted texts, in the same order as `files`, which is logged.
 */
async.mapLimit(files, maxNumber, (file, callback) => {
  // `spawn` is the function bound by the require at the top of the file;
  // there is no `child_process` identifier in scope. The child is not named
  // `process` so it does not shadow the Node global.
  const child = spawn('pdftotext', [
    '-layout',
    '-enc',
    'UTF-8',
    file.path,
    '-', // write the extracted text to stdout
  ]);

  // Keep raw Buffers and decode once at the end: decoding chunk by chunk can
  // corrupt a multi-byte UTF-8 character that straddles two chunks.
  const stdoutChunks = [];
  const stderrChunks = [];

  // 'error' (spawn failure) and 'close' can both fire for the same child;
  // make sure the async callback is only ever invoked once.
  let isDone = false;
  const finish = (err, text) => {
    if (isDone) {
      return;
    }
    isDone = true;
    callback(err, text);
  };

  child.stdout.on('data', (chunk) => {
    stdoutChunks.push(chunk);
  });

  // Diagnostics arrive as 'data' events on stderr, not 'error' events.
  child.stderr.on('data', (chunk) => {
    stderrChunks.push(chunk);
  });

  // Emitted when the process could not be spawned (e.g. pdftotext missing).
  child.on('error', (err) => {
    finish(err);
  });

  child.on('close', (code) => {
    // A non-zero exit code (or null when killed by a signal) means failure.
    // Warnings written to stderr with exit code 0 do not fail the file.
    if (code !== 0) {
      const details = Buffer.concat(stderrChunks).toString('utf8').trim();
      finish(new Error(
        `pdftotext failed for ${file.path} (exit code ${code}): ${details}`
      ));
      return;
    }
    finish(null, Buffer.concat(stdoutChunks).toString('utf8'));
  });
}, (error, results) => {
  if (error) {
    // Rethrow the original Error so its message and stack are preserved.
    throw error;
  }
  console.log(results);
});
I monitor resource usage on Ubuntu, and my CPU and memory usage are very high when I run the program; sometimes I also see only one file being processed at a time. Is this normal? What could be the problem?
child_process. pdftotext Node.JS? ? , ?
:

Node.JS - child_process's

.
user5526811