Passing a string stored in memory to pdftotext, antiword, catdoc, etc.

Is it possible to call CLI tools such as pdftotext, antiword, or catdoc (programs that extract text from documents), passing a string instead of a file?

I am currently reading PDF files by calling pdftotext with child_process.spawn. I spawn a new process and store the result in a new variable. Everything is working fine.

I would like to pass the binary data returned by fs.readFile instead of the file itself:

 // Read the whole PDF into memory; the goal is to hand `binary` (a Buffer)
 // to pdftotext instead of giving it a file path.
 fs.readFile('./my.pdf', (error, binary) => {
   // Nothing can be passed on if the file could not be read.
   if (error) {
     console.error(error);
     return;
   }

   // Call pdftotext with child_process.spawn passing the binary.
   const event = child_process.spawn('pdftotext', [
     // Args here!
   ]);
 });

How can I do this?

+6
source share
1 answer

This is definitely possible if the command can read its input from a pipe (stdin).

spawn returns a ChildProcess; you can pass a string (or binary data) to it by writing to its stdin. The string must be converted to a ReadableStream first, and then you can pipe it into the CLI's stdin.

createReadStream creates a ReadableStream from the file.

The following example downloads a PDF file, pipes its contents to pdftotext, and then displays the first few bytes of the result.

 const source = 'http://static.googleusercontent.com/media/research.google.com/en//archive/gfs-sosp2003.pdf'
 const http = require('http')
 const spawn = require('child_process').spawn

 // Run the demo only when this file is executed directly, so the functions
 // below can be loaded from elsewhere without triggering a download.
 if (typeof module !== 'undefined' && require.main === module) {
   download(source)
     .then(pdftotext)
     .then(result => console.log(result.slice(0, 77)))
     .catch(error => {
       console.error(error)
       process.exitCode = 1
     })
 }

 /**
  * Starts an HTTP GET request and resolves with the response stream.
  *
  * @param {string} url - Address to download.
  * @returns {Promise<http.IncomingMessage>} Readable stream of the response body.
  *   Rejects on a network error or a non-2xx status.
  */
 function download(url) {
   return new Promise((resolve, reject) => {
     http.get(url, response => {
       const status = response.statusCode
       if (status < 200 || status >= 300) {
         // Drain the body so the socket is released, then report the failure
         // instead of feeding an error page to the converter.
         response.resume()
         reject(new Error(`GET ${url} failed with HTTP status ${status}`))
         return
       }
       resolve(response)
     }).on('error', reject)
   })
 }

 /**
  * Converts a PDF held in memory (or arriving as a stream) to text.
  *
  * @param {stream.Readable|Buffer|string} binaryStream - The PDF data.
  * @returns {Promise<string>} Text written by pdftotext to its stdout.
  */
 function pdftotext(binaryStream) {
   // '-' '-' : read input from stdin and write to stdout
   return pipeThroughCommand('pdftotext', ['-', '-'], binaryStream)
 }

 /**
  * Spawns `command`, feeds `input` to its stdin and collects its stdout.
  *
  * @param {string} command - Executable to run.
  * @param {string[]} args - Command-line arguments.
  * @param {stream.Readable|Buffer|string} input - Data for the child's stdin.
  * @returns {Promise<string>} Everything the child wrote to stdout. Rejects if
  *   the process cannot be started, the input stream fails, or the child ends
  *   with a non-zero exit code or a signal.
  */
 function pipeThroughCommand(command, args, input) {
   return new Promise((resolve, reject) => {
     const child = spawn(command, args)
     const stdout = []
     const stderr = []

     // Emitted e.g. when the executable is not installed (ENOENT).
     child.on('error', reject)

     // Keep raw Buffers and decode once at the end: decoding chunk by chunk
     // can cut a multi-byte UTF-8 character in half.
     child.stdout.on('data', chunk => stdout.push(chunk))
     child.stderr.on('data', chunk => stderr.push(chunk))

     // The child may stop reading before all input is written; that shows up
     // as EPIPE here and is judged by the exit status below instead.
     child.stdin.on('error', error => {
       if (error.code !== 'EPIPE') reject(error)
     })

     // 'close' fires after the process has ended and its stdio streams are
     // closed, so no output can still be in flight.
     child.on('close', (code, signal) => {
       if (code === 0) {
         resolve(Buffer.concat(stdout).toString())
         return
       }
       const reason = signal ? `signal ${signal}` : `exit code ${code}`
       const details = Buffer.concat(stderr).toString().trim()
       const suffix = details ? `: ${details}` : ''
       reject(new Error(`${command} failed with ${reason}${suffix}`))
     })

     if (typeof input === 'string' || Buffer.isBuffer(input)) {
       // Data that is already in memory can be written directly.
       child.stdin.end(input)
     } else {
       input.on('error', reject)
       input.pipe(child.stdin)
     }
   })
 }

If there is no way for the CLI to read from stdin, you can use named pipes.

Edit: Add another example with named pipes.

Once named pipes are created, you can use them like files. The following example creates temporary named pipes to send the input and receive the output, and then displays the first few bytes of the result.

 const fs = require('fs')
 const os = require('os')
 const path = require('path')
 const spawn = require('child_process').spawn

 // Run the demo only when this file is executed directly, so the functions
 // below can be loaded from elsewhere without spawning anything.
 if (typeof module !== 'undefined' && require.main === module) {
   pipeCommand({
     name: 'wvText',
     input: fs.createReadStream('document.doc'),
   })
     .then(result => console.log(result.slice(0, 77)))
     .catch(error => {
       console.error(error)
       process.exitCode = 1
     })
 }

 /**
  * Creates a named pipe (FIFO) by running the `mkfifo` utility.
  *
  * @param {string} name - Path of the pipe to create.
  * @returns {Promise<void>} Resolves once mkfifo succeeded; rejects if it
  *   cannot be started or reports a failure.
  */
 function createPipe(name) {
   return new Promise((resolve, reject) => {
     spawn('mkfifo', [name])
       .on('error', reject)
       .on('exit', (code, signal) => {
         if (code === 0) {
           resolve()
           return
         }
         const reason = signal ? `signal ${signal}` : `exit code ${code}`
         reject(new Error(`mkfifo ${name} failed with ${reason}`))
       })
   })
 }

 /**
  * Runs a command that only accepts file paths, feeding it `input` through
  * one named pipe and collecting its output from another. The command is
  * invoked as `name ...args <input pipe> <output pipe>`.
  *
  * @param {object} options
  * @param {string} options.name - Executable to run.
  * @param {stream.Readable} options.input - Data to send to the command.
  * @param {string[]} [options.args=[]] - Extra arguments placed before the
  *   two pipe paths.
  * @returns {Promise<string>} Everything the command wrote to the output pipe.
  *   Rejects if a pipe cannot be created, the command cannot be started or
  *   fails, or one of the streams reports an error.
  */
 function pipeCommand({name, input, args = []}) {
   return makeTempDir().then(dir => {
     // A private directory per call keeps concurrent runs from sharing pipes
     // and leaves the working directory untouched.
     const inpipe = path.join(dir, 'input.pipe')
     const outpipe = path.join(dir, 'output.pipe')
     const cleanup = () => removePipes(dir, [inpipe, outpipe])

     return Promise.all([inpipe, outpipe].map(createPipe))
       .then(() => runThroughPipes(name, args, input, inpipe, outpipe))
       .then(
         text => cleanup().then(() => text),
         error => cleanup().then(() => { throw error })
       )
   })
 }

 // Creates a fresh, uniquely named directory below the system temp directory.
 function makeTempDir() {
   return new Promise((resolve, reject) => {
     fs.mkdtemp(path.join(os.tmpdir(), 'pipe-command-'), (error, dir) => {
       if (error) reject(error)
       else resolve(dir)
     })
   })
 }

 // Best-effort removal of the pipes and their directory; never rejects, so a
 // cleanup problem cannot mask the real result or error.
 function removePipes(dir, pipes) {
   const unlink = file => new Promise(resolve => fs.unlink(file, () => resolve()))
   return Promise.all(pipes.map(unlink))
     .then(() => new Promise(resolve => fs.rmdir(dir, () => resolve())))
 }

 // Briefly opens `fifo` without blocking and closes it again. Opening a FIFO
 // normally blocks until the other end is opened; if the command never opened
 // its end, this lets our own pending open complete instead of waiting forever.
 // NOTE(review): relies on a blocked open being woken by a non-blocking peer,
 // which is how Linux behaves - confirm on other platforms.
 function nudgePipe(fifo, accessMode) {
   return new Promise(resolve => {
     fs.open(fifo, accessMode | fs.constants.O_NONBLOCK, (error, fd) => {
       if (error) {
         // e.g. ENXIO: nobody is waiting on the other end.
         resolve()
         return
       }
       fs.close(fd, () => resolve())
     })
   })
 }

 // Spawns the command on the two existing pipes, streams `input` into the
 // first one and resolves with the text read from the second one.
 function runThroughPipes(name, args, input, inpipe, outpipe) {
   return new Promise((resolve, reject) => {
     // Spawn first: if spawn throws synchronously no pipe has been opened yet.
     const command = spawn(name, [...args, inpipe, outpipe])
     const output = fs.createReadStream(outpipe)
     const feeder = fs.createWriteStream(inpipe)
     const chunks = []
     let exited = false
     let drained = false

     const release = () => Promise.all([
       nudgePipe(inpipe, fs.constants.O_RDONLY),
       nudgePipe(outpipe, fs.constants.O_WRONLY),
     ])

     const fail = error => {
       command.kill()
       release().then(() => {
         output.destroy()
         feeder.destroy()
         reject(error)
       })
     }

     // The result is complete only when the command has exited successfully
     // AND the output pipe has been read to its end.
     const finishIfDone = () => {
       if (exited && drained) resolve(Buffer.concat(chunks).toString())
     }

     output
       // Keep raw Buffers and decode once: decoding chunk by chunk can cut a
       // multi-byte UTF-8 character in half.
       .on('data', chunk => chunks.push(chunk))
       .on('error', fail)
       .on('end', () => {
         drained = true
         finishIfDone()
       })

     // The command may stop reading before all input is written (EPIPE); that
     // alone is not a failure, its exit status decides.
     feeder.on('error', error => {
       if (error.code !== 'EPIPE') fail(error)
     })
     input.on('error', fail)

     command
       .on('error', fail)
       .on('exit', (code, signal) => {
         if (code !== 0) {
           const reason = signal ? `signal ${signal}` : `exit code ${code}`
           fail(new Error(`${name} failed with ${reason}`))
           return
         }
         release().then(() => {
           exited = true
           finishIfDone()
         })
       })

     input.pipe(feeder)
   })
 }
+2
source

All Articles