Q concurrency constraint in Node js

Is there a way to limit the number of concurrent Q promises that will be executed immediately in node js?

I am building a web scraper that needs to request and parse more than 3000 pages. Without throttling, some of the requests I make are not answered in time, so the connection is left hanging and the response I need (the HTML) becomes unavailable.

To counteract this, I found that limiting the number of concurrent requests makes the problem go away.


I tried the following methods, but to no avail:

I need to request an array of URLs, making only one request at a time, and once all the URLs in the array have completed, return the results in an array.

/**
 * Sketch of the desired usage: pass every URL through a throttle that allows
 * one request at a time, then handle all of the results together.
 *
 * NOTE(review): `throttle` is not defined in this snippet; presumably it is
 * the helper the question is asking for.
 */
function processWebsite() {
  // computed by this stage
  urls = [u1, u2, u3, u4, u5, u6, u7, u8, u9];

  // myfunction returns a Q promise and takes a considerable
  // amount of time to resolve (approximately 2-5 minutes)
  var promises = throttle(urls, 1, myfunction);

  Q.all(promises).then(function (results) {
    // work with the results of the promises array
  });
}
+7
javascript concurrency q
source share
3 answers

You can request a new URL in the then() block

 myFunction(urls[0]).then(function(result) { myFunction(urls[1]).then(function(result) { myFunction(urls[2]).then(function(result) { ... }); }); }); 

Of course, in practice this would be done dynamically. I would keep a queue and dequeue one URL each time a promise is resolved, then make another request. And perhaps keep a hash object mapping the URLs to their results.

Second take:

 var urls = ...; var limit = ...; var dequeue = function() { return an array containing up to limit }; var myFunction = function(dequeue) { var urls = dequeue(); $q.all(process urls); }; myFunction(dequeue).then(function(result) { myFunction(dequeue).then(function(result) { myFunction(dequeue).then(function(result) { ... }); }); }); 
0
source share

I would do it like this: iterate over each URL, building a chain of promises in which each request starts when the previous one completes, and which resolves with an array of the request results.

 return urls.reduce(function(acc, url){ return acc.then(function(results) return myfunction(url).then(function(requestResult){ return results.concat(requestResult) }); }); }, Q.resolve([])); 

You can also include this in the helper:

 var results = map(urls, myfunction);

 /**
  * Runs `fn` over `items` strictly one at a time.
  *
  * @param {Array} items - Values to process, in order.
  * @param {function(*): Promise} fn - Called with one item; must return a
  *   promise for that item's result.
  * @returns {Promise<Array>} Promise for the results, in the same order as
  *   `items`. It is rejected as soon as any call to `fn` is rejected, and
  *   the remaining items are then never started.
  */
 function map(items, fn) {
   return items.reduce(function (acc, item) {
     return acc.then(function (results) {
       return fn(item).then(function (result) {
         // Wrap in an array so that a result which is itself an array is
         // appended as one element instead of being flattened by concat.
         return results.concat([result]);
       });
     });
   }, Q.resolve([]));
 }

Please note: bluebird library has a helper to simplify this kind of thing.

 // The `concurrency` option caps how many mapper calls are pending at once;
 // with 1, myfunction is run for one URL at a time.
 return Bluebird.map(urls, myfunction, {concurrency: 1}); 
+2
source share

Here is my stab at creating a throttled map function for Q.

 /**
  * Maps `items` through a promise-returning `worker`, keeping at most
  * `concurrent` workers in flight at any one time.
  *
  * @param {Array} items - Values handed to `worker`, one per call.
  * @param {function(*): Promise} worker - Called with a single item; must
  *   return a promise that supports `.finally()`.
  * @param {number|string} [concurrent=1] - Maximum number of simultaneous
  *   workers. Parsed as a base-10 integer; unparsable values and values
  *   below 1 fall back to 1.
  * @returns {Promise<Array>} Promise that resolves, once every worker has
  *   settled, with an array holding one promise per item (in item order).
  *   It is never rejected here, so inspect the individual promises to find
  *   failures. Progress notifications carry the percentage of items
  *   finished, rounded to one decimal place.
  */
 function qMap(items, worker, concurrent) {
   var result = Q.defer();
   var work = [];
   var next = 0; // index of the next item that has not been started yet
   var working = 0;
   var done = 0;
   // A limit below 1 would never start a worker and the promise would hang.
   var limit = Math.max(parseInt(concurrent, 10) || 1, 1);

   // Starts the next pending item if the limit allows; reports whether it did.
   function startWorking() {
     if (next >= items.length || working >= limit) {
       return false;
     }
     var index = next++;
     working++;
     work[index] = worker(items[index]).finally(doneWorking);
     return true;
   }

   // Runs whenever a worker settles, whether it was fulfilled or rejected.
   function doneWorking() {
     working--;
     done++;
     result.notify(+((100 * done / items.length).toFixed(1)));
     if (!startWorking() && done === items.length) {
       result.resolve(work);
     }
   }

   // With nothing to do no worker would ever call doneWorking, so resolve now.
   if (items.length === 0) {
     result.resolve(work);
     return result.promise;
   }

   while (startWorking()) {
     // keep launching workers until the limit is reached or items run out
   }
   return result.promise;
 }

It accepts

  • an array of items to work on (URLs in your case),
  • a worker (which should be a function that takes an item and returns a promise)
  • and the maximum number of items to work on concurrently at any given time.

It returns

  • a promise that
  • resolves to an array of the workers' promises when all workers have finished.

It never rejects; you must inspect the individual promises to determine the overall state of the operation.

In your case, you will use it like this, for example, with 15 simultaneous requests:

 // myfunction returns a Q promise and takes a considerable
 // amount of time to resolve (approximately 2-5 minutes)
 qMap(urls, myfunction, 15)
   .progress(function (percentDone) {
     console.log("progress: " + percentDone);
   })
   .done(function (urlPromises) {
     console.log("all done: " + urlPromises);
   });
+1
source share

All Articles