Dynamic paging with Nightmare / Electron (page scraping)

I am trying to crawl dynamically paged websites using Nightmare / Electron. I don't see a way to perform a do ... until loop with Nightmare functions, or a way to chain evaluate calls with logic.

Here is a simple code example that just searches Google for a phrase and returns the result hrefs from page 1. I would like this code to continue for each page in the results.

var Nightmare = require('nightmare');
var vo = require('vo');

// Selector for the Google search box; shared by the wait, click and type steps.
var SEARCH_INPUT = 'input[title="Search"]';

/**
 * Executed inside the loaded page through nightmare.evaluate().
 * Must stay self-contained: it is run in the page, not in this script.
 *
 * @returns {Array} the raw `href` attribute of every element matching 'h3.r a'.
 */
function collectResultHrefs() {
  var anchors = document.querySelectorAll('h3.r a');
  return Array.prototype.map.call(anchors, function (anchor) {
    return anchor.getAttribute('href');
  });
}

/**
 * Opens Google, submits the search phrase and gathers the result hrefs
 * from the first results page only.
 *
 * @returns {Array} the hrefs collected by collectResultHrefs().
 */
function* searchFirstPage() {
  var browser = Nightmare({ show: true });

  var resultsPage = browser
    .goto('http://www.google.com')
    .wait(SEARCH_INPUT)
    .click(SEARCH_INPUT)
    .type(SEARCH_INPUT, 'Anequim Project')
    .click('input[name="btnK"]')
    .wait(600);

  var hrefs = yield resultsPage.evaluate(collectResultHrefs);

  yield browser.end();
  return hrefs;
}

/**
 * Completion callback for vo: logs the error if there is one,
 * otherwise logs the collected hrefs.
 */
function report(err, result) {
  if (err) {
    return console.log(err);
  }
  console.log(result);
}

vo(searchFirstPage)(report);
+7
javascript web-scraping nightmare electron
source share
1 answer

The following code example is a modified version of a solution provided by rosshinkley of the segmentio/nightmare project. It still needs some work, as it is not 100% reliable in my tests using Nightmare version 2.1.2, but it is a great starting point.

Note: when testing, if you run it more than X times, Google will require a CAPTCHA.

 var Nightmare = require('nightmare');
 var vo = require('vo');

 vo(run)(function (err, result) {
   if (err) throw err;
 });

 /**
  * Searches Google for a phrase and walks through the result pages,
  * collecting the href of every 'h3.r a' link on each page.
  *
  * Paging stops when the "next" control (#pnnext) is no longer visible
  * or when MAX_PAGE pages have been scraped, whichever comes first.
  *
  * @returns {Array} flat array of all hrefs collected across the pages.
  */
 function* run() {
   var nightmare = Nightmare({ show: true });
   var MAX_PAGE = 100;
   var currentPage = 0;
   var nextExists = true;
   var links = [];
   var pageLinks;

   try {
     yield nightmare
       .goto('http://www.google.com')
       .wait('input[title="Search"]')
       .click('input[title="Search"]')
       .type('input[title="Search"]', 'Anequim Project')
       .click('input[name="btnK"]')
       .wait(2000);

     while (currentPage < MAX_PAGE) {
       // Scrape the current page BEFORE looking at the "next" control, so the
       // last page (which has no #pnnext) is still included in the results.
       pageLinks = yield nightmare.evaluate(function () {
         // Runs inside the page; must not reference variables from this script.
         var anchors = document.querySelectorAll('h3.r a');
         // An empty NodeList maps to [], so a page without matches
         // contributes nothing instead of throwing.
         return Array.prototype.map.call(anchors, function (anchor) {
           return anchor.href;
         });
       });
       links = links.concat(pageLinks);
       currentPage++;

       nextExists = yield nightmare.visible('#pnnext');
       if (!nextExists || currentPage >= MAX_PAGE) {
         break;
       }

       yield nightmare
         .click('#pnnext')
         .wait(2000);
     }

     console.dir(links);
   } finally {
     // Always shut the browser down, even when a step above failed,
     // so the Electron process is not left running.
     yield nightmare.end();
   }

   return links;
 }
+5
source share

All Articles