Elasticsearch shows all results using scrolling in node js

Basically, I am trying to show all entries of an index type. If you use match_all() in a query, Elasticsearch returns only 10 results by default. You can retrieve all the results by scrolling. I am trying to implement the scroll API but cannot make it work — it still shows only 10 results. My code is:

module.exports.searchAll = function (searchData, callback) { client.search({ index: 'test', type: 'records', scroll: '10s', //search_type: 'scan', //if I use search_type then it requires size otherwise it shows 0 result body: { query: { "match_all": {} } } }, function (err, resp) { client.scroll({ scrollId: resp._scroll_id, scroll: '10s' }, callback(resp.hits.hits)); }); } 

Can anyone help please?

+10
source share
7 answers

You need to call client.scroll repeatedly until no more records are returned. There is a good example in the Elasticsearch documentation. I have reproduced their sample code below, slightly modified to match your question.

 var allRecords = []; // first we do a search, and specify a scroll timeout client.search({ index: 'test', type: 'records', scroll: '10s', body: { query: { "match_all": {} } } }, function getMoreUntilDone(error, response) { // collect all the records response.hits.hits.forEach(function (hit) { allRecords.push(hit); }); if (response.hits.total !== allRecords.length) { // now we can call scroll over and over client.scroll({ scrollId: response._scroll_id, scroll: '10s' }, getMoreUntilDone); } else { console.log('all done', allRecords); } }); 
+16
source

Thanks @Ceilingfish. Here's a modified ES6 version of the above, using await:

 let allRecords = []; // first we do a search, and specify a scroll timeout var { _scroll_id, hits } = await esclient.search({ index: 'test', type: 'records', scroll: '10s', body: { query: { "match_all": {} }, _source: false } }) while(hits && hits.hits.length) { // Append all new hits allRecords.push(...hits.hits) console.log('${allRecords.length} of ${hits.total}') var { _scroll_id, hits } = await esclient.scroll({ scrollId: _scroll_id, scroll: '10s' }) } console.log('Complete: ${allRecords.length} records retrieved') 
+6
source

This is what I use with Promises

 var EsHelper = function() { this.esUrl = esUrl; this.indexName = "myIndex"; this.type = "myIndexType"; this.elasticClient = new elasticsearch.Client({ host: esUrl }); }; EsHelper.prototype.scrollData = function(response, allHits) { return new Promise((resolve, reject) => { response.hits.hits.forEach((hit) => allHits.push(hit)); if (response.hits.total !== allHits.length) { this.elasticClient.scroll({ scroll_id: response._scroll_id, scroll: '10s', }).then((response) => { resolve(this.scrollData(response, allHits)); }).catch((error) => reject(error)); } else { resolve(allHits); } }); }; EsHelper.prototype.runSearchWithScroll = function(query) { var allHits = []; return this.elasticClient.search({ index: this.indexName, type: this.type, scroll: '10s', body: query }) .then((response) => (this.scrollData(response, allHits))) .then((result) => { return result; }); }; 

The best way?

0
source

Node.js failed when Elasticsearch returned more than 10,000 results. This is how I used scrolling:

 async function getResultsFromElastic() { let responseAll = {}; responseAll["hits"] = {}; responseAll.hits.hits = []; const responseQueue = []; searchQuery = { index: 'test', type: 'records', body: { query: { "match_all": {} } } } searchQuery.scroll='10s'; searchQuery.size=10000; responseQueue.push(await esclient.search(searchQuery)); while (responseQueue.length) { const response = responseQueue.shift(); responseAll.hits.hits = responseAll.hits.hits.concat(response.hits.hits); if (response.hits.total == responseAll.hits.hits.length) { break; } // get the next response if there are more to fetch responseQueue.push( await esclient.scroll({ scrollId: response._scroll_id, scroll: '30s' }) ); } return responseAll; } 
0
source

There are many well written answers here that solve the problem. But if someone is looking for a custom solution, he can go here and use this package - https://github.com/alcacoop/elasticsearch-scroll-stream

Usage is pretty simple and it just works. Below is an example taken from their official documentation.

 const elasticsearch = require('elasticsearch'); const ElasticsearchScrollStream = require('elasticsearch-scroll-stream'); const client = new elasticsearch.Client(); const es_stream = new ElasticsearchScrollStream(client, { index: 'your-index', type: 'your-type', scroll: '10s', size: '50', _source: ['name'], q: 'name:*' }); es_stream.pipe(process.stdout); es_stream.on('data', function(data) { // Process your results here }); es_stream.on('end', function() { console.log("End"); }); 
0
source

A query to retrieve all data from Elasticsearch with the Node.js client, using scroll together with async/await.

 const elasticsearch = require('@elastic/elasticsearch'); async function esconnection(){ let es = await new elasticsearch.Client({ node: "http://192.168.1.1:7200" }); return es; } async function getAllUserList(){ try{ let userArray = []; let query ={ "query":{ "match_all": {} } } let es = await esconnection(); let {body}= await es.search({ index: 'esIndex', type :"esIndexType", scroll :'2m', //# Specify how long a consistent view of the index should be maintained for scrolled search size: 100, // # Number of hits to return (default: 10) body: query }); let sid = body['_scroll_id'] let scroll_size = body['hits']['total'] let dataLength = body['hits']['hits'].length while (scroll_size > 0){ for(let i=0; i<dataLength;i++){ if(body['hits']['hits'][i]) { let userData = (body['hits']['hits'][i]['_source']) userArray.push(userData) } } sid = body['_scroll_id'] body = await es.scroll({ scrollId: sid, scroll: '10s' }) body=body.body scroll_size = (body['hits']['hits']).length; } es.close(); return userArray; } catch(error){ console.log("Code not working properly: ",'${error}') } } 
0
source

I wanted to get my entire data set in one query. NO SCROLL.

NOTE FOR NEGATIVE FEEDBACK:

In my case, this answer works fine because I did not care about performance. I made a one-time request to dump the index to JSON. Elasticsearch does not have a dump command, and this is the next best thing short of third-party import/export tools.

None of the other answers here achieve this.

It does:


[1] Increase max_result_window (by default, a maximum of 10,000 results is allowed)

 $ curl -XPUT 'localhost:9200/yourindexname/_settings' -H'Content-Type: application/json' -d ' { "index": { "max_result_window": 100000 } }' 

[2] Get the entire Elasticsearch index through the Node API

  const response = await client.search({ index: "yourindexname", type: "yourtype", size: 100000 //lets say you need at most 100000 results in one go }) 

[3] Alternatively, get your entire Elasticsearch index via cURL

  • Note. The size parameter determines the number of returned items.

    $ curl 'localhost: 9200 / yourindexname / _search? pretty & scroll = 10m & size = 99999 '-H' Content type: application / json '-d' {"query": {"match_all": {}}} '

-1
source

All Articles