MongoDB aggregation on a complex document structure is very slow when there is a $group stage

I am trying to run an aggregation query with a "$group" stage to get the total number of results.

The total number of "requested_items" (my results) is approximately 1,900,000.

If I run the query with the "$group" stage, it is very slow (about 300 seconds).

If I run it without the "$group" stage, the query is very fast (about 1 second).

What am I doing wrong?

The following is sample code.



SLOW QUERY

 db.minute.aggregate([ { $match: { $and: [ { "status": "Homologado" }, { "requested_items.status": /aceito/i }, ] } }, { $sort: {'_id': 1}}, { $unwind: "$requested_items" }, { $unwind: "$requested_items.winner" }, { $match: { $and: [ { "status": "Homologado" }, { "requested_items.status": /aceito/i }, ] } }, { $project: { "_id": 1 } }, { $group: { "_id" : null, "total" : {$sum: 1}, } }, ], {allowDiskUse: true}); 


FAST QUERY

 db.minute.aggregate([ { $match: { $and: [ { "status": "Homologado" }, { "requested_items.status": /aceito/i }, ] } }, { $sort: {'_id': 1}}, { $unwind: "$requested_items" }, { $unwind: "$requested_items.winner" }, { $match: { $and: [ { "status": "Homologado" }, { "requested_items.status": /aceito/i }, ] } }, { $project: { "_id": 1 } }, ], {allowDiskUse: true}); 


DB STRUCTURE

 { "_id" : "12345678ABCD", "field_1" : [ { "a" : null, "b" : "ABC" }, { "code" : null, "b" : "ABCD" } ], "status" : "Homologado", "initial_date" : ISODate("2016-05-24T11:31:00.000Z"), "field_2" : [ { "a" : "ABC", "b" : "ABCDE" }, { "a" : "ABCF", "b" : "ABCDEF" } ], "field_3" : "Lorem ipsum dolor sit amet...", "field_4" : [ { "date" : ISODate("2016-05-24T13:54:48.000Z"), "a" : "Text", "b" : "More text..." } ], "field_4" : 12312321, "field_5" : ISODate("2016-05-24T13:55:00.000Z"), "field_6" : "ABCD", "requested_items" : [ { "status" : " Aceito e Habilitado", "field_a" : "Text...", "winner" : [ { "a" : "23213.213213.23/232-23", "b" : 130446, "c" : 543223, "d" : NumberLong(2), "e" : "ABC 123 FULANO", "f" : "text", "g" : { "description" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : 130446, "c" : 543223, "d" : NumberLong(2), "e" : "ABC 123 FULANO", "f" : "text", "g" : { "description" : "TEXT TEXT TEXT" } } ], "field_c" : { "_id" : ObjectId("5744dd3271af88052f0cc343"), "a" : "TEXT", "b" : "TEXT" }, "field_d" : NumberLong(2), "field_e" : 5223, "field_f" : "Não", "field_g" : "-", "field_h" : { "field_a1" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } } ], "field_a2" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT 
TEXT TEXT" } } ], "field_a3" : {}, "field_a4" : [ { "date" : ISODate("2016-05-24T11:34:32.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:12:54.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:48:21.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:38.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:47.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:01:36.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:15:02.000Z"), "A" : "TEXT", "B" : "TEXT" } ] }, "field_i" : "Não", "field_j" : 1 }, { "status" : " Aceito e Habilitado", "field_a" : "Text...", "winner" : [ { "a" : "23213.213213.23/232-23", "b" : 130446, "c" : 543223, "d" : NumberLong(2), "e" : "ABC 123 FULANO", "f" : "text", "g" : { "description" : "TEXT TEXT TEXT" } } ], "field_c" : { "_id" : ObjectId("5744dd3271af88052f0cc343"), "a" : "TEXT", "b" : "TEXT" }, "field_d" : NumberLong(2), "field_e" : 5223, "field_f" : "Não", "field_g" : "-", "field_h" : { "field_a1" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } } ], "field_a2" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } } ], "field_a3" : {}, "field_a4" : [ { "date" : 
ISODate("2016-05-24T11:34:32.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:12:54.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:48:21.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:38.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:47.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:01:36.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:15:02.000Z"), "A" : "TEXT", "B" : "TEXT" } ] }, "field_i" : "Não", "field_j" : 2 }, { "status" : " Aceito e Habilitado", "field_a" : "Text...", "winner" : [ { "a" : "23213.213213.23/232-23", "b" : 130446, "c" : 543223, "d" : NumberLong(2), "e" : "ABC 123 FULANO", "f" : "text", "g" : { "description" : "TEXT TEXT TEXT" } } ], "field_c" : { "_id" : ObjectId("5744dd3271af88052f0cc343"), "a" : "TEXT", "b" : "TEXT" }, "field_d" : NumberLong(2), "field_e" : 5223, "field_f" : "Não", "field_g" : "-", "field_h" : { "field_a1" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } } ], "field_a2" : [ { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } }, { "a" : "23213.213213.23/232-23", "b" : ISODate("2016-05-23T23:54:21.000Z"), "c" : 103432446, "d" : 522343, "e" : "Sim", "f" : NumberLong(2), "g" : "TEXT TEXT TEXT", "h" : "Sim", "i" : { "a" : "TEXT TEXT TEXT" } } ], "field_a3" : {}, "field_a4" : [ { "date" : ISODate("2016-05-24T11:34:32.000Z"), "A" : "TEXT", "B" : 
"TEXT" }, { "date" : ISODate("2016-05-24T12:12:54.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:48:21.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:38.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T12:55:47.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:01:36.000Z"), "A" : "TEXT", "B" : "TEXT" }, { "date" : ISODate("2016-05-24T13:15:02.000Z"), "A" : "TEXT", "B" : "TEXT" } ] }, "field_i" : "Não", "field_j" : 3 }, ], "field_7" : "TEXT", "field_8" : { "a" : "TEXT", "b" : "TEXT", "c" : "324234", "d" : "TEXT TEXT TEXT TEXT" }, "field_9" : 43234 } 


EXPLAIN

 { "waitedMS" : NumberLong(0), "stages" : [ { "$cursor" : { "query" : { "$and" : [ { "status" : "Homologado" }, { "requested_items.status" : /aceito/i } ] }, "queryPlanner" : { "plannerVersion" : 1, "namespace" : "module_database.minute", "indexFilterSet" : false, "parsedQuery" : { "$and" : [ { "status" : { "$eq" : "Homologado" } }, { "requested_items.status" : /aceito/i } ] }, "winningPlan" : { "stage" : "COLLSCAN", "filter" : { "$and" : [ { "status" : { "$eq" : "Homologado" } }, { "requested_items.status" : /aceito/i } ] }, "direction" : "forward" }, "rejectedPlans" : [] } } }, { "$unwind" : { "path" : "$requested_items" } }, { "$unwind" : { "path" : "$requested_items.winner" } }, { "$match" : { "$and" : [ { "status" : "Homologado" }, { "requested_items.status" : /aceito/i } ] } }, { "$group" : { "_id" : { "$const" : null }, "numberOfdocs" : { "$sum" : { "$const" : 1 } } } } ], "ok" : 1 } 

My server:
OS: UBUNTU14 / 64
CPU: 6
RAM: 16 GB
Shared storage: 80 GB
The server is running only the tests for this question.

+6
source share
2 answers

Finally, I solved the problem with my $group query. It was a design mistake: thinking in SQL terms, I designed the collections before thinking about how my application would query them. The result was slow queries.

To solve this problem, I had to redesign my collections and put the relevant data at the top level of my documents. In my research, I found that in an aggregation, an indexed field must be used in the first stage of the pipeline. If I use an indexed field after the $unwind stage, the index is not used.

In addition, I create an int hash for text fields using the https://github.com/darkskyapp/string-hash package. So my text fields can be indexed.

As a result, my queries went from 300 seconds to 5 seconds.

+4
source

It's difficult to diagnose the slowness, since we have no details about the environment. What you could try is to see how your query is executed by adding:

 { explain:true } 

to your aggregation request: db.coll.aggregate([pipeline], {explain: true, allowDiskUse: true}) (all options go in a single options document). Also note that $unwind multiplies the number of documents processed, since it emits one document per array element.

Since you only need the number of documents, it can be faster to just take the size of the array (after the first unwind) and sum it up afterwards:

 db.inventory.aggregate( [ { $group: { _id: null, numberOfdocs: { $sum:{$size: "$requested_items.winner" }} } } ] ) 

EDIT

After experimenting with this query, I was able to reduce the execution time by about 45%. The main thing is to skip the second $match, because it scans the full result set. Instead, the last $group covers all the data, and we can filter for what is needed at the end, since that filter runs on a small result set.

 db.coll.aggregate([{ $match : { "status" : "Homologado" } }, { $unwind : "$requested_items" }, { $unwind : "$requested_items.winner" }, { $project : { x : "$requested_items.status", } }, { $group : { _id : "$x", numberOfdocs : { $sum : 1 } } }, { $match : { "_id" : /acesssito/i } } ], { allowDiskUse: true }); 
0
source

All Articles