I have a large dataset (about 1.1M documents) that I need to run mapreduce.
The grouping field is an array called xref. Due to the size of the collection and the fact that I am doing this in a 32-bit environment, I am trying to reduce the collection in another collection in a new database.
First, here is the sample data:
{ "_id" : ObjectId("4ec6d3aa61910ad451f12e01"), "bii" : -32.9867, "class" : 2456, "decdeg" : -82.4856, "lii" : 297.4896, "name" : "HD 22237", "radeg" : 50.3284, "vmag" : 8, "xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336", "CP-82 65","GC 4125" ] } { "_id" : ObjectId("4ec6d44661910ad451f78eba"), "bii" : -32.9901, "class" : 2450, "decdeg" : -82.4781, "decpm" : 0.013, "lii" : 297.4807, "name" : "PPM 376283", "radeg" : 50.3543, "rapm" : 0.0357, "vmag" : 8.4, "xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336", "CP-82 65","GC 4125" ] } { "_id" : ObjectId("4ec6d48a61910ad451feae04"), "bii" : -32.9903, "class" : 2450, "decdeg" : -82.4779, "decpm" : 0.027, "hd_component" : 0, "lii" : 297.4806, "name" : "SAO 258336", "radeg" : 50.3543, "rapm" : 0.0355, "vmag" : 8, "xref" : ["HD 22237", "CPD -82 65", "-82 64","PPM 376283", "SAO 258336", "CP-82 65","GC 4125" ] }
Here are the maps and reduction functions (right now I'm just the lii and bii fields):
function map() { try { emit(this.xref, {lii:this.lii, bii:this.bii}); } catch(e) { } } function reduce(key, values) { var result = {xref:key, lii: 0.0, bii: 0.0}; try { values.forEach(function(value) { if (value.lii && value.bii) { result.lii += value.lii; result.bii += value.bii; } }); result.bii /= values.length; result.lii /= values.length; } catch(e) { } return result; }
Unfortunately, when starting this file, an error message appears:
db.catalog.mapReduce(map, reduce, {out:{replace:"catalog2", db:"astro2"}}); Wed Nov 23 10:12:25 uncaught exception: map reduce failed:{ "assertion" : "_id cannot be an array", "assertionCode" : 10099, "errmsg" : "db assertion failure", "ok" : 0
The xref field is an array, but all values ββare equal in this array. Is he trying to use this array as an id field in new collections?