What is the most efficient way to implement GroupBy in Javascript?

I am trying to implement a GroupBy method with these parameters

 function GroupBy(keySelector, elementSelector, comparer) { // keySelector = function(e) { return e.ID } // elementSelector = function(e) { return e.Name } // comparer = { Equals: function(a,b) { return a==b }, GetHashCode:... } } 

However, I do not know an effective way to implement it.

I created the jsPerf test with linq.js and the method I created that does not use a comparator and only works with flat types. ( Test result here )

Other libraries, such as underscores and Lo-Dash, do not accept the comparer parameter. Therefore, their implementation does not matter.

My key might be a class, so I need to determine something if TKey same in different cases.

So basically, what I'm trying to do is copy the C # Linq GroupBy behavior documented here .

Input Example:

 var arrComplex = [ { N: { Value: 10 }, Name: "Foo" }, { N: { Value: 10 }, Name: "Bar" }, { N: { Value: 20 }, Name: "Foo" }, { N: { Value: 20 }, Name: "Bar" } ]; 

Sample output (or something like this):

 [ { "Key": {"Value":10}, "Elements":["Foo","Bar"] }, { "Key": {"Value":20}, "Elements":["Foo","Bar"] } ] 

Any ideas on how to implement it?

Bounty

For generosity, I would like you to take into account that:

  • The key may be an object
  • Two objects can be equal if any property is equal
  • It should be as fast or fast as existing solutions.
  • The result can be an array or an object, it does not matter if I can get the elements grouped by key

Well, I'm waiting for a complete answer.

+7
performance javascript grouping
source share
2 answers

I managed to implement this method:

I need to get a hash code from objects.

 Object.prototype.GetHashCode = function () { var s = this instanceof Object ? stringify(this) : this.toString(); var hash = 0; if (s.length === 0) return hash; for (var i = 0; i < s.length; ++i) { hash = ((hash << 5) - hash) + s.charCodeAt(i); } return hash; }; Number.prototype.GetHashCode = function () { return this.valueOf(); }; 

Since JSON.stringify will fail with circular references, I created another method for formatting it so that I can make the most of the object as a string and calculate the hash code above it like this:

 function isPlainObject(obj) { if ((typeof (obj) !== "object" || obj.nodeType || (obj instanceof Window)) || (obj.constructor && !({}).hasOwnProperty.call(obj.constructor.prototype, "isPrototypeOf")) ) { return false; } return true; } function stringify(obj, s) { s = s || ""; for (var i in obj) { var o = obj[i]; if (o && (o instanceof Array || isPlainObject(o))) { s += i + ":" + JSON.stringify(o); } else if (o && typeof o === "object") { s += i + ":" + "$ref#" + o; } else { s += i + ":" + o; } } return s; } 

Performance impact is small. For a large object, this is one and the same for small objects, it loses, but it is still pretty fast and safe. Performance test here .

 Name op/s --------------------------------- JSON.stringify large 62 stringify large 62 JSON.stringify small 1,690,183 stringify small 1,062,452 

My GroupBy Method

 function GroupBy(a, keySelector, elementSelector, comparer) { // set default values for opitinal parameters elementSelector = elementSelector || function(e) { return e; }; comparer = comparer || { Equals: function(a,b) { return a==b }, GetHashCode: function(e) { return e.GetHashCode(); } }; var key, hashKey, reHashKey; // keep groups separated by hash var hashs = {}; for (var i = 0, n = a.length; i < n; ++i) { // in case of same hash, but Equals returns false reHashKey = undefined; // grabs the key key = keySelector(a[i]); // grabs the hashcode hashKey = comparer.GetHashCode(key); // if a hash exists in the list // compare values with Equals // in case it return false, generate a unique hash if (typeof hashs[hashKey] !== "undefined") reHashKey = comparer.Equals(key, hashs[hashKey].Key) ? hashKey : hashKey + " " + i; // if a new hash has been generated, update if (typeof reHashKey !== "undefined" && reHashKey !== hashKey) hashKey = reHashKey; // get/create a new group and add the current element to the list hashs[hashKey] = hashs[hashKey] || { Key: key, Elements: [] }; hashs[hashKey].Elements.push(a[i]); } return hashs; } 

To check

 var arrComplex = [ { N: { Value: 10 }, Name: "Foo" }, { N: { Value: 10 }, Name: "Bar" }, { N: { Value: 20 }, Name: "Foo" }, { N: { Value: 20 }, Name: "Bar" } ]; // var x = GroupBy(arrComplex , function(e) { return eN; } , function(e) { return e.Name; } , { Equals: function(a,b) { return a.Value == b.Value }, GetHashCode: function(e) { return e.GetHashCode(); } } ); // console.log(x); 

JsFiddle example , now with Jedi.

But, according to my tests , my implementation of GroupBy slower than linq.js GroupBy . This happens faster when I convert ToArray() . Maybe linq.js really only executes when converted to an array, so the difference is that I'm not sure about this part.

Test results

 Name op/s --------------------------------- GroupBy 163,261 GroupByToArray 152,382 linq.js groupBy 243,547 linq.js groupBy toArray 26,309 
0
source share

I used your jsperf as a link, for some of the finer points of the script. I really liked your hash code, so I completely stole it. Mine uses another method to generate the string used to create the hash, which seems to be a little faster, which improves performance according to browser charts. I included in my test a proof of the concept of “too much recursion” to show that it has recursion protection such as JSON.stringify and .toSource ().

My jsfiddle shows that the code returns the correct format. My jsperf seems to indicate that it is superior to the published solution. I also included linq.js solution, but for me it is very bad in FireFox. It works in Safari, Chrome, IE, but not faster than mine, except IE. I even tried this on my phone, and yet I have the same performance difference. I personally tested it in the latest versions of all browsers side by side with the published solution, and my release - about 40% for each of them. What are all the thoughts?

Here is my code:

 var arr = [ { N: 10, Name: "Foo" }, { N: 10, Name: "Bar" }, { N: 20, Name: "Foo" }, { N: 20, Name: "Bar" } ]; var poc = { name:'blah', obj:{} }; poc.obj = poc; var arrComplex = [ { N: { Value: 10, TooMuchRecursionProofPOC:poc }, Name: "Foo" }, { N: { Value: 10, TooMuchRecursionProofPOC:poc }, Name: "Bar" }, { N: { Value: 20, TooMuchRecursionProofPOC:poc }, Name: "Foo" }, { N: { Value: 20, TooMuchRecursionProofPOC:poc }, Name: "Bar" } ]; var eArr = Enumerable.From(arr); var eArrComplex = Enumerable.From(arrComplex); function setup_hashers() { // recursion protection idea var rp = '_rp'+(Math.random()*10000000); function tstr() { var out = '', i = ''; if (this[rp]) { this[rp] = undefined; return out; } for (i in this) if (i != rp && this.hasOwnProperty(i)) out += this[i] instanceof Object ? ((this[rp] = true) && this[i] != this && !this[i][rp] ? tstr.call(this[i]) : '') : (this[i].toString || tstr).call(this[i]); return out; }; Number.prototype.GetHashCode = function() { return this.valueOf(); }; Object.prototype.GetHashCode = function() { var s = (this instanceof Object ? tstr : this.toString || tstr).call(this), h = 0; if (s.length) for (var i = 0; i < s.length; i++) h = ((h << 5) - h) + s.charCodeAt(i); return h; }; } function group_by(a, keyFunc, valFunc, comp, as_array) { if (!a.length) return as_array ? [] : {}; var keyFunc = keyFunc || function (e) { return e; }, valFunc = valFunc || function (e) { return e; }; var comp = comp || { Equals: function (a, b) { return a == b; }, Hash: function (e) { return e.GetHashCode(); } }; var hashs = {}, key = '', hash = ''; for (var i = 0; i < a.length; i++) { key = keyFunc(a[i]); hash = comp.Hash(key); if (typeof hashs[hash] != 'undefined') hash = comp.Equals(key, hashs[hash].Key) ? hash : hash + '-' + i; hashs[hash] = hashs[hash] || { Key: key, Elements: [] }; hashs[hash].Elements.push(valFunc(a[i])); } if (as_array) { var out = [], j = '', keys = Object.keys(hashs); for (var j = 0; j < keys.length; j++) out.push(hashs[keys[j]]); return out; } return hashs; }; function group_by_control(a, keyFunc, valFunc) { if (!a.length) return as_array ? [] : {}; var keyFunc = keyFunc || function (e) { return e; }, valFunc = valFunc || function (e) { return e; }; var hashs = {}, key = '', hash = ''; for (var i = 0; i < a.length; i++) { key = keyFunc(a[i]); hashs[key] = hashs[key] || { Key: key, Elements: [] }; hashs[key].Elements.push(valFunc(a[i])); } var out = [], j = '', keys = Object.keys(hashs); for (var j = 0; j < keys.length; j++) out.push(hashs[keys[j]]); return out; }; setup_hashers(); console.log(group_by_control( arr, function(e) { return eN }, function(e) { return e.Name } )); console.log(group_by( arrComplex, function(e) { return eN; }, function(e) { return e.Name; }, { Equals: function(a, b) { return a.Value == b.Value }, Hash: function(e) { return e.GetHashCode(); } } )); console.log(group_by( arrComplex, function(e) { return eN; }, function(e) { return e.Name; }, { Equals: function(a, b) { return a.Value == b.Value }, Hash: function(e) { return e.GetHashCode(); } }, true )); 
+4
source share

All Articles