I suggest you compute an MD5 hash of JSON.stringify(o) and store it in your hash map, with a reference to the saved object as the data for that hash. Because JSON.stringify() emits keys in whatever order they were added to the object, two equal objects can produce different strings; to avoid that, you first create a copy of the object whose keys are in a consistent (sorted) order.
Then, as each new object arrives, you look up its hash in the hash map. If you find a match, you compare the incoming object with the actual object you saved to see whether they really are duplicates (since MD5 hash collisions are possible). That way the hash table stays manageable, because it only holds MD5 hashes.
Here's the code for creating a canonical string representation of an object (including nested objects and objects inside arrays). It handles object keys that might otherwise come out in a different order if you just called JSON.stringify().
/**
 * Canonical JSON.stringify(): serializes `obj` with the keys of every object
 * (including objects nested in other objects or inside arrays) emitted in
 * sorted order, so two structurally equal values always produce the same
 * string regardless of the order in which their properties were created.
 *
 * Array element order is preserved. `null` is passed through unchanged, and
 * an object that defines toJSON() (e.g. a Date) is replaced by the result of
 * that call before ordering, the same way JSON.stringify() treats it.
 *
 * Circular references (a child containing a reference to one of its
 * ancestors) are not allowed. The same object may appear several times in
 * the structure as long as it is never its own ancestor.
 *
 * @param {Object|Array|null} obj - value to serialize
 * @returns {string} canonical JSON text
 * @throws {Error} if `obj` is not of type "object", or if a circular
 *     reference is found
 */
JSON.stringifyCanonical = function(obj) {
    // Objects on the path from the root down to the node currently being
    // copied. Used only for cycle detection; pushing on entry and popping on
    // exit avoids copying a set of parents at every level.
    var ancestors = [];

    // Returns a copy of `value` in which every nested object has its keys
    // inserted in sorted order. `key` is the property name (or array index,
    // as a string) under which `value` was found; it is forwarded to toJSON().
    function orderKeys(value, key) {
        if (value !== null && typeof value === "object" && typeof value.toJSON === "function") {
            value = value.toJSON(key);
        }

        // typeof null === "object", so null has to be excluded explicitly;
        // everything that is not a real object is left for JSON.stringify().
        if (value === null || typeof value !== "object") {
            return value;
        }

        if (ancestors.indexOf(value) !== -1) {
            throw new Error("circular object in stringifyCanonical()");
        }
        ancestors.push(value);

        var copy, i;
        if (Array.isArray(value)) {
            // no need to re-order an array, but its elements may contain
            // objects that need to be ordered
            copy = [];
            for (i = 0; i < value.length; i++) {
                copy[i] = orderKeys(value[i], String(i));
            }
        } else {
            // A prototype-less target keeps an own "__proto__" key (possible
            // in JSON.parse() output) as ordinary data instead of having the
            // assignment change the copy's prototype.
            copy = Object.create(null);
            Object.keys(value).sort().forEach(function(name) {
                copy[name] = orderKeys(value[name], name);
            });
        }

        ancestors.pop();
        return copy;
    }

    if (typeof obj !== "object") {
        throw new Error("orderKeys() expects object type");
    }
    return JSON.stringify(orderKeys(obj, ""));
};
And here is the algorithm that uses it:
// Maps a hash string to the list of indexes (into myObjects) of all stored
// objects whose canonical string produced that hash.
var myHashMap = {};

// Every unique object accepted so far, in arrival order. The original snippet
// used this array without declaring it.
var myObjects = [];

/**
 * Stores `o` in myObjects unless a structurally identical object is already
 * stored.
 *
 * The canonical string of `o` is hashed with CreateMD5() (not defined in this
 * snippet; it has to be provided elsewhere) and the hash is looked up in
 * myHashMap. Because different strings can share a hash, a hash hit is only
 * treated as a duplicate when the canonical strings match exactly.
 *
 * @param {Object} o - incoming object to de-duplicate
 * @returns {boolean} true if `o` was stored as a new unique object, false if
 *     it duplicates an object that was stored earlier
 */
function processObject(o) {
    var stringifiedCandidate = JSON.stringifyCanonical(o);
    var hash = CreateMD5(stringifiedCandidate);

    // indexes of previously stored objects with this hash value (if any)
    var list = myHashMap[hash];
    if (!list) {
        // first object with this hash: start a new list for it
        list = [];
        myHashMap[hash] = list;
    }

    // A hash match is not proof of equality, so compare canonical strings.
    // Stored objects are re-serialized here, so the comparison reflects their
    // current contents.
    for (var i = 0; i < list.length; i++) {
        if (stringifiedCandidate === JSON.stringifyCanonical(myObjects[list[i]])) {
            return false; // found an exact object match
        }
    }

    // no exact match (new hash, or a hash collision): keep the object
    myObjects.push(o);
    list.push(myObjects.length - 1);
    return true;
}
A test case for JSON.stringifyCanonical() is here: https://jsfiddle.net/jfriend00/zfrtpqcL/
source share