Here is a slightly modified version that adds a merge operation.
Merging allows you to accept counters from multiple HyperLogLog instances, and define unique counters in general.
For example, if you have unique visitors collected on Monday, Tuesday, and Wednesday, then you can combine the buckets together and count the number of unique visitors for three days:
var pow_2_32 = 0xFFFFFFFF + 1; function HyperLogLog(std_error) { function log2(x) { return Math.log(x) / Math.LN2; } function rank(hash, max) { var r = 1; while ((hash & 1) == 0 && r <= max) { ++r; hash >>>= 1; } return r; } var m = 1.04 / std_error; var k = Math.ceil(log2(m * m)), k_comp = 32 - k; m = Math.pow(2, k); var alpha_m = m == 16 ? 0.673 : m == 32 ? 0.697 : m == 64 ? 0.709 : 0.7213 / (1 + 1.079 / m); var M = []; for (var i = 0; i < m; ++i) M[i] = 0; function merge(other) { for (var i = 0; i < m; i++) M[i] = Math.max(M[i], other.buckets[i]); } function count(hash) { if (hash !== undefined) { var j = hash >>> k_comp; M[j] = Math.max(M[j], rank(hash, k_comp)); } else { var c = 0.0; for (var i = 0; i < m; ++i) c += 1 / Math.pow(2, M[i]); var E = alpha_m * m * m / c;
Then you can do something like this:
// initialize one counter per day var ll_monday = HyperLogLog(0.01); var ll_tuesday = HyperLogLog(0.01); var ll_wednesday = HyperLogLog(0.01); // add 5000 unique values in each day for(var i=0; i<5000; i++) ll_monday.count(fnv1a('' + Math.random())); for(var i=0; i<5000; i++) ll_tuesday.count(fnv1a('' + Math.random())); for(var i=0; i<5000; i++) ll_wednesday.count(fnv1a('' + Math.random())); // add 5000 values which appear every day for(var i=0; i<5000; i++) {ll_monday.count(fnv1a(''+i)); ll_tuesday.count(fnv1a('' + i)); ll_wednesday.count(fnv1a('' + i));} // merge three days together together = HyperLogLog(0.01); together.merge(ll_monday); together.merge(ll_tuesday); together.merge(ll_wednesday); // report console.log('unique per day: ' + Math.round(ll_monday.count()) + ' ' + Math.round(ll_tuesday.count()) + ' ' + Math.round(ll_wednesday.count())); console.log('unique numbers overall: ' + Math.round(together.count()));
Gary Weiss Mar 31 '14 at 12:18 2014-03-31 12:18
source share