Code:
# Returns the hashes of +array+ in weighted-random order.
#
# Elements are drawn one at a time without replacement. At each draw a
# remaining element is chosen with probability
# (its :weight) / (sum of the remaining :weight values).
#
# @param array [Array<Hash>] hashes that each have a :weight entry; assumes
#   the weights are non-negative Integers (their sum is passed to Kernel#rand)
# @return [Array<Hash>] a new array holding the same hash objects, reordered;
#   +array+ itself is not modified
def weighted_shuffle(array)
  # Work on a copy sorted by descending weight; the scan below walks the
  # remaining elements in this order.
  arr = array.sort_by { |h| -h[:weight] }
  tot_wt = arr.reduce(0) { |t, h| t + h[:weight] }
  ndx_left = arr.each_index.to_a # indices into arr that have not been drawn yet
  arr.size.times.with_object([]) do |_, a|
    cum = 0
    rn = tot_wt > 0 ? rand(tot_wt) : 0
    # rand(tot_wt) lies in 0...tot_wt, so the comparison must be strict:
    # an element of weight w then owns exactly w of the tot_wt outcomes.
    # When only zero-weight elements remain nothing matches, so fall back to
    # the first remaining index.
    ndx = ndx_left.find { |i| rn < (cum += arr[i][:weight]) } || ndx_left.first
    a << arr[ndx]
    ndx_left.delete(ndx)
    tot_wt -= arr[ndx][:weight]
  end
end
Example:
# Sample data used by the examples: each entry pairs an :id with a :weight.
elements = [
  { id: "ID_1", weight: 100 },
  { id: "ID_2", weight: 200 },
  { id: "ID_3", weight: 600 }
]
# Prints +n+ weighted shuffles of +arr+, one per line, showing only the :id
# of each element.
#
# @param arr [Array<Hash>] hashes with :id and :weight entries, handed to
#   weighted_shuffle unchanged
# @param n [Integer] how many shuffles to print
# @return [Array] always an empty array; nothing is collected
def display(arr, n)
  collected = []
  n.times do
    ids = weighted_shuffle(arr).map { |entry| entry[:id] }
    p ids
  end
  collected
end
# Print ten weighted shuffles of the sample data (ids only); sample output follows.
display(elements,10)
["ID_3", "ID_2", "ID_1"]
["ID_1", "ID_3", "ID_2"]
["ID_1", "ID_3", "ID_2"]
["ID_3", "ID_2", "ID_1"]
["ID_3", "ID_2", "ID_1"]
["ID_2", "ID_3", "ID_1"]
["ID_2", "ID_3", "ID_1"]
["ID_3", "ID_1", "ID_2"]
["ID_3", "ID_1", "ID_2"]
["ID_3", "ID_2", "ID_1"]
# Estimate, for every output position, how often each id lands there.
n = 10_000

# pos[i] maps an id to the number of times it appeared at position i.
pos = {}
elements.each_index { |slot| pos[slot] = Hash.new(0) }

n.times do
  weighted_shuffle(elements).each_with_index do |entry, slot|
    pos[slot][entry[:id]] += 1
  end
end

# Turn the raw counts into relative frequencies rounded to three decimals.
pos.each_value do |counts|
  counts.each_key { |id| counts[id] = (counts[id] / n.to_f).round(3) }
end
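So, over 10,000 runs of weighted_shuffle, the first element selected was "ID_3" 66.1% of the time, "ID_2" 22.4% of the time and "ID_1" 11.5% of the time. "ID_2" was selected second 47.2% of the time, and so on.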
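arr is the array of elements from which selections are made, one at a time. The number of selections equals arr.size. Each selection is made from the elements of arr that have not yet been selected. If the sum of h[:weight] is tot over all remaining elements h of arr, then, for each such element h, the probability that it is selected next is h[:weight]/tot. The choice is made by drawing rand(tot) and comparing it with the running cumulative weight p of the remaining elements. To make that scan find an element sooner, the elements are first sorted by decreasing weight: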
# Order the sample elements by decreasing :weight (the key is negated, so the
# largest weight sorts first).
elements.sort_by { |h| -h[:weight] }
#=> [{ :id => "ID_3", :weight => 600 },
# { :id => "ID_2", :weight => 200 },
# { :id => "ID_1", :weight => 100 }]
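Rather than deleting elements from arr, an array of the indices of the elements still available, ndx_left, is maintained. So, when the element h at index i is selected, tot is reduced by h[:weight] and i is removed from ndx_left.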
Variant:
# Variant of the weighted shuffle that removes each drawn element from the
# working array instead of tracking the indices still available.
#
# Elements are drawn one at a time without replacement. At each draw a
# remaining element is chosen with probability
# (its :weight) / (sum of the remaining :weight values).
#
# @param array [Array<Hash>] hashes that each have a :weight entry; assumes
#   the weights are non-negative Integers (their sum is passed to Kernel#rand)
# @return [Array<Hash>] a new array holding the same hash objects, reordered;
#   +array+ itself is not modified
def weighted_shuffle_variant(array)
  # sort_by returns a new array, so the removals below never touch +array+.
  arr = array.sort_by { |h| -h[:weight] }
  tot_wt = arr.reduce(0) { |t, h| t + h[:weight] }
  arr.size.times.with_object([]) do |_, a|
    cum = 0
    rn = tot_wt > 0 ? rand(tot_wt) : 0
    # rand(tot_wt) lies in 0...tot_wt, so the comparison must be strict.
    # When only zero-weight elements remain nothing matches; take index 0.
    ndx = arr.index { |h| rn < (cum += h[:weight]) } || 0
    chosen = arr[ndx]
    a << chosen
    tot_wt -= chosen[:weight]
    # Swap-remove: copy the last element into the vacated slot, then drop the
    # last slot. Writing `arr[ndx] = arr.pop` would put the chosen element
    # straight back whenever ndx is the last index.
    arr[ndx] = arr.last
    arr.pop
  end
end
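Here elements are removed from arr itself, but without shifting the elements of arr that follow the one removed. To remove arr[i], the element at index i, it is overwritten with the last element of arr, which is taken off the end: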
# Swap-remove: copy the last element into slot i, then drop the last slot.
# (A bare `arr[i] = arr.pop` re-appends the element when i is the last index.)
arr[i] = arr.last
arr.pop
Benchmark
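Replicating each element h of elements h[:weight] times, shuffling the result and then applying uniq becomes very costly as the weights grow. Here weighted_shuffle is compared with @Mori's "replicate, shuffle, delete" approach: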
# "Replicate, shuffle, delete" weighted shuffle.
#
# Each :id is repeated :weight times, the resulting pool is shuffled, and
# only the first occurrence of every id is kept.
#
# @param array [Array<Hash>] hashes with :id and :weight entries (assumes
#   Integer weights)
# @return [Array] the ids (not the hashes) in shuffled order; an id whose
#   weight is 0 contributes no copies and therefore does not appear
def mori_shuffle(array)
  pool = array.each_with_object([]) do |entry, copies|
    copies.concat([entry[:id]] * entry[:weight])
  end
  pool.shuffle.uniq
end
require 'benchmark'
# Benchmarks mori_shuffle against weighted_shuffle on one random data set and
# prints the timings.
#
# @param nelements [Integer] number of elements to generate
# @param ndigits [Integer] weights are drawn with rand(10**ndigits), i.e. they
#   have at most this many decimal digits
# @return [Array<Benchmark::Tms>] the measurements returned by Benchmark.bm
def test_em(nelements, ndigits)
  puts "\nelements.size=>#{nelements}, weights have #{ndigits} digits\n\n"
  mx = 10**ndigits
  elements = nelements.times.map { |i| { id: i, weight: rand(mx) } }
  # The argument to bm is the label column width (16 fits "weighted_shuffle");
  # the row labels themselves belong on each report call.
  Benchmark.bm(16) do |x|
    x.report("mori_shuffle") { mori_shuffle(elements) }
    x.report("weighted_shuffle") { weighted_shuffle(elements) }
  end
end
elements.size=>3, weights have 1 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000068)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000051)
elements.size=>3, weights have 2 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000035)
weighted_shuffle 0.010000 0.000000 0.010000 ( 0.000026)
elements.size=>3, weights have 3 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000161)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000027)
elements.size=>3, weights have 4 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000854)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000026)
elements.size=>20, weights have 2 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000089)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000090)
elements.size=>20, weights have 3 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000771)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000071)
elements.size=>20, weights have 4 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.005895)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000073)
elements.size=>100, weights have 2 digits
user system total real
mori_shuffle 0.000000 0.000000 0.000000 ( 0.000446)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000683)
elements.size=>100, weights have 3 digits
user system total real
mori_shuffle 0.010000 0.000000 0.010000 ( 0.003765)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000659)
elements.size=>100, weights have 4 digits
user system total real
mori_shuffle 0.030000 0.010000 0.040000 ( 0.034982)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000638)
elements.size=>100, weights have 5 digits
user system total real
mori_shuffle 0.550000 0.040000 0.590000 ( 0.593190)
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000623)
elements.size=>100, weights have 6 digits
user system total real
mori_shuffle 5.560000 0.380000 5.940000 ( 5.944749)
weighted_shuffle 0.010000 0.000000 0.010000 ( 0.000636)
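weighted_shuffle vs. weighted_shuffle_variant
The same benchmark was then run with weighted_shuffle_variant in place of mori_shuffle, so that the two versions of weighted_shuffle could be compared directly. The results: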
elements.size=>20, weights have 3 digits
user system total real
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000062)
weighted_shuffle_variant 0.000000 0.000000 0.000000 ( 0.000108)
elements.size=>20, weights have 4 digits
user system total real
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000060)
weighted_shuffle_variant 0.000000 0.000000 0.000000 ( 0.000089)
elements.size=>100, weights have 2 digits
user system total real
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000666)
weighted_shuffle_variant 0.000000 0.000000 0.000000 ( 0.000871)
elements.size=>100, weights have 4 digits
user system total real
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000625)
weighted_shuffle_variant 0.000000 0.000000 0.000000 ( 0.000803)
elements.size=>100, weights have 6 digits
user system total real
weighted_shuffle 0.000000 0.000000 0.000000 ( 0.000664)
weighted_shuffle_variant 0.000000 0.000000 0.000000 ( 0.000773)
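Compared with weighted_shuffle, weighted_shuffle_variant does not maintain an array of indices of the (remaining) elements; it removes each selected element from arr (using pop) instead. In these runs, however, that did not pay off: the variant was somewhat slower in every case. On this evidence weighted_shuffle is the better choice, although the differences are small.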