Array random mixing algorithm depending on different weights

I have a set of elements that I want to shuffle randomly, but each element has a different priority or weight. An element with a larger weight should have a higher probability of ending up near the top of the result.

I have this array:

# Sample input: each element carries an id and an integer weight.
elements = [
  { id: "ID_1", weight: 1 },
  { id: "ID_2", weight: 2 },
  { id: "ID_3", weight: 6 }
]

And I want to shuffle it so that the element with id "ID_3" is ~6 times more likely to be first than the element "ID_1" and 3 times more likely than the element "ID_2".

Update

Explanation: after you have selected the first position, other elements will fight for the remaining positions using the same logic.

+4
source share
6

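I can think of two approaches to solve this, although I suspect a modification of Fisher-Yates could do it even better: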

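O(n * W) solution: (simple to program)

The first approach is to create duplicates of each element according to its weight (as in your approach) and put them in a new list. Run a standard (Fisher-Yates) shuffle on that list, then iterate over it, discarding duplicates and keeping only the first occurrence of each element. This runs in O(n*W), where n is the number of elements in the list and W is the average weight (a pseudo-polynomial solution).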


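O(n log n) solution: (considerably harder to program)

The second approach is to build a list of cumulative sums of the weights:

sum[i] = weight[0] + ... + weight[i]

Now draw a random number between 0 and sum[n], and choose the first element whose sum is greater than or equal to that number.
This is the next element; remove it, rebuild the list, and repeat.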

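This runs in O(n^2*logn).

It can be improved by using a binary tree instead of a list, where each node also stores the total weight of its entire subtree.
Then, after drawing a number, find the matching element (the first one whose cumulative sum exceeds the random number), delete its node, and recalculate the weights along the path to the root.
Building the tree takes O(n), finding the node takes O(logn) at each step, and recalculating the sums takes O(logn). Repeating this until the tree is exhausted gives an O(nlogn) solution. The idea is very similar to "order statistic trees", but uses the sum of weights rather than the number of descendants. Searching and rebalancing after a deletion are done the same way as in an order statistic tree.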


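Explanation of building and using the binary tree.

Suppose you have elements=[a,b,c,d,e,f,g,h,i,j,k,l,m] with weights=[1,2,3,1,2,3,1,2,3,1,2,3,1]

First build an almost-complete binary tree and fill it with the elements. Note that the tree is NOT a binary search tree, just an ordinary binary tree, so the order of the elements does not matter - and it does not need to be maintained later.

You will get something like the following tree: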

enter image description here

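Legend: w - the weight of the node, sw - the sum of the weights of its entire subtree.

Next, compute the sum of weights for each subtree. Start from the leaves, where s.w = w. For every other node compute s.w = left->s.w + right->s.w + w, filling the tree from the bottom up (post-order traversal).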

enter image description here

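Building the tree, filling it, and computing s.w for every node takes O(n).

Now repeatedly pick a random number between 0 and the total weight (the s.w value of the root, 25 in our case). Call that number r, and find the matching node.
Finding the matching node is done recursively: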

if `r< root.left.sw`:
   go to left son, and repeat. 
else if `r<root.left.sw + root.w`:
   the node you are seeking is the root, choose it. 
else:
   go to `root.right` with `r= r-root.left.sw - root.w`

For example, with r=10:

Is r<root.left.sw? Yes. Recursively invoke with r=10,root=B (left child)
Is r<root.left.sw? No. Is r < root.left.sw + root.w? No. Recursively invoke with r=10-6-2=2, and root=E (right child)
Is r<root.left.sw? No. Is r < root.left.sw + root.w? Yes. Choose E as next node.

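This takes O(h) = O(logn) per iteration.

Now you need to delete that node and reset the weights in the tree.
One deletion approach that keeps the tree at logarithmic height is similar to removing from a binary heap: replace the chosen node with the bottom-rightmost node, remove the bottom-rightmost node from its old position, and recalculate the two branches that lead from the two affected nodes up to the root.

First, the switch:

enter image description here

Then the recalculation: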

enter image description here

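Note that recalculation is needed only along two paths, each of depth at most O(logn) (the nodes colored orange in the picture), so deletion and recalculation are also O(logn).

You now have a new binary tree with updated weights, and you are ready to pick the next candidate, until the tree is exhausted.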

+6

An implementation of the first approach suggested by @amit:

# Shuffles +elements+ so that heavier elements tend to come first, using the
# "replicate, shuffle, keep first occurrence" technique.
#
# The block receives an element and must return its weight as a non-negative
# Integer (it is used as a repetition count). Elements whose weight is 0 never
# enter the chain and are therefore absent from the result.
#
# Memory and time grow with the sum of all weights.
#
# @param elements [#each] the elements to shuffle
# @yieldparam element [Object] one of +elements+
# @yieldreturn [Integer] the weight of that element
# @return [Array] the elements with positive weight, in weighted random order
def self.random_suffle_with_weight(elements, &proc)
  pool = []
  consecutive_chain = []

  elements.each do |element|
    slot = pool.size
    pool << element
    # Replicate the element's slot number, not the element itself, so that
    # two distinct elements that happen to compare equal are not merged by
    # the uniq call below.
    proc.call(element).times { consecutive_chain << slot }
  end

  # After shuffling, the first occurrence of each slot fixes its rank.
  consecutive_chain.shuffle.uniq.map { |slot| pool[slot] }
end
+1

Here is another way to do it:

# Returns the hashes of +array+ in weighted random order.
#
# The result is built in array.size steps. At each step one of the hashes not
# yet chosen is drawn with probability h[:weight] / (sum of remaining weights).
# Candidates are scanned in order of decreasing weight so that the scan tends
# to stop early.
#
# Weights are expected to be non-negative Integers. Hashes whose weight is 0
# are only chosen once no positive weight remains, so they end up last.
# +array+ itself is not modified.
#
# @param array [Array<Hash>] hashes that each have a :weight entry
# @return [Array<Hash>] the same hash objects, reordered
def weighted_shuffle(array)
  arr = array.sort_by { |h| -h[:weight] }
  tot_wt = arr.reduce(0) { |t, h| t + h[:weight] }
  ndx_left = arr.each_index.to_a # indices into arr that are still available
  arr.size.times.with_object([]) do |_, a|
    ndx =
      if tot_wt > 0
        cum = 0
        # rand(tot_wt) is uniform on 0...tot_wt, so the comparison must be
        # strict: a candidate of weight w owns exactly w of those values.
        rn = rand(tot_wt)
        ndx_left.find { |i| rn < (cum += arr[i][:weight]) }
      else
        # Only zero-weight candidates remain; take them in their current order.
        ndx_left.first
      end
    a << arr[ndx]
    ndx_left.delete(ndx)
    tot_wt -= arr[ndx][:weight]
  end
end

< >

# Example input with larger weights in the same 1 : 2 : 6 ratio.
elements = [
  { id: "ID_1", weight: 100 },
  { id: "ID_2", weight: 200 },
  { id: "ID_3", weight: 600 }
]

# Prints the ids produced by +n+ independent calls of weighted_shuffle(arr),
# one inspected array per line.
#
# @param arr [Array<Hash>] hashes that each have an :id entry
# @param n [Integer] number of shuffles to print
# @return [Array] always an empty array
def display(arr, n)
  n.times do
    shuffled_ids = weighted_shuffle(arr).map { |h| h[:id] }
    p shuffled_ids
  end
  []
end

display(elements,10)
  ["ID_3", "ID_2", "ID_1"]
  ["ID_1", "ID_3", "ID_2"]
  ["ID_1", "ID_3", "ID_2"]
  ["ID_3", "ID_2", "ID_1"]
  ["ID_3", "ID_2", "ID_1"]
  ["ID_2", "ID_3", "ID_1"]
  ["ID_2", "ID_3", "ID_1"]
  ["ID_3", "ID_1", "ID_2"]
  ["ID_3", "ID_1", "ID_2"]
  ["ID_3", "ID_2", "ID_1"]

n = 10_000
# pos[position][id] counts how often each id landed at each output position.
pos = {}
elements.each_index { |i| pos[i] = Hash.new(0) }
n.times do
  weighted_shuffle(elements).each_with_index { |h, i| pos[i][h[:id]] += 1 }
end
# Convert the raw counts into relative frequencies, rounded to three decimals.
pos.each_value do |counts|
  counts.each_key { |id| counts[id] = (counts[id] / n.to_f).round(3) }
end
  #=> {0=>{"ID_3"=>0.661, "ID_2"=>0.224, "ID_1"=>0.115},
  #    1=>{"ID_2"=>0.472, "ID_3"=>0.278, "ID_1"=>0.251},
  #    2=>{"ID_1"=>0.635, "ID_2"=>0.304, "ID_3"=>0.061}}

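As you can see, of the 10000 times weighted_shuffle was called, the first element chosen was "ID_3" 66.1% of the time, "ID_2" 22.4% of the time, and "ID_1" the remaining 11.5% of the time. "ID_2" was chosen second 47.2% of the time, and so on.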

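arr is the array of hashes to be shuffled, sorted as shown below. The shuffle is performed in arr.size steps. At each step one element of arr is drawn at random, without replacement, using the weights provided. If h[:weight] sums to tot over all elements h of arr that have not yet been selected, then the probability that any one of those hashes h is selected is h[:weight]/tot. The selection at each step is made by finding the first cumulative weight p for which rand(tot) <= p. That search is made more efficient by first sorting the elements by decreasing weight, which is the first step of the method: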

elements.sort_by { |h| -h[:weight] }
  #=> [{ :id => "ID_3", :weight => 600 },
  #    { :id => "ID_2", :weight => 200 },
  #    { :id => "ID_1", :weight => 100 }]

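This is implemented by maintaining an array of indices into arr, called ndx_left, over which the search is performed. After a hash h at index i has been selected, tot is reduced by h[:weight] and i is deleted from ndx_left.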

Variant:

# Variant of the weighted shuffle that shrinks its working array instead of
# tracking a list of remaining indices.
#
# At each step one of the remaining hashes is drawn with probability
# h[:weight] / (sum of remaining weights). The chosen slot is then filled with
# the last remaining hash and the working array shrinks by one, so the initial
# decreasing-weight order is not preserved between steps.
#
# Weights are expected to be non-negative Integers. Hashes whose weight is 0
# are only chosen once no positive weight remains. +array+ is not modified.
#
# @param array [Array<Hash>] hashes that each have a :weight entry
# @return [Array<Hash>] the same hash objects, reordered
def weighted_shuffle_variant(array)
  arr = array.sort_by { |h| -h[:weight] }
  tot_wt = arr.reduce(0) { |t, h| t + h[:weight] }
  arr.size.times.with_object([]) do |_, a|
    ndx =
      if tot_wt > 0
        cum = 0
        # rand(tot_wt) is uniform on 0...tot_wt, hence the strict comparison.
        rn = rand(tot_wt)
        arr.index { |h| rn < (cum += h[:weight]) }
      else
        # Only zero-weight candidates remain; take the first one.
        0
      end
    picked = arr[ndx]
    a << picked
    tot_wt -= picked[:weight]
    # Move the last hash into the freed slot. When the picked hash was itself
    # the last one, popping it is all that is needed; assigning it back would
    # re-append it and allow it to be drawn again.
    last = arr.pop
    arr[ndx] = last if ndx < arr.size
  end
end

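Rather than maintaining an array of indices of the elements of arr that have not yet been selected, arr itself is modified in place, shrinking by one each time an element is selected. If element arr[i] is selected, where i is its index, the last element of arr is copied to offset i and removed from the end: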

arr[i] = arr.pop 

Benchmark

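If each element h of elements is replicated h[:weight] times, shuffled, and then passed through uniq, the intermediate array can become enormous when the weights are large. Here is a benchmark. I have compared weighted_shuffle with @Mori's solution, which is representative of the "replicate, shuffle, delete" approach: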

# "Replicate, shuffle, delete" weighted shuffle that returns ids.
#
# Every hash contributes its :id repeated :weight times; the combined list is
# shuffled and only the first occurrence of each id is kept.
#
# @param array [Array<Hash>] hashes with :id and :weight entries
# @return [Array] the ids in weighted random order
def mori_shuffle(array)
  replicated_ids = array.flat_map do |entry|
    [entry[:id]] * entry[:weight]
  end
  replicated_ids.shuffle.uniq
end

require 'benchmark'

# Times one call of mori_shuffle and one call of weighted_shuffle on the same
# randomly generated input and prints the results.
#
# A header line is printed first, then +nelements+ hashes of the form
# { id: i, weight: rand(10**ndigits) } are generated and passed to both methods.
#
# @param nelements [Integer] number of elements to generate
# @param ndigits [Integer] weights are drawn from 0...10**ndigits
# @return [Object] whatever Benchmark.bm returns
def test_em(nelements, ndigits)
  puts "\nelements.size=>#{nelements}, weights have #{ndigits} digits\n\n"
  mx = 10**ndigits
  elements = nelements.times.map { |i| { id: i, weight: rand(mx) } }
  # The first argument of Benchmark.bm is the label column width; each row's
  # label is given to x.report. 16 is the length of the longer label.
  Benchmark.bm(16) do |x|
    x.report("mori_shuffle") { mori_shuffle(elements) }
    x.report("weighted_shuffle") { weighted_shuffle(elements) }
  end
end

elements.size=>3, weights have 1 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000068)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000051)

elements.size=>3, weights have 2 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000035)
weighted_shuffle  0.010000   0.000000   0.010000 (  0.000026)

elements.size=>3, weights have 3 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000161)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000027)

elements.size=>3, weights have 4 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000854)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000026)

elements.size=>20, weights have 2 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000089)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000090)

elements.size=>20, weights have 3 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000771)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000071)

elements.size=>20, weights have 4 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.005895)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000073)

elements.size=>100, weights have 2 digits

                      user     system      total        real
mori_shuffle      0.000000   0.000000   0.000000 (  0.000446)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000683)

elements.size=>100, weights have 3 digits

                      user     system      total        real
mori_shuffle      0.010000   0.000000   0.010000 (  0.003765)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000659)

elements.size=>100, weights have 4 digits

                      user     system      total        real
mori_shuffle      0.030000   0.010000   0.040000 (  0.034982)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000638)

elements.size=>100, weights have 5 digits

                      user     system      total        real
mori_shuffle      0.550000   0.040000   0.590000 (  0.593190)
weighted_shuffle  0.000000   0.000000   0.000000 (  0.000623)

elements.size=>100, weights have 6 digits

                      user     system      total        real
mori_shuffle      5.560000   0.380000   5.940000 (  5.944749)
weighted_shuffle  0.010000   0.000000   0.010000 (  0.000636)

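Comparison of weighted_shuffle and weighted_shuffle_variant

Since the variant does not have to maintain a separate array of indices, I expected it to be faster. However, weighted_shuffle is generally quicker. Here are some results: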

elements.size=>20, weights have 3 digits

                               user     system      total        real
weighted_shuffle           0.000000   0.000000   0.000000 (  0.000062)
weighted_shuffle_variant   0.000000   0.000000   0.000000 (  0.000108)

elements.size=>20, weights have 4 digits

                               user     system      total        real
weighted_shuffle           0.000000   0.000000   0.000000 (  0.000060)
weighted_shuffle_variant   0.000000   0.000000   0.000000 (  0.000089)

elements.size=>100, weights have 2 digits

                               user     system      total        real
weighted_shuffle           0.000000   0.000000   0.000000 (  0.000666)
weighted_shuffle_variant   0.000000   0.000000   0.000000 (  0.000871)

elements.size=>100, weights have 4 digits

                               user     system      total        real
weighted_shuffle           0.000000   0.000000   0.000000 (  0.000625)
weighted_shuffle_variant   0.000000   0.000000   0.000000 (  0.000803)

elements.size=>100, weights have 6 digits

                               user     system      total        real
weighted_shuffle           0.000000   0.000000   0.000000 (  0.000664)
weighted_shuffle_variant   0.000000   0.000000   0.000000 (  0.000773)

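Compared with weighted_shuffle, weighted_shuffle_variant does not maintain an array of indices into (a copy of) elements for the items that have not yet been selected (a time saving). Instead, it replaces the selected element with the last element of the array and then pops the last element, so the array shrinks by one at each step. Unfortunately, this destroys the ordering of the elements by decreasing weight. By contrast, weighted_shuffle keeps the optimization of considering elements in decreasing order of weight. On balance, the latter appears to matter more than the former.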

+1

In the end, this is the implementation I settled on:

module Utils
  # Shuffles +elements+ so that heavier elements tend to come first.
  #
  # Elements are drawn one at a time without replacement; at each draw an
  # element is chosen with probability weight / (sum of remaining weights).
  # The draw uses Kernel.rand(sum of remaining weights) and maps the returned
  # number onto the remaining elements in their input order, each element
  # owning as many consecutive numbers as its weight.
  #
  # The block must return the element's weight as a non-negative Integer.
  # Elements with weight 0 can never be drawn and are appended at the end in
  # their input order.
  #
  # @param elements [Enumerable] the elements to shuffle
  # @yieldparam element [Object] one of +elements+
  # @yieldreturn [Integer] the weight of that element
  # @return [Array] all elements, in weighted random order
  def self.random_suffle_with_weight(elements, &proc)
    # [element, weight] pairs still waiting to be drawn, in input order.
    remaining = elements.map { |element| [element, proc.call(element)] }
    total = remaining.inject(0) { |sum, (_, weight)| sum + weight }
    shuffled = []

    while total > 0
      target = Kernel.rand(total)
      cumulative = 0
      # First pair whose cumulative weight exceeds the drawn number.
      index = remaining.index { |_, weight| target < (cumulative += weight) }
      element, weight = remaining.delete_at(index)
      shuffled << element
      total -= weight
    end

    # Whatever is left has weight 0.
    shuffled.concat(remaining.map(&:first))
  end
end

And the test:

# Checks the order returned by Utils.random_suffle_with_weight when the three
# Kernel.rand calls are scripted.
def test_random_suffle_with_weight
  light  = { :id => "ID_1", :weight => 10 }
  medium = { :id => "ID_2", :weight => 20 }
  heavy  = { :id => "ID_3", :weight => 60 }

  # One expectation per draw: the argument is the sum of the weights still in
  # play (90, then 70, then 60) and the return value is the scripted pick.
  Kernel.expects(:rand).with(90).returns(11)
  Kernel.expects(:rand).with(70).returns(1)
  Kernel.expects(:rand).with(60).returns(50)

  shuffled = Utils.random_suffle_with_weight([light, medium, heavy]) do |element|
    element[:weight]
  end

  assert_equal([medium, light, heavy], shuffled)
end
0
# Repeat each id as many times as its weight, shuffle, keep first occurrences.
elements
  .flat_map { |element| [element[:id]] * element[:weight] }
  .shuffle
  .uniq
0

Weighted Random Sampling (2005; Efraimidis, Spirakis) provides a very elegant algorithm for this. The implementation is super simple and works in O(n log(n)):

def weigthed_shuffle(items, weights):
    """Return the items in weighted random order.

    For every position i a key ``random.random() ** (1.0 / weights[i])`` is
    drawn, and the items are returned by decreasing key (the scheme from the
    Efraimidis-Spirakis paper mentioned alongside this function).
    """
    ranked = []
    for position in range(len(items)):
        key = random.random() ** (1.0 / weights[position])
        # Store the negated key so that an ascending sort yields the largest
        # key first; equal keys fall back to the original position.
        ranked.append((-key, position))
    ranked.sort()
    return [items[position] for _, position in ranked]
0
source

All Articles