Python: choosing numbers with related probabilities

Possible duplicates:
Random weighted selection
Generate random numbers with a given (numerical) distribution

I have a list of lists that contains a series of numbers and the probability associated with each of them.

# Each entry is a [number, probability] pair.
prob_list = [
    [1, 0.5],
    [2, 0.25],
    [3, 0.05],
    [4, 0.01],
    [5, 0.09],
    [6, 0.1],
]

For example, in prob_list[0] the number 1 has a probability of 0.5 associated with it. Thus, you would expect 1 to appear 50% of the time.

How do I take these weights into account when I select numbers?

NOTE: the number of numbers in the list can vary from 6 to 100


EDIT

In the list I have 6 numbers with probabilities associated with them. I want to choose two numbers based on their probability.

No number may be selected twice. If "2" is selected, it cannot be selected again.

+5
4

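This might be what you are looking for. It extends a solution from "Generate random numbers with a given (numerical) distribution". The function removes the selected item from the distribution, updates the remaining probabilities, and returns the selected item and the updated distribution. It has not been proven to work, but it should give a good impression of the idea.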

import random


def random_distr(l):
    """Draw one item from a discrete distribution and remove it in place.

    ``l`` is a list of mutable ``[item, probability]`` pairs whose
    probabilities are expected to sum to 1. One pair is chosen by walking the
    running total of the probabilities until it reaches a uniform random
    threshold. The chosen pair is removed from ``l`` and the probabilities of
    the remaining pairs are divided by ``1 - prob`` of the chosen pair, so a
    well-formed distribution sums to 1 again.

    Returns:
        A tuple ``(item, l)``: the drawn item and the same list object that
        was passed in, now without the drawn pair.

    Raises:
        ValueError: if ``l`` is empty.
    """
    if not l:
        # Raise explicitly: an ``assert`` is stripped when Python runs with -O.
        raise ValueError("cannot draw from an empty distribution")

    r = random.uniform(0, 1)
    cumulative = 0
    for index, (item, prob) in enumerate(l):
        cumulative += prob
        if cumulative >= r:
            break
    # If the loop ended without a break (the running total stayed below r,
    # e.g. because of floating point inaccuracies), index/item/prob still
    # refer to the last pair, so the last pair is the one drawn.
    l.pop(index)

    # Rescale the remaining probabilities to the reduced domain. When the
    # drawn pair carried the whole probability mass there is nothing to
    # rescale by; leave the remaining values untouched rather than divide
    # by zero.
    remaining_mass = 1 - prob
    if remaining_mass > 0:
        for pair in l:
            pair[1] /= remaining_mass
    return item, l

dist = [[1, 0.5], [2, 0.25], [3, 0.05], [4, 0.01], [5, 0.09], [6, 0.1]]
# Keep drawing until the distribution is exhausted. random_distr returns the
# drawn value together with the shrunken distribution, so every value is
# printed exactly once, in weighted-random order.
while dist:
    val, dist = random_distr(dist)
    # The function-call form prints a single value identically on Python 2 and 3.
    print(val)
+1

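I'm going to assume that the probabilities all add up to 1. If they don't, you will have to scale them accordingly so that they do.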

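First generate a uniform random number in [0, 1] using random.random(). Then loop through the list, summing the probabilities. The first time the sum reaches or exceeds the random number, return the associated number. This way, if the random number falls within the range (0.5, 0.75] in your example, 2 is returned, which gives it the required 0.25 probability of being returned.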

import random
import sys
def pick_random(prob_list):
    """Return one number from ``prob_list``, chosen according to its weight.

    ``prob_list`` is a non-empty sequence of entries whose element 0 is the
    number and whose element 1 is its probability; the probabilities are
    expected to sum to 1. The entries are scanned in order while their
    probabilities are accumulated, and the first number whose running total
    reaches a uniform random threshold in [0, 1) is returned.

    Raises:
        ValueError: if ``prob_list`` is empty.
    """
    if not prob_list:
        raise ValueError("prob_list must not be empty")

    threshold = random.random()
    running_total = 0
    for entry in prob_list:
        running_total += entry[1]
        if running_total >= threshold:
            return entry[0]
    # Rounding in the running sum can leave the total marginally below the
    # threshold; attribute that sliver to the last entry instead of silently
    # returning None.
    return prob_list[-1][0]

Here is a test to show that it works:

import collections
# Number of draws used to estimate the empirical frequencies.
TRIALS = 10000

# Tally how often each number is returned by pick_random.
count = collections.defaultdict(int)
for _ in range(TRIALS):
    count[pick_random(prob_list)] += 1

# Print "<number> <observed frequency>" per line, in ascending number order.
# %-formatting gives the same output on Python 2 and Python 3.
for n in sorted(count):
    print("%s %s" % (n, count[n] / float(TRIALS)))

Which outputs something like:

1 0.498
2 0.25
3 0.0515
4 0.0099
5 0.0899
6 0.1007

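EDIT: I just saw the edit in the question. If you want to select two distinct numbers, you can repeat the above until the second number chosen is distinct from the first. However, this will be terribly slow if one number has a very high probability (e.g., 0.99999999) associated with it. In that case, you could remove the first number from the list and rescale the remaining probabilities so that they sum to 1 before selecting the second number.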

+4

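Here is something that appears to work and to meet all your requirements (and, subjectively, it seems fast enough). Note that the constraint that the second number must differ from the first distorts the probabilities for the second pick. The code below effectively ignores that issue and simply enforces the restriction (in other words, the probability of the second number will not be exactly the one given for it in prob_list).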

import random

prob_list = [
    [1, 0.5],
    [2, 0.25],
    [3, 0.05],
    [4, 0.01],
    [5, 0.09],
    [6, 0.1],
]

# Cumulative probability boundaries: acc_list[i] and acc_list[i + 1] delimit
# the interval that belongs to prob_list[i]. The list starts at 0.0 and has
# one more element than prob_list.
acc_list = [0.0]
for _number, probability in prob_list:
    acc_list.append(acc_list[-1] + probability)
acc = acc_list[-1]  # final running total of all probabilities

# Largest absolute difference at which two values still count as equal.
TOLERANCE = 0.000001


def approx_eq(v1, v2):
    """Return True when v1 and v2 differ by at most TOLERANCE."""
    gap = abs(v1 - v2)
    return gap <= TOLERANCE

def within(low, value, high):
    """Determine if low <= value <= high (approximately).

    Each bound is inclusive, and a value that misses a bound still counts
    as inside when approx_eq reports it equal to that bound.
    """
    at_or_above_low = value > low or approx_eq(low, value)
    if not at_or_above_low:
        return False
    return value < high or approx_eq(high, value)

def get_selection():
    """Return the index of the weighted interval a random draw falls in.

    A value is drawn with random.random() and compared against each pair of
    neighbouring boundaries in acc_list, in order; the index of the first
    interval for which within() holds is returned.

    Raises:
        AssertionError: if the drawn value lies in none of the intervals.
    """
    rand = random.random()
    neighbouring_bounds = zip(acc_list, acc_list[1:])
    for interval, (lower, upper) in enumerate(neighbouring_bounds):
        if within(lower, rand, upper):
            return interval
    raise AssertionError('no interval for {:.6}'.format(rand))

def get_two_different_nums():
    """Return two numbers from prob_list taken from two different entries.

    The first index comes from one get_selection() call; the second is
    redrawn with get_selection() until it differs from the first.
    NOTE(review): the loop only ends once a different index is drawn, so it
    presumably needs at least two selectable entries - confirm for other
    distributions.
    """
    first = get_selection()
    second = get_selection()
    while second == first:
        second = get_selection()
    return prob_list[first][0], prob_list[second][0]
+3
source

Perhaps the problem is only with the data structure. It would be easier if you had a dictionary instead of a list of lists:

# Maps each number to its probability.
prob_list = {
    1: 0.5,
    2: 0.25,
    3: 0.05,
    4: 0.01,
    5: 0.09,
    6: 0.1,
}

That way, you can look up the weight that corresponds to a number:

import random
number = weight = -1
# random.randint includes both end points, so a candidate may not be a key of
# prob_list; keep drawing until it is one.
while number not in prob_list:
    number = random.randint(0, len(prob_list))
# Look the weight up only after a valid key has been drawn; indexing inside
# the loop raises KeyError whenever the candidate is not a key.
weight = prob_list[number]

print(number, weight)
+1
source

All Articles