Say your x and y coordinates are integers from 0 to n. For small n, a simple method is to generate the set of all possible xy coordinates with np.mgrid, reshape it into an array of shape (nx * ny, 2), and then sample random rows from it:
import numpy as np

nx, ny = 100, 200
xy = np.mgrid[:nx, :ny].reshape(2, -1).T
sample = xy.take(np.random.choice(xy.shape[0], 100, replace=False), axis=0)
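As a quick sanity check on the arrays built above (shapes shown are for nx, ny = 100, 200):

print(xy.shape)       # (20000, 2) -- every (x, y) pair on the grid
print(sample.shape)   # (100, 2)   -- 100 distinct grid points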
Building the full array of coordinates becomes increasingly wasteful as nx and/or ny grows large. As @morningsun points out, you can avoid this entirely: draw the sample as integers without replacement from the range 0 to nx * ny, then convert those flat indices back into x, y coordinates, so the full nx * ny array of x, y pairs is never constructed.
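The conversion step relies on np.unravel_index, which maps a flat index back onto grid coordinates, e.g.:

np.unravel_index(12345, (100, 200))   # -> (61, 145), i.e. x = 61, y = 145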
Here are both approaches, generalized to the N-dimensional case, for comparison:
def sample_comb1(dims, nsamp):
    # build the full array of coordinates, then sample rows from it
    perm = np.indices(dims).reshape(len(dims), -1).T
    idx = np.random.choice(perm.shape[0], nsamp, replace=False)
    return perm.take(idx, axis=0)

def sample_comb2(dims, nsamp):
    # sample flat indices, then map them back onto coordinates
    idx = np.random.choice(np.prod(dims), nsamp, replace=False)
    return np.vstack(np.unravel_index(idx, dims)).T
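For the original two-dimensional problem, either function can be called directly; for example (the actual points drawn will of course differ from run to run):

pts = sample_comb2((nx, ny), 100)   # 100 distinct (x, y) pairs from the 100 x 200 grid
print(pts.shape)                    # (100, 2)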
Comparing the timings of the two:
In [1]: %timeit sample_comb1((100, 200), 100)
100 loops, best of 3: 2.59 ms per loop
In [2]: %timeit sample_comb2((100, 200), 100)
100 loops, best of 3: 2.4 ms per loop
In [3]: %timeit sample_comb1((1000, 2000), 100)
1 loops, best of 3: 341 ms per loop
In [4]: %timeit sample_comb2((1000, 2000), 100)
1 loops, best of 3: 319 ms per loop
Both versions spend most of their time inside np.random.choice, which with replace=False effectively permutes the entire index range before taking the first nsamp elements. If you have scikit-learn installed, sklearn.utils.random.sample_without_replacement offers a much faster way to draw the random indices:
from sklearn.utils.random import sample_without_replacement

def sample_comb3(dims, nsamp):
    # draw flat indices without replacement, then map them onto coordinates
    idx = sample_without_replacement(np.prod(dims), nsamp)
    return np.vstack(np.unravel_index(idx, dims)).T
In [5]: %timeit sample_comb3((1000, 2000), 100)
The slowest run took 4.49 times longer than the fastest. This could mean that an
intermediate result is being cached
10000 loops, best of 3: 53.2 µs per loop
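If you would rather avoid the extra dependency, NumPy's newer Generator API (available since NumPy 1.17) can also draw flat indices without replacement. Here is a sketch of the same flat-index idea using it (sample_comb4 is just an illustrative name, not benchmarked here):

rng = np.random.default_rng()

def sample_comb4(dims, nsamp):
    # same trick: draw flat indices without replacement, then map them onto coordinates
    idx = rng.choice(np.prod(dims), size=nsamp, replace=False)
    return np.vstack(np.unravel_index(idx, dims)).T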