.
: , , , , .
:
) , " "
"clusternumbers (A), " clusternumbers (B) ".
N = 100, 1000 fcluster , ncluster = 25,
N 100 ncluster 25: 22 + 3 r 117
sizes: av 4 10 9 8 7 6 6 5 5 4 4 4 ...
radii: av 117 202 198 140 134 64 62 28 197 144 148 132 ...
N 1000 ncluster 25: 22 + 3 r 197
sizes: av 45 144 139 130 85 84 69 63 43 38 33 30 ...
radii: av 197 213 279 118 146 282 154 245 212 243 226 235 ...
b) 2 3
.
:
def randomclusters( N, ncluster=25, radius=1, box=box ):
    """ -> N 2d points: Gaussian clusters, Poisson cluster sizes

    N: number of points to return.
    ncluster: target number of clusters; the mean cluster size is
        N // ncluster (at least 1).
    radius: passed to gauss2 for every multi-point cluster
        (presumably the Gaussian spread -- confirm against gauss2).
    box: passed to uniformrandom2 to draw each cluster centre.

    Returns a list of exactly N points (the last cluster is truncated
    if it overshoots); an empty list if N <= 0.
    """
    # Mean cluster size.  Clamped to 1 so that N < ncluster does not hand
    # a negative rate to np.random.poisson (which raises ValueError).
    lam = max( N // ncluster, 1 )
    pts = []
    while len(pts) < N:
        centre = uniformrandom2(box)
        # Shifted Poisson: every cluster has at least one point, mean lam.
        csize = np.random.poisson(lam - 1) + 1
        if csize == 1:
            # A singleton is just its centre.
            pts.append( centre )
        else:
            # NOTE(review): inbox() is called without `box`; presumably it
            # filters against the module-level box -- confirm that this is
            # intended when a non-default `box` is passed in.
            pts.extend( inbox( gauss2( centre, radius, csize )))
    return pts[:N]
import scipy.cluster.hierarchy as hier
def fcluster( pts, ncluster, method="average", criterion="maxclust" ):
    """ Hierarchically cluster pts and cut the tree into flat clusters.

    pts: sequence of points, converted with np.asarray.
    ncluster: threshold handed to hier.fcluster; with the default
        criterion "maxclust" it is the maximum number of flat clusters,
        so the average cluster size is about len(pts) / ncluster.
    method: linkage method handed to hier.linkage.
    criterion: cut criterion handed to hier.fcluster.

    -> (pts array, pdist distances, linkage matrix, flat cluster labels,
        clusterlists( labels ))
    """
    points = np.asarray(pts)
    distances = scipy.spatial.distance.pdist( points )
    linkage = hier.linkage( distances, method )
    labels = hier.fcluster( linkage, ncluster, criterion=criterion )
    return (points, distances, linkage, labels, clusterlists(labels))
def clusterlists(T):
    """ Group the indices of T by cluster label.

    T: sequence of non-negative integer cluster labels, one per point,
        e.g. the result of hier.fcluster( Z, t ):  [a b a b c a]
    -> [ [0 2 5] [1 3] ]  lists of indices into T, one per cluster,
        sorted by len (largest first), no singletons [4].
        Empty T -> [].
    """
    labels = list(T)
    if not labels:
        return []
    # One slot per possible label 0 .. max; labels that never occur
    # leave an empty slot, which is dropped below.
    members = [ [] for _ in range( max(labels) + 1 )]
    for index, label in enumerate(labels):
        members[label].append( index )
    # Stable sort: clusters of equal size stay in ascending label order.
    members.sort( key=len, reverse=True )
    # Filter instead of slicing off a counted tail: a slice [:-n] with
    # n == 0 would wrongly return an empty list.
    return [ m for m in members if len(m) >= 2 ]
def radius( x ):
    """ Root-mean-square per-coordinate deviation of the points x from
        their centroid: sqrt of the variance along axis 0, averaged over
        the coordinates.  For points with d coordinates this is
        rms |x - xmid| / sqrt(d).
    """
    per_coordinate_variance = np.var( x, axis=0 )
    mean_variance = np.mean( per_coordinate_variance )
    return np.sqrt( mean_variance )
denis source
share