More efficient way to count intersections?

I have a list of 300,000 lists (fiber tracks), where each track is a list of (x, y, z) tuples / coordinates:

tracks=
[[(1,2,3),(3,2,4),...]
 [(4,2,1),(5,7,3),...]
 ...
]

I also have a mask group, where each mask is defined as a list of (x, y, z) tuples / coordinates:

mask_coords_list=
[[(1,2,3),(8,13,4),...]
 [(6,2,2),(5,7,3),...]
 ...
]

I am trying to find for all possible pairs of masks:

  • the number of tracks that intersect each mask-mask pair (to create a connection matrix)
  • a subset of tracks that intersect each mask to add 1 to each coordinate (x, y, z) for each track in the subset (to create an β€œimage density”)

I am currently doing part 1 as follows:

def mask_connectivity_matrix(tracks,masks,masks_coords_list):
    connect_mat=zeros((len(masks),len(masks)))
    for track in tracks:
        cur=[]
        for count,mask_coords in enumerate(masks_coords_list):
            if any(set(track) & set(mask_coords)):
                cur.append(count)
            for x,y in list(itertools.combinations(cur,2)):
                connect_mat[x,y] += 1

and part 2 as follows:

def mask_tracks(tracks,masks,masks_coords_list):
    vox_tracks_img=zeros((xdim,ydim,zdim,len(masks)))
    for track in tracks:
        for count,mask in enumerate(masks_coords_list):
            if any(set(track) & set(mask)):
                for x,y,z in track:
                    vox_tracks_img[x,y,z,count] += 1

, - , 70 . , ?

+5
6

scipy.sparse.sparse.csc.

v - , m - , t - .
M - csc-, (m x v), a 1 at (i, j) , voxel j.
T - csc, (t x v), a 1 at (k, j) , k voxel j.

Overlap = (M * T.transpose() > 0)  # track T overlaps mask M  
Connected = (Overlap * Overlap.tranpose() > 0) # Connected masks
Density[mask_idx] = numpy.take(T, nonzero(Overlap[mask_idx, :])[0], axis=0).sum(axis=0)

, , , css_matrices . .


, , . 2 MacBook. csr_matrices, 4 . , , .

from numpy import *
from scipy.sparse import csc_matrix

nvox = 1000000
ntracks = 300000
nmask = 100

# create about 100 entries per track
tcoords = random.uniform(0, ntracks, ntracks * 100).astype(int)
vcoords = random.uniform(0, nvox, ntracks * 100).astype(int)
d = ones(ntracks * 100)
T = csc_matrix((d,  vstack((tcoords, vcoords))), shape=(ntracks, nvox), dtype=bool)

# create around 10000 entries per mask
mcoords = random.uniform(0, nmask, nmask * 10000).astype(int)
vcoords = random.uniform(0, nvox, nmask * 10000).astype(int)
d = ones(nmask * 10000)
M = csc_matrix((d, vstack((mcoords, vcoords))), shape=(nmask, nvox), dtype=bool)

Overlap = (M * T.transpose()).astype(bool) # mask M overlaps track T
Connected = (Overlap * Overlap.transpose()).astype(bool) # mask M1 and M2 are connected
Density = Overlap * T.astype(float) # number of tracks overlapping mask M summed across voxels
+3

, , , , -, . , .

, , , .

import numpy
from collections import defaultdict

def by_point(sets):
    d = defaultdict(list)
    for i, s in enumerate(sets):
        for pt in s:
            d[pt].append(i)
    return d

def calc(xdim, ydim, zdim, mask_coords_list, tracks):
    masks_by_point = by_point(mask_coords_list)
    tracks_by_point = by_point(tracks)

    a = numpy.zeros((len(mask_coords_list), len(tracks)), dtype=int)
    for pt, maskids in masks_by_point.iteritems():
        for trackid in tracks_by_point.get(pt, ()):
            a[maskids, trackid] = 1
    m = numpy.matrix(a)

, , m * m.T.

, . triu .

    am = m * m.T  # calculate adjacency matrix
    am = numpy.triu(am, 1)  # keep only upper triangle
    am = am.A  # convert matrix back to array

.

    vox_tracks_img = numpy.zeros((xdim, ydim, zdim, len(mask_coords_list)), dtype=int)
    for trackid, track in enumerate(tracks):
        for x, y, z in track:
            vox_tracks_img[x, y, z, :] += a[:,trackid]
    return am, vox_tracks_img

, .

, , - , masks_by_point .

+1

, . , , .

def mask_connectivity_matrix_and_tracks(tracks,masks,masks_coords_list):
    connect_mat=zeros((len(masks),len(masks)))
    vox_tracks_img=zeros((xdim,ydim,zdim,len(masks)))
    for track in tracks:
        cur=[]
        for count,mask_coords in enumerate(masks_coords_list):
            if any(set(track) & set(mask_coords)):
                cur.append(count)
                for x,y,z in track:
                    vox_tracks_img[x,y,z,count] += 1
            for x,y in itertools.combinations(cur,2):
                connect_mat[x,y] += 1

, , , "", ", ", - Cython c- python.

0

: (1,2,3), (1,2,4), (1,3,1) : {1: [{2: set([3, 4])}, {3: set([1])}]}, ... , ,

0

( -O, ) :

  • set : , "" ,
  • if any(someset): if someset:,

, .

0

Lame , , , :

Sets of small integers can be modeled as bit vectors using long Python ints. Suppose you replace each tuple with a small integer id, and then convert each track and each set of mask masks into a set of these small identifiers. You can represent these sets as long ints, making the bit-crossing operation faster (but not asymptotically faster).

0
source

All Articles