Find integer nearest neighbor in dict

I have dictone that accepts integer keys:

a = {}
a[1] = 100
a[55] = 101
a[127] = 102

I would like to be able to take the closest neighbor when I ask:

a[20] # should return a[1] = 100
a[58] # should return a[55] = 101
a[167] # should return a[127] = 102

Is there a pythonic way to do this? (I suppose this can be done by going all over the dict, but is this probably not the most elegant solution?)


Same question with double index (integers):

 b[90, 1] = 100, b[90, 55] = 101, b[90, 127] = 102
 b[70, 1] = 40, b[70, 45] = 41, b[70, 107] = 42

I would like to get b[73, 40] = b[70, 45] = 41i.e. nearest neighbor in a 2D plane.

+4
source share
6 answers

Something like this:

class CustomDict(dict):
    def __getitem__(self, key):
        try:
            return dict.__getitem__(self, key)
        except KeyError:
            closest_key = min(self.keys(), key=lambda x: abs(x - key))
            return dict.__getitem__(self, closest_key)

Or that:

class CustomDict(dict):
    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        else:
            closest_key = min(self.keys(), key=lambda x: abs(x - key))
            return dict.__getitem__(self, closest_key)

Both give the following result:

a = CustomDict()
a[1] = 100
a[55] = 101
a[127] = 102

print a[20] # prints 100
print a[58] # prints 101
print a[167] # prints 102

And for the double version of the index:

class CustomDoubleDict(dict):
    def __getitem__(self, key):
        if key in self:
            return dict.__getitem__(self, key)
        else:
            closest_key = min(self.keys(), key=lambda c: (c[0] - key[0]) ** 2 + (c[1] - key[1]) ** 2)
            return dict.__getitem__(self, closest_key)


b = CustomDoubleDict()
b[90, 1] = 100
b[90, 55] = 101
b[90, 127] = 102
b[70, 1] = 40
b[70, 45] = 41
b[70, 107] = 42

print b[73, 40]  # prints 41
print b[70, 45]  # prints 41
+2
source

. , , .


n- :

class NearestDict(dict):
    def __init__(self, ndims):
        super(NearestDict, self).__init__()
        self.ndims = ndims

    # Enforce dimensionality
    def __setitem__(self, key, val):
        if not isinstance(key, tuple): key = (key,)
        if len(key) != self.ndims: raise KeyError("key must be %d dimensions" % self.ndims)
        super(NearestDict, self).__setitem__(key, val)

    @staticmethod
    def __dist(ka, kb):
        assert len(ka) == len(kb)
        return sum((ea-eb)**2 for (ea, eb) in zip(ka, kb))

    # Helper method and might be of use
    def nearest_key(self, key):
        if not isinstance(key, tuple): key = (key,)
        nk = min((k for k in self), key=lambda k: NearestDict.__dist(key, k))
        return nk

    def __missing__(self, key):
        if not isinstance(key, tuple): key = (key,)
        if len(key) != self.ndims: raise KeyError("key must be %d dimensions" % self.ndims)
        return self[self.nearest_key(key)]

:

a = NearestDict(1)
a[1] = 100
a[55] = 101
a[127] = 102
print a[20]    # 100
print a[58]    # 100
print a[167]   # 102
print a.nearest_key(20)     # (1,)
print a.nearest_key(58)     # (55,)
print a.nearest_key(127)    # (127,)

b = NearestDict(2)
b[90, 1]   = 100
b[90, 55]  = 101
b[90, 127] = 102
b[70, 1]   = 40
b[70, 45]  = 41
b[70, 107] = 42
print b[73, 40] # 41
print b.nearest_key((73,40)) # (70, 45)

, , , . , . , , .

Edit:

, Kasra, , , scipy cKDTree:

, regenOnAdd, () KDTree , () :

from scipy.spatial import cKDTree

class KDDict(dict):
    def __init__(self, ndims, regenOnAdd=False):
        super(KDDict, self).__init__()
        self.ndims = ndims
        self.regenOnAdd = regenOnAdd
        self.__keys = []
        self.__tree = None
        self.__stale = False

    # Enforce dimensionality
    def __setitem__(self, key, val):
        if not isinstance(key, tuple): key = (key,)
        if len(key) != self.ndims: raise KeyError("key must be %d dimensions" % self.ndims)
        self.__keys.append(key)
        self.__stale = True
        if self.regenOnAdd: self.regenTree()
        super(KDDict, self).__setitem__(key, val)

    def regenTree(self):
        self.__tree = cKDTree(self.__keys)
        self.__stale = False

    # Helper method and might be of use
    def nearest_key(self, key):
        if not isinstance(key, tuple): key = (key,)
        if self.__stale: self.regenTree()
        _, idx = self.__tree.query(key, 1)
        return self.__keys[idx]

    def __missing__(self, key):
        if not isinstance(key, tuple): key = (key,)
        if len(key) != self.ndims: raise KeyError("key must be %d dimensions" % self.ndims)
        return self[self.nearest_key(key)]

.

(NearestDict, KDDict(True) (regen on insert) KDDict(False) (defer regen)), .

3 . , :

  • : 5 . ( timeit.repeat - 3).
  • : 0 <= x < 1000
  • :. . 10000 .

4- 1000 .

{'NDIMS': 4, 'NITER': 5, 'NELEMS': 1000, 'NFINDS': 10000, 'DIM_LB': 0, 'DIM_UB': 1000, 'SCORE_MUL': 100}
insert::NearestDict       0.125
insert::KDDict(regen)    35.957
insert::KDDict(defer)     0.174
search::NearestDict    2636.965
search::KDDict(regen)    49.965
search::KDDict(defer)    51.880

4- 100 . , , .

{'NDIMS': 4, 'NITER': 5, 'NELEMS': 100, 'NFINDS': 10000, 'DIM_LB': 0, 'DIM_UB': 1000, 'SCORE_MUL': 100}
insert::NearestDict       0.013
insert::KDDict(regen)     0.629
insert::KDDict(defer)     0.018
search::NearestDict     247.920
search::KDDict(regen)    44.523
search::KDDict(defer)    44.718

100 (, ), 12 . , , .

{'NDIMS': 12, 'NITER': 5, 'NELEMS': 100, 'NFINDS': 10000, 'DIM_LB': 0, 'DIM_UB': 1000, 'SCORE_MUL': 100}
insert::NearestDict       0.013
insert::KDDict(regen)     0.722
insert::KDDict(defer)     0.017
search::NearestDict     405.092
search::KDDict(regen)    49.046
search::KDDict(defer)    50.601

KDDict (KDDict(True)) ( ), ( ). - NearestDict KDDict(False), KDDict

KDDict .

KDDict , NearestDict. - .

KDDict , NearestDict.

/, NearestDict , KDDict. 100 1000 NearestDict 9,64x, KDDict 0,16x.

, NearestDict , KDDict. 4 12 NearestDict 0,64x, KDDict 0,13x.

, scipy toolkit, KDDict.

+3

1:

( OrderedDict). , . O (log n).

2: ( )

, dict , dict, . __getitem__, . O (1).

3:

, O (n). , , .

+2

min :

>>> b ={(90, 55): 101, (90, 127): 102, (90, 1): 100}
>>> def nearest(x,y):
...   m=min(((i,j) for i,j in b ),key= lambda v:abs(v[0]-x)+abs(v[1]-y))
...   return b[m]
... 
>>> nearest(40,100)
102
>>> nearest(90,100)
102
>>> b
{(90, 55): 101, (90, 127): 102, (90, 1): 100}
>>> nearest(90,10)
100

- , , , scipy.spatial.KDTree:

scipy.spatial.KDTree(data, leafsize = 10)

kd-tree

k- , .

http://docs.scipy.org/doc/scipy/reference/generated/scipy.spatial.cKDTree.html#scipy-spatial-ckdtree

http://docs.scipy.org/doc/scipy/reference/spatial.distance.html#module-scipy.spatial.distance

+1

O (log n) :

import collections
import bisect


class ProximityDict(collections.MutableMapping):
    def __init__(self, *args, **kwargs):
        self.store = dict()
        self.ordkeys = []
        self.update(dict(*args, **kwargs))

    def __getitem__(self, key):
        try: return self.store[key]
        except KeyError:
            cand = bisect.bisect_left(self.ordkeys, key)
            if cand == 0: return self.store[self.ordkeys[0]]

            return self.store[
                min(self.ordkeys[cand], self.ordkeys[cand-1],
                    key=lambda k: abs(k - key))
            ]

    def __setitem__(self, key, value):
        if not key in self.store: bisect.insort_left(self.ordkeys, key)
        self.store[key] = value

    def __delitem__(self, key):
        del self.store[key]
        del self.keys[bisect.bisect_left(self.ordkeys, key)]

    def __iter__(self):
        return iter(self.store)

    def __len__(self):
        return len(self.store)

The two-dimensional case is much more annoying (you need to store keys arranged in a quadrant), but possibly in a similar way.

I did not code the deletion to have "proximity" behavior as well, but you can also do this.

0
source

Here you will find one python solution that mainly relies on card / filter operations

class NeirestSearchDictionnary1D(dict):
    """ An extended dictionnary that returns the value that is the nearest to 
    the requested key. As it key distance is defined for simple number 
    values, trying to add other keys will throw error. """
    def __init__(self):
        """ Constructor of the dictionnary.
        It only allow to initialze empty dict """
        dict.__init__(self)

    def keyDistance(self, key1, key2):
        """ returns a distance between 2 dic keys """
        return abs(key1-key2)

    def __setitem__(self, key, value):
        """ override  the addition of a couple in the dict."""
        #type checking
        if not (isinstance(key, int) or isinstance(key, float)):
            raise TypeError("The key of such a "+ type(self) + "must be a simple numerical value")
        else:
            dict.__setitem__(self, key, value)

    def __getitem__(self, key):
        """ Override the getting item operation """
        #compute the minial distance
        minimalDistance = min(map(lambda x : self.keyDistance(key, x), self.keys()))
        #get the list of key that minimize the distance
        resultSetKeys = filter(lambda x : self.keyDistance(key, x) <= minimalDistance, self.keys())
        #return the values binded to the keys minimizing the distances.
        return list(map(lambda x : dict.__getitem__(self, x), resultSetKeys))

if __name__ == "__main__":

    dic = NeirestSearchDictionnary1D()
    dic[1] = 100
    dic[55] = 101
    dic[57] = 102
    dic[127] = 103
    print("the entire dict :", dic)
    print("dic of '20'", dic[20])
    print("dic of '56'", dic[56])

Obviously, you can extend this to a two-dimensional dimension with little work.

0
source

All Articles