Cython: performance of a dict-like class based on a C++ map

Question

Cython: performance of a dict-like class based on a C++ map

I have compared the performance of C++ maps emulating Python dictionaries against regular Python dictionaries in Cython. I wrote a (simplified) version of fast_dict from sklearn ( https://github.com/scikit-learn/scikit-learn/blob/master/sklearn/utils/fast_dict.pyx ). According to the comment in the fast_dict docstring, creating the map is much slower in the C++ case, but it looks like lookup is slower as well (which I expected to be a lot faster).

My implementation basically looks like this (pretty much lifted from the sklearn implementation, but modified to hold an int -> int map):

from libcpp.map cimport map as cpp_map
from cython.operator cimport dereference as deref

cdef class IntDict:
    """Integer-to-integer mapping stored in a C++ ``std::map``.

    Keys and values are held as ``ITYPE_t``. The backing ``my_map``
    attribute is not declared here; it is expected to come from the
    accompanying .pxd declaration of this class.
    """

    def __init__(self, dict orig_dict=None):
        """Optionally copy every item of an ordinary dict into the map.

        ``None`` (the default) leaves the map empty. ``None`` is used
        instead of ``{}`` so that no mutable object is shared between calls.
        """
        if orig_dict is not None:
            for key, val in orig_dict.items():
                self.my_map[key] = val

    def __setitem__(self, ITYPE_t key, ITYPE_t value):
        """Insert ``key`` or overwrite the value already stored under it."""
        # Typed as ITYPE_t rather than C int so the accepted range matches
        # what __init__ is able to store in the map.
        self.my_map[key] = value

    def __getitem__(self, ITYPE_t key):
        """Return the value stored under ``key``; raise KeyError if absent."""
        cdef cpp_map[ITYPE_t, ITYPE_t].iterator it = self.my_map.find(key)
        if it == self.my_map.end():
            # find() yields end() for a missing key, which lets us raise
            # instead of silently inserting an entry.
            raise KeyError('%d' % key)
        return deref(it).second

The .pxd file looks like this (again, mostly from sklearn):

# NOTE(review): `np` is not imported/cimported anywhere in the snippet shown;
# presumably the full file has `cimport numpy as np` (and `import numpy as np`
# for the runtime constants) -- confirm.

# Runtime NumPy dtype paired with the C-level typedef DTYPE_t below.
DTYPE = np.float64
ctypedef np.float64_t DTYPE_t

# Runtime NumPy dtype paired with ITYPE_t, the type used for both the keys
# and the values of the map declared in this file.
ITYPE = np.intp
ctypedef np.intp_t ITYPE_t

cdef class IntDict:
    # C++ map backing the class. The methods in the .pyx access it as
    # `self.my_map`, so the attribute must carry exactly that name.
    cdef cpp_map[ITYPE_t,ITYPE_t] my_map

Here are the tests I developed:

cpdef load_ord_dict(r=100000):
    """Return a plain dict that maps each integer in ``range(r)`` to 0.

    Benchmark helper: the entries are inserted one by one in a typed loop.
    """
    cdef dict table = {}
    cdef int n_items = len(range(r))
    cdef int idx

    for idx in range(n_items):
        table[idx] = 0

    return table

cpdef load_cpp(r=100000):
    """Return a new IntDict in which each integer in ``range(r)`` maps to 0.

    Benchmark helper: every entry is stored through item assignment on
    the IntDict instance.
    """
    cdef IntDict table = IntDict()
    cdef int n_items = len(range(r))
    cdef int idx

    for idx in range(n_items):
        table[idx] = 0

    return table

cpdef lookup_ord(dict d,r=100000):
    """Index ``d`` with each integer in ``range(r)`` and discard the results.

    Benchmark helper with no return value; a missing key surfaces as the
    KeyError raised by the dict itself.
    """
    cdef int n_lookups = len(range(r))
    cdef int key

    for key in range(n_lookups):
        d[key]

cpdef lookup_cpp(IntDict d,r=100000):
    """Index ``d`` with each integer in ``range(r)`` and discard the results.

    Benchmark helper with no return value; every access goes through
    item lookup on the IntDict instance.
    """
    cdef int n_lookups = len(range(r))
    cdef int key

    for key in range(n_lookups):
        d[key]

Here are the timings (same picture on Linux and Mac):

In [3]: timeit(load_ord_dict())
100 loops, best of 3: 4.3 ms per loop

In [4]: timeit(load_cpp())
10 loops, best of 3: 21.7 ms per loop

In [5]: d1 = load_ord_dict()

In [6]: timeit(lookup_ord(d1))
100 loops, best of 3: 3.95 ms per loop

In [7]: d2 = load_cpp()

In [8]: timeit(lookup_cpp(d2))
100 loops, best of 3: 11.8 ms per loop

So, is the cpp map really expected to be slower here — or am I doing something wrong?

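UPDATE: I also tried unordered_map. Loading is slower still; lookup is somewhat faster than with map, but still slower than the plain dict: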

from libcpp.unordered_map cimport unordered_map as umap

cdef class UnOrdIntDict:
    """Integer-to-integer mapping stored in a C++ ``std::unordered_map``.

    The backing ``my_map`` attribute is not declared here; for this class
    the .pxd declaration has to use ``umap[..]`` in place of ``cpp_map[..]``.
    """

    def __init__(self, dict orig_dict=None):
        """Optionally copy every item of an ordinary dict into the map.

        ``None`` (the default) leaves the map empty. ``None`` is used
        instead of ``{}`` so that no mutable object is shared between calls.
        """
        if orig_dict is not None:
            for key, val in orig_dict.items():
                # exchange cpp_map[..] in pxd with umap[..] in this case
                self.my_map[key] = val

    def __setitem__(self, ITYPE_t key, ITYPE_t value):
        """Insert ``key`` or overwrite the value already stored under it."""
        # Typed as ITYPE_t rather than C int so the accepted range matches
        # what __init__ is able to store in the map.
        self.my_map[key] = value

    def __getitem__(self, ITYPE_t key):
        """Return the value stored under ``key``; raise KeyError if absent.

        Author's note: assumed to work like the ordinary cpp_map variant.
        """
        cdef umap[ITYPE_t, ITYPE_t].iterator it = self.my_map.find(key)
        if it == self.my_map.end():
            # find() yields end() for a missing key, which lets us raise
            # instead of silently inserting an entry.
            raise KeyError('%d' % key)
        return deref(it).second


   cpdef load_cpp_unordered(r=100000):
       """Return a new UnOrdIntDict mapping each integer in ``range(r)`` to 0.

       Benchmark helper: every entry is stored through item assignment on
       the UnOrdIntDict instance.
       """
       cdef UnOrdIntDict table = UnOrdIntDict()
       cdef int n_items = len(range(r))
       cdef int idx

       for idx in range(n_items):
           table[idx] = 0

       return table

   cpdef lookup_cpp_unordered(UnOrdIntDict d,r=100000):
       """Index ``d`` with each integer in ``range(r)``, discarding results.

       Benchmark helper with no return value; every access goes through
       item lookup on the UnOrdIntDict instance.
       """
       cdef int n_lookups = len(range(r))
       cdef int key

       for key in range(n_lookups):
           d[key]

Results:

In [3]: timeit(load_cpp_unordered())
10 loops, best of 3: 29.8 ms per loop

In [4]: timeit(lookup_cpp_unordered(d3))
100 loops, best of 3: 8.23 ms per loop