How to avoid rehashing dict keys?

Below is a script that demonstrates a feature of set and frozenset that I would like to understand and, if possible, replicate in a subclass of collections.MutableSet. (BTW, this feature is not just a quirk of set and frozenset: it is actively tested in Python's unit tests for these types.)

The script performs the following steps for each of several set-like types/classes:

  • create a dict d whose n keys are specially instrumented integers that keep track of how many times their __hash__ method is called (d's values are all None, but that doesn't matter);
  • calculate (and save for later) the cumulative number of times the __hash__ method of d's keys has been called so far (i.e., during the creation of d);
  • create an object s of the current set-like type/class, using d as the argument to the constructor (therefore, d's keys will become the contents of the resulting object, while d's values will be ignored);
  • repeat the calculation described in (2);
  • output the results of the calculations from (2) and (4) above.

Here's the output for the case when n is set to 10 for all types/classes (I give the full code at the end of this post):

set: 10 10
frozenset: 10 10
Set: 10 20
myset: 10 20

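The conclusion is clear: building a set or a frozenset from d does not call the __hash__ method of d's keys, so the call counts are unchanged afterwards. This is not the case, however, when a Set or a myset is built from d: there, the __hash__ method of each of d's keys is called one more time.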

How can I modify myset (see below) so that constructing it from d does not call the __hash__ methods of d's keys?

!

from sets import Set
from collections import MutableSet

class hash_counting_int(int):
    """An int that counts how many times its ``__hash__`` method is called.

    The running total lives in the ``count`` attribute, which starts at 0 for
    every new instance.
    """

    def __init__(self, *args, **kwargs):
        # int consumes the constructor arguments in __new__; they are accepted
        # here (keywords such as ``base`` included) only so that construction
        # does not fail, and are otherwise ignored.
        self.count = 0

    def __hash__(self):
        """Record one more call, then return the ordinary int hash."""
        self.count += 1
        return int.__hash__(self)

class myset(MutableSet):
    """Set-like container that keeps its members as the keys of ``self.dictset``.

    Only construction is implemented: every abstract MutableSet method is
    bound to a stub that raises NotImplementedError.
    """

    def __init__(self, iterable=()):
        # The values of self.dictset matter!  See further notes below.
        # Here each member is mapped to its position in ``iterable``.
        members = {}
        for position, member in enumerate(iterable):
            members[member] = position
        self.dictset = members

    def __bomb(s, *args, **kwargs):
        # Shared placeholder for the methods this example does not implement.
        raise NotImplementedError

    __len__ = __bomb
    __iter__ = __bomb
    __contains__ = __bomb
    discard = __bomb
    add = __bomb

def test_do_not_rehash_dict_keys(thetype, n=1):
    """Measure __hash__ calls on dict keys around constructing ``thetype``.

    Builds a dict whose ``n`` keys are hash_counting_int instances, then
    passes that dict to ``thetype``.  Returns a ``(before, after)`` tuple
    holding the summed ``count`` of all keys before and after the
    construction.
    """
    def total_hash_calls(keys):
        return sum(key.count for key in keys)

    probe = dict.fromkeys(hash_counting_int(value) for value in xrange(n))
    calls_before = total_hash_calls(probe)
    # The constructed object is not inspected; only the side effect of
    # building it (possible __hash__ calls on the keys) is of interest.
    built = thetype(probe)
    calls_after = total_hash_calls(probe)
    return calls_before, calls_after

# For every set-like type, print its name followed by the cumulative
# __hash__ call counts measured before and after building it from a
# 10-key dict.
for t in (set, frozenset, Set, myset):
    before, after = test_do_not_rehash_dict_keys(t, n=10)
    print('%s: %d %d' % (t.__name__, before, after))

, self.dictset () iterable.values() ( , iterable.values )! ( , ), , iterable dict ( ), values , , , values of self.dictset . , , self.dictset.update(iterable), , __hash__. ( , , self.dictset.update(iterable), , iterable self.dictset.update, .)

: 1) myset.dictset; 2) myset.__bomb__ myset.__bomb.

+5
1

, x dict .

:

class myset(MutableSet):
    """Set-like container whose members are the keys of ``self.dictset``.

    Only construction is implemented: every abstract MutableSet method is
    bound to a stub that raises NotImplementedError.
    """

    def __init__(self, iterable=()):
        """Populate ``self.dictset`` from ``iterable``.

        A mapping (anything exposing ``keys``, the same test ``dict.update``
        applies) is copied with ``dict.update`` and keeps its values.  Any
        other iterable contributes its items as keys mapped to None.
        """
        self.dictset = {}
        if hasattr(iterable, 'keys'):
            # In the run shown with this code, copying a dict this way did
            # not call the keys' __hash__ again (10 before, 10 after).
            # NOTE(review): that is interpreter behaviour, not a documented
            # guarantee -- confirm on the target Python.
            self.dictset.update(iterable)
        else:
            # dict.update() would try to unpack every item as a (key, value)
            # pair: [1, 2, 3] raises TypeError and ['ab', 'cd'] silently
            # becomes {'a': 'b', 'c': 'd'}.  Treat the items as members.
            self.dictset.update(dict.fromkeys(iterable))

    def __bomb__(s, *a, **k):
        # Shared placeholder for the methods this example does not implement.
        raise NotImplementedError
    add = discard = __contains__ = __iter__ = __len__ = __bomb__

:

set: 10 10
frozenset: 10 10
Set: 10 20
myset: 10 10

update , iterable , Python , . , dict , . dict(mapping), genex. , , . dict.fromkeys(mapping, default_val): , , , . , ; , , .

, , , - . , , , .

, : . :

  • , Python , , . Python , , , . Python . __hash__ . ( , Python - .)

  • - , , dict, . Python. , .

Python , dict, 1 ; , ! , . fromkeys Python, , - 2 , . , Python fromkeys, ; . . .

- - . , dict, , .

>>> a = dict((hash_counting_int(x), []) for x in range(10))
>>> [x.count for x in a.keys()]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> b = dict(a)
>>> [x.count for x in a.keys()]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> for n, v in enumerate(b.itervalues()):
...     v.append(n)
... 
>>> [x.count for x in a.keys()]
[1, 1, 1, 1, 1, 1, 1, 1, 1, 1]
>>> b
{0: [0], 1: [1], 2: [2], 3: [3], 4: [4], 5: [5], 6: [6], 7: [7], 8: [8], 9: [9]}

, , , dict . , , .

+2

All Articles