groupby() gives an empty list

I executed the following script:

from itertools import groupby
from pprint import pprint as prnt
dt = [('23271800', 0.00066790780636275307),
 ('23271812', 0.0010018617095441298),
 ('26112103', 0.00066790780636275307),
 ('27111616', 0.0056772163540834012),
 # ... many lines deleted ...
 ('40161500', 0.00040074468381765189)
]

agg = groupby(dt, lambda x: x[0])
lst = list(agg)
lst1 = map(lambda x: (x[0], list(x[1])), lst)
prnt(lst1)

For the key '23271800', for example, it should report [('23271800', 0.00066790780636275307)] as the corresponding group. However, I get the wrong output:

[('23271800', []),
 ('23271812', []),
 ('26112103', []),
 ('27111616', []),
 # ... many lines deleted ...
 ('40161500', [('40161500', 0.00040074468381765189)])]

Need help understanding if I am doing something wrong here.

PS: Code paste: http://codepad.org/cCd8DfoT

+4
source share
3 answers
d = [(key, list(group)) for key, group in groupby(dt, lambda x: x[0])]
prnt(d)

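groupby yields a (key, group) pair for each run of consecutive items that share a key, where the group is a lazy iterator. Converting each group to a list while iterating, as above, captures its items before groupby advances to the next group.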

Output:

[('23271800', [('23271800', 0.0006679078063627531)]),
 ('23271812', [('23271812', 0.0010018617095441298)]),
 ('26112103', [('26112103', 0.0006679078063627531)]),
 ('27111616', [('27111616', 0.005677216354083401)]),
 ('30101600',
  [('30101600', 1.3909064158636346e-05), ('30101600', 0.002002905238843634)]),
 ('30102200', [('30102200', 0.00013358156127255062)]),
 ('31100000', [('31100000', 2.1849453575689805e-05)]),
 ('31161500', [('31161500', 0.0005180729752775727)]),
 ('31161501', [('31161501', 0.00012902764441098641)]),
 ('31161505', [('31161505', 0.013866049271881438)]),
 ('31161513', [('31161513', 0.021559049445886335)]),
 ('31161518', [('31161518', 0.0011596016382808651)]),
 ('31161520', [('31161520', 0.022263593545425106)]),
 ('31161600', [('31161600', 0.003930380552826971)]),
 ('31161618', [('31161618', 0.0016029787352706075)]),
 ('31161620', [('31161620', 0.0008462931211056002)]),
 ('31161700', [('31161700', 0.0008833842874611101)]),
 ('31161716', [('31161716', 7.067074299688881e-05)]),
 ('31161717', [('31161717', 0.0014193040885208503)]),
 ('31161727', [('31161727', 0.01364664212812536)]),
 ('31161801', [('31161801', 0.000179280516444739)]),
 ('31161900',
  [('31161900', 1.6624352427769844e-05), ('31161900', 0.0001496191718499286)]),
 ('31161904', [('31161904', 6.666007460763289e-05)]),
 ('31162409', [('31162409', 0.007129527514430318)]),
 ('31162800',
  [('31162800', 0.0002625302360269781),
   ('31162800', 0.359403893120933),
   ('31162800', 0.2207879284986886),
   ('31162800', 0.0002625302360269781)]),
 ('31163200',
  [('31163200', 0.00037295581888139136),
   ('31163200', 4.1439535431265705e-05)]),
 ('31163201', [('31163201', 0.011292216638533014)]),
 ('31163202',
  [('31163202', 4.5417730832667214e-05),
   ('31163202', 4.5417730832667214e-05)]),
 ('31163203', [('31163203', 0.003471418917146539)]),
 ('31163204', [('31163204', 0.0002962025923869601)]),
 ('31163214', [('31163214', 0.0014119501813264418)]),
 ('31163215', [('31163215', 0.017772155543217604)]),
 ('31171504', [('31171504', 0.05423235622453355)]),
 ('31181600', [('31181600', 5.262772981769086e-05)]),
 ('31181602', [('31181602', 0.00019920057382748777)]),
 ('31191518', [('31191518', 0.0014972878296483697)]),
 ('39121719', [('39121719', 0.0022708865416333607)]),
 ('40141600', [('40141600', 5.0614113112184855e-05)]),
 ('40141607', [('40141607', 0.0958030259751574)]),
 ('40141616',
  [('40141616', 0.005499007768977646), ('40141616', 0.00015275021580493458)]),
 ('40141636', [('40141636', 0.0007247510239255406)]),
 ('40141680', [('40141680', 0.12267031972561518)]),
 ('40142000', [('40142000', 0.0002962025923869601)]),
 ('40142100', [('40142100', 8.188292818389522e-05)]),
 ('40142315', [('40142315', 0.00034758467473980007)]),
 ('40142323', [('40142323', 0.0006308018171203779)]),
 ('40161500', [('40161500', 0.0004007446838176519)])]
+1
source

The iterator returned by itertools.groupby() is a bit tricky to use. As the documentation says:

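The returned group is itself an iterator that shares the underlying iterable with groupby(). Because the source is shared, when the groupby() object is advanced, the previous group is no longer visible.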

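In other words, each group has to be consumed before you advance groupby() to the next one. If you first turn the whole groupby() iterator into a list, then by the time you look at the groups, they have already been exhausted. For example: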

>>> from itertools import groupby
>>> groups = list(groupby('AAABBBCCC'))
>>> groups
[('A', <itertools._grouper object at 0x107155490>),
 ('B', <itertools._grouper object at 0x1071553d0>),
 ('C', <itertools._grouper object at 0x107155d50>)]
>>> list(groups[0][1])
[]

The documentation continues:

So, if that data is needed later, it should be stored as a list.

For example:

>>> groups = [(key, list(group)) for key, group in groupby('AAABBBCCC')]
>>> groups[0][1]
['A', 'A', 'A']

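Alternatively, if you do not need to keep the groups around, you can simply process each one as you go. In that case, nothing has to be stored: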

for key, group in groupby('AAABBBCCC'):
    for item in group:
        # do something with item
+7

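Yes, this is a common pitfall. The tools in itertools are lazy, so, as explained above, each group must be consumed before moving on. If you would rather have an eager groupby that returns all the groups at once, take a look at toolz.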

+1

All Articles