For your smaller example, with a limited variety of elements, you can use a set and a dict comprehension:
>>> mylist = [1,1,1,1,1,1,2,3,2,2,2,2,3,3,4,5,5,5,5] >>> {k:mylist.count(k) for k in set(mylist)} {1: 6, 2: 5, 3: 3, 4: 1, 5: 4}
To break it down, set(mylist) deduplicates the list, reducing it to its unique elements:
>>> set(mylist) set([1, 2, 3, 4, 5])
The dict comprehension then iterates over those unique values and takes each one's count from the list.
It is also significantly faster than using a Counter and faster than using setdefault:
"""Benchmark seven ways of counting element frequencies in a small list."""
from __future__ import print_function
from collections import Counter
from collections import defaultdict
import random


# Small fixture: few distinct values (1-5), repeated, then tiled 10x.
mylist = [1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 2, 2, 3, 3, 4, 5, 5, 5, 5] * 10


def s1(mylist):
    """Dict comprehension over the unique values; one list.count() per key."""
    return {k: mylist.count(k) for k in set(mylist)}


def s2(mylist):
    """Count with collections.Counter.

    BUG FIX: the parameter was named ``mlist`` while the body referenced the
    module-level ``mylist``, so s2 silently counted the global instead of its
    argument (it only appeared to work because the benchmark passed the
    global).  The parameter is now actually used.
    """
    return Counter(mylist)


def s3(mylist):
    """Count with dict.setdefault, one lookup-and-increment per element."""
    mydict = dict()
    for index in mylist:
        mydict[index] = mydict.setdefault(index, 0) + 1
    return mydict


def s4(mylist):
    """Pre-seed the keys with fromkeys, then one list.count() per unique key."""
    mydict = {}.fromkeys(mylist, 0)
    for k in mydict:
        mydict[k] = mylist.count(k)
    return mydict


def s5(mylist):
    """Count with dict.get and a default of 0."""
    mydict = {}
    for k in mylist:
        mydict[k] = mydict.get(k, 0) + 1
    return mydict


def s6(mylist):
    """Count with collections.defaultdict(int); returns a defaultdict."""
    mydict = defaultdict(int)
    for i in mylist:
        mydict[i] += 1
    return mydict


def s7(mylist):
    """Pre-seed the keys with fromkeys, then increment in a single pass."""
    mydict = {}.fromkeys(mylist, 0)
    for e in mylist:
        mydict[e] += 1
    return mydict


if __name__ == '__main__':
    import timeit
    n = 1000000
    print(timeit.timeit("s1(mylist)", setup="from __main__ import s1, mylist", number=n))
    print(timeit.timeit("s2(mylist)", setup="from __main__ import s2, mylist, Counter", number=n))
    print(timeit.timeit("s3(mylist)", setup="from __main__ import s3, mylist", number=n))
    print(timeit.timeit("s4(mylist)", setup="from __main__ import s4, mylist", number=n))
    print(timeit.timeit("s5(mylist)", setup="from __main__ import s5, mylist", number=n))
    print(timeit.timeit("s6(mylist)", setup="from __main__ import s6, mylist, defaultdict", number=n))
    print(timeit.timeit("s7(mylist)", setup="from __main__ import s7, mylist", number=n))
On my machine that prints (Python 3):
18.123854104997008
For larger lists, such as 10 million integers, with more diverse elements (1,500 random numbers), use defaultdict or fromkeys in a loop:
"""Benchmark seven frequency-counting strategies on a large, diverse list."""
from __future__ import print_function
from collections import Counter
from collections import defaultdict
import random


def s1(mylist):
    """Dict comprehension over the unique values; one list.count() per key.

    NOTE: this is O(u * n) for u unique values — disastrous on large,
    diverse inputs, which is exactly what this benchmark demonstrates.
    """
    return {k: mylist.count(k) for k in set(mylist)}


def s2(mylist):
    """Count with collections.Counter.

    BUG FIX: the parameter was named ``mlist`` while the body referenced the
    module-level ``mylist``, so s2 silently counted the global instead of its
    argument.  The parameter is now actually used.
    """
    return Counter(mylist)


def s3(mylist):
    """Count with dict.setdefault, one lookup-and-increment per element."""
    mydict = dict()
    for index in mylist:
        mydict[index] = mydict.setdefault(index, 0) + 1
    return mydict


def s4(mylist):
    """Pre-seed the keys with fromkeys, then one list.count() per unique key."""
    mydict = {}.fromkeys(mylist, 0)
    for k in mydict:
        mydict[k] = mylist.count(k)
    return mydict


def s5(mylist):
    """Count with dict.get and a default of 0."""
    mydict = {}
    for k in mylist:
        mydict[k] = mydict.get(k, 0) + 1
    return mydict


def s6(mylist):
    """Count with collections.defaultdict(int); returns a defaultdict."""
    mydict = defaultdict(int)
    for i in mylist:
        mydict[i] += 1
    return mydict


def s7(mylist):
    """Pre-seed the keys with fromkeys, then increment in a single pass."""
    mydict = {}.fromkeys(mylist, 0)
    for e in mylist:
        mydict[e] += 1
    return mydict


if __name__ == '__main__':
    import timeit
    # Large fixture: 10 million ints drawn from ~1500 distinct values.
    # Built inside the __main__ guard so merely importing this module does
    # not pay the multi-second construction cost; the timeit setup strings
    # still find it via ``from __main__ import ... mylist``.
    mylist = [random.randint(0, 1500) for _ in range(10000000)]
    n = 1
    print(timeit.timeit("s1(mylist)", setup="from __main__ import s1, mylist", number=n))
    print(timeit.timeit("s2(mylist)", setup="from __main__ import s2, mylist, Counter", number=n))
    print(timeit.timeit("s3(mylist)", setup="from __main__ import s3, mylist", number=n))
    print(timeit.timeit("s4(mylist)", setup="from __main__ import s4, mylist", number=n))
    print(timeit.timeit("s5(mylist)", setup="from __main__ import s5, mylist", number=n))
    print(timeit.timeit("s6(mylist)", setup="from __main__ import s6, mylist, defaultdict", number=n))
    print(timeit.timeit("s7(mylist)", setup="from __main__ import s7, mylist", number=n))
This prints:
2825.2697427899984
You can see that solutions that rely on count, applied a moderate number of times across a large list, suffer disastrously compared to the other solutions.