Useful tips for Python collections
unpack
>>> arr=[1,2,3,4]
>>> first,*mid,last = arr
>>> first
1
>>> mid
[2, 3]
>>> last
4
>>> first, *the_rest = arr
>>> first
1
>>> the_rest
[2, 3, 4]
fixed length array (only keep last n items)
>>> from collections import deque
>>> q = deque(maxlen=3)
>>> for i in range(0,10):
...     q.append(i)
...
>>> q
deque([7, 8, 9], maxlen=3)
nlargest and nsmallest (heapq)
>>> import heapq
>>> chars = [{'k':chr(ord('z')-n), 'v':n} for n in range(0,26)]
sort by key
>>> heapq.nlargest(3, chars, lambda x: x['k'])
[{'k': 'z', 'v': 0}, {'k': 'y', 'v': 1}, {'k': 'x', 'v': 2}]
>>> heapq.nsmallest(3, chars, lambda x: x['k'])
[{'k': 'a', 'v': 25}, {'k': 'b', 'v': 24}, {'k': 'c', 'v': 23}]
sort by value
>>> heapq.nlargest(3, chars, lambda x: x['v'])
[{'k': 'a', 'v': 25}, {'k': 'b', 'v': 24}, {'k': 'c', 'v': 23}]
>>> heapq.nsmallest(3, chars, lambda x: x['v'])
[{'k': 'z', 'v': 0}, {'k': 'y', 'v': 1}, {'k': 'x', 'v': 2}]
a zip object is an iterator, so it can only be consumed once
>>> a=['a','b','c']
>>> b=[1,2,3]
>>> c=zip(a,b)
>>> max(c)
('c', 3)
>>> max(c)
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
ValueError: max() arg is an empty sequence
Apply max and min on dict
>>> chars={'a':3,'b':2,'c':1}
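>>> max(chars, lambda x:chars[x])  # wrong: key must be passed as key=...; this compares the dict with the lambda and raises TypeError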
>>> max(chars, key=lambda x:chars[x])
'a'
>>> min(chars, key=lambda x:chars[x])
'c'
dict operation
add multiple items into dict
>>> d={'a':1}
>>> d.update({'b':2,'c':3})
>>> d
{'a': 1, 'b': 2, 'c': 3}
find common items
>>> d1={'a':1,'b':'b','c':3}
>>> d2={'a':'a','b':2,'c':3}
>>> d1.items()&d2.items()
{('c', 3)}
find common keys
>>> d1.keys()&d2.keys()
{'a', 'b', 'c'}
find keys that are not in another set (set difference)
>>> d1.keys()-{'b','c','d'}
{'a'}
sort a list of dicts by a key
>>> arr = [{'name':'john','age':20},{'name':'alice','age':25},{'name':'bob', 'age':30}]
>>> from operator import itemgetter
using itemgetter
>>> sorted(arr, key=itemgetter('name'))  # same as sorted(arr, key=lambda x: x['name'])
[{'name': 'alice', 'age': 25}, {'name': 'bob', 'age': 30}, {'name': 'john', 'age': 20}]
>>> sorted(arr, key=itemgetter('age'))
[{'name': 'john', 'age': 20}, {'name': 'alice', 'age': 25}, {'name': 'bob', 'age': 30}]
>>> sorted(arr, key=itemgetter('age'), reverse=True)
[{'name': 'bob', 'age': 30}, {'name': 'alice', 'age': 25}, {'name': 'john', 'age': 20}]
sort by multiple keys
>>> arr = [{'name':'john','age':20},{'name':'alice','age':25},{'name':'bob', 'age':30},{'name':'allen','age':40}]
>>> sorted(arr, key=itemgetter('name','age'))  # same as sorted(arr, key=lambda x: (x['name'], x['age']))
[{'name': 'alice', 'age': 25}, {'name': 'allen', 'age': 40}, {'name': 'bob', 'age': 30}, {'name': 'john', 'age': 20}]
dict groupby
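must sort by the group key first (groupby only groups consecutive items with the same key)
>>> from itertools import groupby
>>> arr = [{'name':'john','area':'us'},{'name':'bob','area':'cn'},{'name':'ali','area':'ru'},{'name':'leo','area':'us'},{'name':'frank','area':'cn'}]
>>> arr.sort(key = lambda x:x['area'])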
>>> for area,names in groupby(arr, key=lambda x:x['area']):
...     print(area)
...     for n in names:
...         print(f' {n}')
...
cn
 {'name': 'bob', 'area': 'cn'}
 {'name': 'frank', 'area': 'cn'}
ru
 {'name': 'ali', 'area': 'ru'}
us
 {'name': 'john', 'area': 'us'}
 {'name': 'leo', 'area': 'us'}
shortcut to make a (shallow) copy of a list
>>> a=[1,2,3,4]
>>> a[:]
[1, 2, 3, 4]
Count most common
>>> arr = [1,2,3,3,1,5,6,7,8,1,2,3,4,3,2]
>>> from collections import Counter
>>> freq = Counter(arr)
find the 3 most frequent numbers
>>> freq.most_common(3)
[(3, 4), (1, 3), (2, 3)]
Apply add and subtract with Counter
>>> c1=Counter({'a':3,'b':4,'c':5})
>>> c2=Counter({'a':1,'b':1,'c':1})
>>> c1-c2
Counter({'c': 4, 'b': 3, 'a': 2})
>>> c1+c2
Counter({'c': 6, 'b': 5, 'a': 4})