Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
# Fixed seed so the sample data below is reproducible.
rand = np.random.RandomState(1)
df = pd.DataFrame(
    dict(
        # 'foo', 'bar', 'baz' repeated to give 30 rows.
        A=['foo', 'bar', 'baz'] * 10,
        # One independent draw per row from the five animal labels.
        B=[rand.choice(['cat', 'dog', 'fish', 'pig', 'cow']) for _ in range(30)],
        # Constant count column; the scalar is broadcast to every row.
        C=1,
    )
)
- >> df.head(5)
- A B C
- 0 foo pig 1
- 1 bar cow 1
- 2 baz cat 1
- 3 foo dog 1
- 4 bar pig 1
# Total C for every (A, B) pair; the result is indexed by a two-level (A, B) MultiIndex.
d = df.groupby(['A', 'B']).sum()
# Within each A group, order the rows by C from largest to smallest.
# group_keys=False stops apply() from prepending the group key as an extra index level.
d = d.groupby(level=0, group_keys=False).apply(
    lambda grp: grp.sort_values('C', ascending=False)
)
d
- C
- A B
- bar dog 4
- cow 2
- fish 2
- cat 1
- pig 1
- baz cow 4
- cat 3
- fish 2
- dog 1
- foo dog 4
- cow 3
- pig 2
- cat 1
def summarise(l, n=10, name='Other'):
    """Keep the first ``n`` rows of ``l`` and fold the remainder into one row.

    The rows after the first ``n`` are summed column-wise and appended as a
    single row labelled ``name``.  When ``l`` has a MultiIndex, the new row
    reuses the outer index levels of the *first* row of ``l`` and puts
    ``name`` in the innermost level.  Because of that, the function is meant
    to be applied to one group at a time, e.g.
    ``d.groupby(level=0, group_keys=False).apply(summarise, n=2)``; calling it
    on a frame that spans several groups lumps every group's remainder under
    the first group's prefix.

    Args:
        l: DataFrame to summarise; rows are taken in their existing order.
        n: Number of leading rows to keep.  Negative values count from the
            end, as with ``DataFrame.head``.
        name: Index label used for the aggregated row.

    Returns:
        A new DataFrame made of the kept rows followed by the aggregated row.
        An empty input is returned as an empty copy, since there is no first
        row to take an index prefix from.
    """
    if len(l) == 0:
        return l.copy()

    # Positional slices rather than head(n)/tail(-n): tail(-0) is tail(0),
    # which selects nothing, so n == 0 used to sum an empty remainder.
    kept = l.iloc[:n]
    rest = l.iloc[n:]

    if isinstance(l.index, pd.MultiIndex):
        # Outer levels come from the first row; only the last level is replaced.
        label = tuple(l.index[0][:-1]) + (name,)
        other_index = pd.MultiIndex.from_tuples([label], names=l.index.names)
    else:
        # Flat index: use the bare label, not a 1-tuple.
        other_index = pd.Index([name], name=l.index.name)

    other = pd.DataFrame([rest.sum()], columns=l.columns, index=other_index)
    # DataFrame.append was removed in pandas 2.0; pd.concat is its replacement.
    return pd.concat([kept, other])
- >> summarise(d, n=2)
- C
- A B
- bar dog 4
- cow 2
- Other 24
- A B C
- bar dog 4
- bar cow 2
- bar Other 4
- baz cow 4
- baz cat 3
- baz Other 3
- foo dog 4
- foo cow 3
- foo Other 3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement