Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- np.random.seed(1)
- # Setting up dummy data
- df = pd.DataFrame(np.random.rand(9, 7), columns = [i for i in range(5)] + ['H1', 'H2'])
- df['H1'] = ['M1'] * 5 + ['M2'] * 4
- df['H2'] = ['All', 'c1', 'c2', 'c3', 'c4', 'All', 'c1', 'c2', 'c3']
- # Mark columns to be aggregated.
- numeric_cols = [0,1,2,3,4]
- # Exclude the rows marked 'All' in H2, then sum the remaining rows within each H1 group.
- summary = df.loc[df.H2 != 'All'].groupby('H1')[numeric_cols].sum()
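- # Assign the summary values to the 'All' rows. `.values` drops the index, so the assignment is positional: it relies on the 'All' rows appearing in the same order as the groupby's sorted H1 keys.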
- df.loc[df.H2 == 'All', numeric_cols] = summary.values
- # Result
- 0 1 2 3 4 H1 H2
- 0 1.511040 2.258802 1.746791 2.275381 2.678103 M1 All
- 1 0.345561 0.396767 0.538817 0.419195 0.685220 M1 c1
- 2 0.027388 0.670468 0.417305 0.558690 0.140387 M1 c2
- 3 0.968262 0.313424 0.692323 0.876389 0.894607 M1 c3
- 4 0.169830 0.878143 0.098347 0.421108 0.957890 M1 c4
- 5 0.984590 0.955375 1.371367 1.359280 1.276477 M2 All
- 6 0.280444 0.789279 0.103226 0.447894 0.908596 M2 c1
- 7 0.130029 0.019367 0.678836 0.211628 0.265547 M2 c2
- 8 0.574118 0.146729 0.589306 0.699758 0.102334 M2 c3
- # Division
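- # Multiply by two because each group's sum includes its 'All' row, which already equals the sum of the other rows, so the denominator is twice the true total. This assumes the current 'All' totals are correct.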
- df2 = 2 * df.groupby('H1').apply(lambda gp: gp[numeric_cols].div(gp[numeric_cols].sum()))
- 0 1 2 3 4
- 0 1.000000 1.000000 1.000000 1.000000 1.000000
- 1 0.228691 0.175654 0.308461 0.184230 0.255860
- 2 0.018125 0.296824 0.238898 0.245537 0.052420
- 3 0.640791 0.138757 0.396340 0.385161 0.334045
- 4 0.112393 0.388765 0.056301 0.185071 0.357675
- 5 1.000000 1.000000 1.000000 1.000000 1.000000
- 6 0.284833 0.826146 0.075272 0.329508 0.711800
- 7 0.132064 0.020272 0.495006 0.155691 0.208031
- 8 0.583103 0.153582 0.429721 0.514801 0.080169
Add Comment
Please, Sign In to add comment