Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Sp Mt Value count
- 0 MM1 S1 a **3**
- 1 MM1 S1 n 2
- 2 MM1 S3 cb 5
- 3 MM2 S3 mk **8**
- 4 MM2 S4 bg **10**
- 5 MM2 S4 dgd 1
- 6 MM4 S2 rd 2
- 7 MM4 S2 cb 2
- 8 MM4 S2 uyi **7**
- 0 MM1 S1 a **3**
- 1 3 MM2 S3 mk **8**
- 4 MM2 S4 bg **10**
- 8 MM4 S2 uyi **7**
- Sp Mt Value count
- 4 MM2 S4 bg 10
- 5 MM2 S4 dgd 1
- 6 MM4 S2 rd 2
- 7 MM4 S2 cb 8
- 8 MM4 S2 uyi 8
- MM2 S4 bg 10
- MM4 S2 cb 8
- MM4 S2 uyi 8
- In [1]: df
- Out[1]:
- Sp Mt Value count
- 0 MM1 S1 a 3
- 1 MM1 S1 n 2
- 2 MM1 S3 cb 5
- 3 MM2 S3 mk 8
- 4 MM2 S4 bg 10
- 5 MM2 S4 dgd 1
- 6 MM4 S2 rd 2
- 7 MM4 S2 cb 2
- 8 MM4 S2 uyi 7
- In [2]: df.groupby(['Mt'], sort=False)['count'].max()
- Out[2]:
- Mt
- S1 3
- S3 8
- S4 10
- S2 7
- Name: count
- In [3]: idx = df.groupby(['Mt'])['count'].transform(max) == df['count']
- In [4]: df[idx]
- Out[4]:
- Sp Mt Value count
- 0 MM1 S1 a 3
- 3 MM2 S3 mk 8
- 4 MM2 S4 bg 10
- 8 MM4 S2 uyi 7
- In [5]: df['count_max'] = df.groupby(['Mt'])['count'].transform(max)
- In [6]: df
- Out[6]:
- Sp Mt Value count count_max
- 0 MM1 S1 a 3 3
- 1 MM1 S1 n 2 3
- 2 MM1 S3 cb 5 8
- 3 MM2 S3 mk 8 8
- 4 MM2 S4 bg 10 10
- 5 MM2 S4 dgd 1 10
- 6 MM4 S2 rd 2 7
- 7 MM4 S2 cb 2 7
- 8 MM4 S2 uyi 7 7
- df.sort_values('count', ascending=False).drop_duplicates(['Sp','Mt'])
- In [365]: import pandas as pd
- In [366]: df = pd.DataFrame({
- 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
- 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
- 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
- 'count' : [3,2,5,8,10,1,2,2,7]
- })
- In [367]: df
- Out[367]:
- count mt sp val
- 0 3 S1 MM1 a
- 1 2 S1 MM1 n
- 2 5 S3 MM1 cb
- 3 8 S3 MM2 mk
- 4 10 S4 MM2 bg
- 5 1 S4 MM2 dgb
- 6 2 S2 MM4 rd
- 7 2 S2 MM4 cb
- 8 7 S2 MM4 uyi
- ### Apply idxmax() and use .loc() on dataframe to filter the rows with max values:
- In [368]: df.loc[df.groupby(["sp", "mt"])["count"].idxmax()]
- Out[368]:
- count mt sp val
- 0 3 S1 MM1 a
- 2 5 S3 MM1 cb
- 3 8 S3 MM2 mk
- 4 10 S4 MM2 bg
- 8 7 S2 MM4 uyi
- ### Just to show what values are returned by .idxmax() above:
- In [369]: df.groupby(["sp", "mt"])["count"].idxmax().values
- Out[369]: array([0, 2, 3, 4, 8])
- df = pd.DataFrame({
- 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4', 'MM4'],
- 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
- 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
- 'count' : [3,2,5,8,10,1,2,2,7]
- })
- df_grouped = df.groupby(['sp', 'mt']).agg({'count':'max'})
- df_grouped = df_grouped.reset_index()
- df_grouped = df_grouped.rename(columns={'count':'count_max'})
- df = pd.merge(df, df_grouped, how='left', on=['sp', 'mt'])
- df = df[df['count'] == df['count_max']]
- df[df['count'] == df.groupby(['Mt'])['count'].transform(max)]
- df['date']=pd.to_datetime(df['date'])
- idx=df.groupby(by='ad_id')['date'].idxmax()
- df_max=df.loc[idx,]
- ad_id price date
- 7 22 2 2018-06-11
- 6 23 2 2018-06-22
- 2 24 2 2018-06-30
- 3 28 5 2018-06-22
- df.sort_values('count').drop_duplicates(['Sp','Mt'],keep='last')
- Out[190]:
- Sp Mt Value count
- 0 MM1 S1 a 3
- 2 MM1 S3 cb 5
- 8 MM4 S2 uyi 7
- 3 MM2 S3 mk 8
- 4 MM2 S4 bg 10
- df.sort_values('count').groupby(['Sp', 'Mt']).tail(1)
- Out[52]:
- Sp Mt Value count
- 0 MM1 S1 a 3
- 2 MM1 S3 cb 5
- 8 MM4 S2 uyi 7
- 3 MM2 S3 mk 8
- 4 MM2 S4 bg 10
- df = pd.DataFrame({
- 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
- 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
- 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
- 'count' : [3,2,5,8,10,1,2,2,7]
- })
- df.groupby(['sp', 'mt']).apply(lambda grp: grp.nlargest(1, 'count'))
- df = pd.DataFrame({
- 'Sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4', 'MM4'],
- 'Mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
- 'Val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
- 'Count' : [3,2,5,8,10,1,2,2,7]
- })
- df.groupby('Mt')
- .apply(lambda group: group[group.Count == group.Count.max()])
- .reset_index(drop=True)
- sp mt val count
- 0 MM1 S1 a 3
- 1 MM4 S2 uyi 7
- 2 MM2 S3 mk 8
- 3 MM2 S4 bg 10
- In [85]: import pandas as pd
- In [86]: df = pd.DataFrame({
- ...: 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
- ...: 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
- ...: 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
- ...: 'count' : [3,2,5,8,10,1,2,2,7]
- ...: })
- ## Apply nlargest(1) to find the max val df, and nlargest(n) gives top n values for df:
- In [87]: df.groupby(["sp", "mt"]).apply(lambda x: x.nlargest(1, "count")).reset_index(drop=True)
- Out[87]:
- count mt sp val
- 0 3 S1 MM1 a
- 1 5 S3 MM1 cb
- 2 8 S3 MM2 mk
- 3 10 S4 MM2 bg
- 4 7 S2 MM4 uyi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement