Advertisement
Guest User

Untitled

a guest
Jun 19th, 2019
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.64 KB | None | 0 0
  1. Sp Mt Value count
  2. 0 MM1 S1 a **3**
  3. 1 MM1 S1 n 2
  4. 2 MM1 S3 cb 5
  5. 3 MM2 S3 mk **8**
  6. 4 MM2 S4 bg **10**
  7. 5 MM2 S4 dgd 1
  8. 6 MM4 S2 rd 2
  9. 7 MM4 S2 cb 2
  10. 8 MM4 S2 uyi **7**
  11.  
  12. 0 MM1 S1 a **3**
  13. 1 3 MM2 S3 mk **8**
  14. 4 MM2 S4 bg **10**
  15. 8 MM4 S2 uyi **7**
  16.  
  17. Sp Mt Value count
  18. 4 MM2 S4 bg 10
  19. 5 MM2 S4 dgd 1
  20. 6 MM4 S2 rd 2
  21. 7 MM4 S2 cb 8
  22. 8 MM4 S2 uyi 8
  23.  
  24. MM2 S4 bg 10
  25. MM4 S2 cb 8
  26. MM4 S2 uyi 8
  27.  
  28. In [1]: df
  29. Out[1]:
  30. Sp Mt Value count
  31. 0 MM1 S1 a 3
  32. 1 MM1 S1 n 2
  33. 2 MM1 S3 cb 5
  34. 3 MM2 S3 mk 8
  35. 4 MM2 S4 bg 10
  36. 5 MM2 S4 dgd 1
  37. 6 MM4 S2 rd 2
  38. 7 MM4 S2 cb 2
  39. 8 MM4 S2 uyi 7
  40.  
  41. In [2]: df.groupby(['Mt'], sort=False)['count'].max()
  42. Out[2]:
  43. Mt
  44. S1 3
  45. S3 8
  46. S4 10
  47. S2 7
  48. Name: count
  49.  
  50. In [3]: idx = df.groupby(['Mt'])['count'].transform(max) == df['count']
  51.  
  52. In [4]: df[idx]
  53. Out[4]:
  54. Sp Mt Value count
  55. 0 MM1 S1 a 3
  56. 3 MM2 S3 mk 8
  57. 4 MM2 S4 bg 10
  58. 8 MM4 S2 uyi 7
  59.  
  60. In [5]: df['count_max'] = df.groupby(['Mt'])['count'].transform(max)
  61.  
  62. In [6]: df
  63. Out[6]:
  64. Sp Mt Value count count_max
  65. 0 MM1 S1 a 3 3
  66. 1 MM1 S1 n 2 3
  67. 2 MM1 S3 cb 5 8
  68. 3 MM2 S3 mk 8 8
  69. 4 MM2 S4 bg 10 10
  70. 5 MM2 S4 dgd 1 10
  71. 6 MM4 S2 rd 2 7
  72. 7 MM4 S2 cb 2 7
  73. 8 MM4 S2 uyi 7 7
  74.  
  75. df.sort_values('count', ascending=False).drop_duplicates(['Sp','Mt'])
  76.  
  77. In [365]: import pandas as pd
  78.  
  79. In [366]: df = pd.DataFrame({
  80. 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
  81. 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
  82. 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
  83. 'count' : [3,2,5,8,10,1,2,2,7]
  84. })
  85.  
  86. In [367]: df
  87. Out[367]:
  88. count mt sp val
  89. 0 3 S1 MM1 a
  90. 1 2 S1 MM1 n
  91. 2 5 S3 MM1 cb
  92. 3 8 S3 MM2 mk
  93. 4 10 S4 MM2 bg
  94. 5 1 S4 MM2 dgb
  95. 6 2 S2 MM4 rd
  96. 7 2 S2 MM4 cb
  97. 8 7 S2 MM4 uyi
  98.  
  99.  
  100. ### Apply idxmax() and use .loc() on dataframe to filter the rows with max values:
  101. In [368]: df.loc[df.groupby(["sp", "mt"])["count"].idxmax()]
  102. Out[368]:
  103. count mt sp val
  104. 0 3 S1 MM1 a
  105. 2 5 S3 MM1 cb
  106. 3 8 S3 MM2 mk
  107. 4 10 S4 MM2 bg
  108. 8 7 S2 MM4 uyi
  109.  
  110. ### Just to show what values are returned by .idxmax() above:
  111. In [369]: df.groupby(["sp", "mt"])["count"].idxmax().values
  112. Out[369]: array([0, 2, 3, 4, 8])
  113.  
  114. df = pd.DataFrame({
  115. 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4', 'MM4'],
  116. 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
  117. 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
  118. 'count' : [3,2,5,8,10,1,2,2,7]
  119. })
  120.  
  121. df_grouped = df.groupby(['sp', 'mt']).agg({'count':'max'})
  122.  
  123. df_grouped = df_grouped.reset_index()
  124.  
  125. df_grouped = df_grouped.rename(columns={'count':'count_max'})
  126.  
  127. df = pd.merge(df, df_grouped, how='left', on=['sp', 'mt'])
  128.  
  129. df = df[df['count'] == df['count_max']]
  130.  
  131. df[df['count'] == df.groupby(['Mt'])['count'].transform(max)]
  132.  
  133. df['date']=pd.to_datetime(df['date'])
  134.  
  135. idx=df.groupby(by='ad_id')['date'].idxmax()
  136.  
  137. df_max=df.loc[idx,]
  138.  
  139. ad_id price date
  140. 7 22 2 2018-06-11
  141. 6 23 2 2018-06-22
  142. 2 24 2 2018-06-30
  143. 3 28 5 2018-06-22
  144.  
  145. df.sort_values('count').drop_duplicates(['Sp','Mt'],keep='last')
  146. Out[190]:
  147. Sp Mt Value count
  148. 0 MM1 S1 a 3
  149. 2 MM1 S3 cb 5
  150. 8 MM4 S2 uyi 7
  151. 3 MM2 S3 mk 8
  152. 4 MM2 S4 bg 10
  153.  
  154. df.sort_values('count').groupby(['Sp', 'Mt']).tail(1)
  155. Out[52]:
  156. Sp Mt Value count
  157. 0 MM1 S1 a 3
  158. 2 MM1 S3 cb 5
  159. 8 MM4 S2 uyi 7
  160. 3 MM2 S3 mk 8
  161. 4 MM2 S4 bg 10
  162.  
  163. df = pd.DataFrame({
  164. 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
  165. 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
  166. 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
  167. 'count' : [3,2,5,8,10,1,2,2,7]
  168. })
  169.  
  170. df.groupby(['sp', 'mt']).apply(lambda grp: grp.nlargest(1, 'count'))
  171.  
  172. df = pd.DataFrame({
  173. 'Sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4', 'MM4'],
  174. 'Mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
  175. 'Val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
  176. 'Count' : [3,2,5,8,10,1,2,2,7]
  177. })
  178.  
  179. df.groupby('Mt')
  180. .apply(lambda group: group[group.Count == group.Count.max()])
  181. .reset_index(drop=True)
  182.  
  183. sp mt val count
  184. 0 MM1 S1 a 3
  185. 1 MM4 S2 uyi 7
  186. 2 MM2 S3 mk 8
  187. 3 MM2 S4 bg 10
  188.  
  189. In [85]: import pandas as pd
  190.  
  191. In [86]: df = pd.DataFrame({
  192. ...: 'sp' : ['MM1', 'MM1', 'MM1', 'MM2', 'MM2', 'MM2', 'MM4', 'MM4','MM4'],
  193. ...: 'mt' : ['S1', 'S1', 'S3', 'S3', 'S4', 'S4', 'S2', 'S2', 'S2'],
  194. ...: 'val' : ['a', 'n', 'cb', 'mk', 'bg', 'dgb', 'rd', 'cb', 'uyi'],
  195. ...: 'count' : [3,2,5,8,10,1,2,2,7]
  196. ...: })
  197.  
  198. ## Apply nlargest(1) to find the max val df, and nlargest(n) gives top n values for df:
  199. In [87]: df.groupby(["sp", "mt"]).apply(lambda x: x.nlargest(1, "count")).reset_index(drop=True)
  200. Out[87]:
  201. count mt sp val
  202. 0 3 S1 MM1 a
  203. 1 5 S3 MM1 cb
  204. 2 8 S3 MM2 mk
  205. 3 10 S4 MM2 bg
  206. 4 7 S2 MM4 uyi
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement