Advertisement
vmamontov

different_between_min_max_in_group

Nov 30th, 2022 (edited)
718
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.24 KB | None | 0 0
  1. # Нахождение минимального и максимального значения VALUE по каждому ID.
  2. # Расчёт промежутки времени DATE_DIFF, который прошел между этими событиями DATE.
  3.  
  4. import pandas as pd
  5.  
  6.  
  7. data = [[1, 123124, 2000],
  8.         [1, 324324, 2001],
  9.         [1, 345345, 2002],
  10.         [1, 324430, 2003],
  11.         [2, 465446, 2011],
  12.         [2, 545644, 2010],
  13.         [2, 545454, 2012],
  14.         [2, 687877, 2005]]
  15.  
  16. df = pd.DataFrame(data, columns=['ID', 'VALUE', 'DATE'])
  17.  
  18. df['MIN_VALUE'] = df.groupby(['ID'])['VALUE'].transform('min')
  19. df['MAX_VALUE'] = df.groupby(['ID'])['VALUE'].transform('max')
  20. df['DATE_MIN'] = None
  21. df['DATE_MAX'] = None
  22.  
  23. for id in df['ID'].unique():
  24.     group = df[df['ID'] == id]
  25.     min = group['VALUE'].min()
  26.     max = group['VALUE'].max()
  27.  
  28.     date_min = group[group['VALUE'] == min]['DATE'].min()
  29.     date_max = group[group['VALUE'] == max]['DATE'].max()
  30.    
  31.     df.loc[df['ID'] == id, 'DATE_MIN'] = date_min
  32.     df.loc[df['ID'] == id, 'DATE_MAX'] = date_max
  33.  
  34.  
  35. df['DATE_DIFF'] = df['DATE_MIN'] - df['DATE_MAX']
  36.  
  37. df_2 = df[['ID', 'MIN_VALUE', 'MAX_VALUE', 'DATE_MIN', 'DATE_MAX', 'DATE_DIFF']].drop_duplicates()
  38. df_2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement