Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Find the minimum and maximum VALUE for each ID.
# Compute DATE_DIFF, the signed difference between the DATE of the minimum
# VALUE and the DATE of the maximum VALUE for that ID.
import pandas as pd

# Sample rows in the form [ID, VALUE, DATE].
data = [
    [1, 123124, 2000],
    [1, 324324, 2001],
    [1, 345345, 2002],
    [1, 324430, 2003],
    [2, 465446, 2011],
    [2, 545644, 2010],
    [2, 545454, 2012],
    [2, 687877, 2005],
]
df = pd.DataFrame(data, columns=['ID', 'VALUE', 'DATE'])

# Broadcast each ID's smallest and largest VALUE onto every row of that ID.
df['MIN_VALUE'] = df.groupby(['ID'])['VALUE'].transform('min')
df['MAX_VALUE'] = df.groupby(['ID'])['VALUE'].transform('max')

# DATE at which each ID reaches its extreme VALUE. When the extreme occurs on
# several rows, the earliest DATE is used for the minimum and the latest DATE
# for the maximum. Aggregating the filtered rows (instead of pre-filling the
# columns with None and looping over IDs) keeps the columns numeric and avoids
# shadowing the builtins `id`, `min` and `max`.
_date_of_min = df.loc[df['VALUE'] == df['MIN_VALUE']].groupby('ID')['DATE'].min()
_date_of_max = df.loc[df['VALUE'] == df['MAX_VALUE']].groupby('ID')['DATE'].max()
df['DATE_MIN'] = df['ID'].map(_date_of_min)
df['DATE_MAX'] = df['ID'].map(_date_of_max)

# Signed difference: negative when the minimum is dated before the maximum.
df['DATE_DIFF'] = df['DATE_MIN'] - df['DATE_MAX']

# Collapse to one summary row per ID; every row of an ID carries identical
# values in these columns, so dropping duplicates keeps the first of each.
df_2 = df[
    ['ID', 'MIN_VALUE', 'MAX_VALUE', 'DATE_MIN', 'DATE_MAX', 'DATE_DIFF']
].drop_duplicates()

# Bare expression: displays the result in a notebook cell, no-op in a script.
df_2
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement