Advertisement
Guest User

GroupBy: convert to DataFrame, OLS Regression, group predictions

a guest
Jun 28th, 2017
77
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.29 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. """
  3. GroupBy: convert to DataFrame, OLS Regression, group predictions
  4. Created on Mon Jun 12 20:57:32 2017
  5. """
  6. import pandas as pd
  7. import numpy as np
  8. import statsmodels.api as sm
  9. from sklearn.linear_model import LinearRegression
  10.  
  11. #%% Converting a Pandas GroupBy object to DataFrame
  12. #https://stackoverflow.com/questions/10373660/
  13. df1 = pd.DataFrame( {
  14.   "Name" : ["Alice", "Bob", "Mallory", "Mallory", "Bob" , "Mallory"] ,
  15.   "City" : ["Seattle", "Seattle", "Portland", "Seattle", "Seattle", "Portland"] } )
  16.  
  17. # g1 here is a DataFrame. It has a hierarchical index
  18. g1 = df1.groupby( [ "Name", "City"] ).count()
  19. g2=g1.add_suffix('_Count').reset_index()
  20.  
  21.  
  22. df2=pd.DataFrame({'count' : df1.groupby( [ "Name", "City"] ).size()}).reset_index()
  23.  
  24. df3=pd.DataFrame({'count' : df1.groupby( [ "Name", "City"] ).size()})
  25.  
  26. #%% OLS Regression with groupby
  27. M=500
  28. df=pd.DataFrame({'FID': np.random.randint(low=1, high=3, size=500),
  29.                  'MEAN':np.random.randn(M),
  30.                  'Accum_Prcp':np.random.randn(M)*300,
  31.                  'Accum_HDD':np.random.randn(M)*1000})
  32.  
  33. def group_ols(grps):
  34.     results=[]
  35.     for fid, grp in grps: # fid此处必须要有,是指index列。
  36.         y=grp.loc[:,'MEAN']
  37.         x=grp.loc[:,['Accum_Prcp', 'Accum_HDD']]
  38.         result=sm.OLS(y,x ).fit()
  39.         results.append((fid, result.summary()))
  40.     return results
  41. r_ols=group_ols(df.groupby(['FID']))
  42. r_ols
  43.  
  44. #%% group predictions
  45.  
  46. # predict the value for each group for 01-10-2016.
  47. df4=pd.DataFrame({'group':['A','A','A','A','A','A','B','B','B','C','C'],
  48.                 'date':['2016-1-2','2016-1-3','2016-1-4','2016-1-5','2016-1-6',
  49.                         '2016-1-7','2016-1-2','2016-1-3','2016-1-4','2016-1-2','2016-1-3'],
  50.                 'value':[16,15,14,17,19,20,16,13,13,16,16]})
  51.  
  52. def model(df4, delta):
  53.     y = df4[['value']].values
  54.     X = df4[['date_delta']].values
  55.     return np.squeeze(LinearRegression().fit(X, y).predict(delta))
  56.  
  57. def group_predictions(df, date):
  58.     date = pd.to_datetime(date)
  59.     df4.date = pd.to_datetime(df4.date)
  60.     day = np.timedelta64(1, 'D')
  61.     mn = df4.date.min()
  62.     df4['date_delta'] = df4.date.sub(mn).div(day)
  63.     dd = (date - mn) / day
  64.     return df.groupby('group').apply(model, delta=dd)
  65.  
# Predict each group's value for 2016-01-10 from the sample data in df4.
# Bare expression: the result is only displayed when run interactively.
group_predictions(df4, '2016-1-10')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement