Advertisement
Guest User

Untitled

a guest
Nov 20th, 2017
64
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.23 KB | None | 0 0
  1. from copy import deepcopy
  2. import numpy as np
  3. import pandas as pd
  4. #
  5. class SparkPDModelTransformer:
  6.  
  7. def __init__(self, years=3):
  8. self.years = years
  9.  
  10. def build_columns(self, original):
  11. result = []
  12. for i in range(self.years):
  13. result += [l + '_' + str(i) for l in original]
  14.  
  15. for i in range(self.years - 1):
  16. result += ['dynamical_' + l + '_' + str(i) for l in original]
  17. return result
  18.  
  19. def _fit(self, X):
  20. yearly_frames = X[0]
  21. end_year_df = X[1]
  22. year_list = list(yearly_frames.keys())
  23. start_year = np.max(year_list) - self.years + 1
  24. result_df = None
  25. result_columns = self.build_columns(yearly_frames[list(yearly_frames.keys())[0]].columns)
  26. while start_year >= np.min(year_list):
  27. good_inns = end_year_df.loc[end_year_df['end_year'] == start_year + self.years]
  28.  
  29. absolute_df = pd.concat(
  30. [yearly_frames[year] for year in range(start_year, start_year + self.years)] + [good_inns],
  31. axis=1, join='inner')
  32. dynamical_df = pd.concat(
  33. [(yearly_frames[year + 1] - yearly_frames[year]) / (yearly_frames[year].abs() + 1e-6)
  34. for year in range(start_year, start_year + self.years - 1)] + [good_inns],
  35. axis=1, join='inner')
  36. joined_df = pd.concat(
  37. [absolute_df.drop('end_year', axis=1), dynamical_df.drop('end_year', axis=1)],
  38. axis=1, join='inner')
  39. joined_df.columns = result_columns
  40.  
  41. if result_df is None:
  42. result_df = joined_df
  43. else:
  44. result_df = result_df.append(joined_df, ignore_index=True)
  45. start_year -= 1
  46.  
  47. columns_all = result_df.columns[:-1]
  48. return result_df, None
  49.  
  50. def fit(self, X, y=None):
  51. '''
  52. Args:
  53. X : [ dict({'year':'DataFrame'}) , DataFrame(index='INN', ['end_year']), categorical_columns ]
  54. Returns :
  55. [ DataFrame{[index: INN, transformed features]} , categorical_features ]
  56. '''
  57.  
  58. return self._fit(X)
  59.  
  60. def transform(self, X):
  61. return self._fit(X)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement