Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from copy import deepcopy
- import numpy as np
- import pandas as pd
- #
class SparkPDModelTransformer:
    """Builds a flat feature table from per-year DataFrames for PD modelling.

    For each sliding window of `years` consecutive years it concatenates the
    absolute yearly features plus the year-over-year relative changes
    ("dynamical" features) for the entities (INNs) whose `end_year` falls
    immediately after the window, then stacks all windows into one table.
    """

    def __init__(self, years=3):
        # Width of the observation window, in years.
        self.years = years

    def build_columns(self, original):
        """Return the output column names for one `self.years`-wide window.

        Args:
            original: iterable of base feature names (one year's columns).

        Returns:
            list of str: `<name>_<i>` for each of the `years` absolute
            frames, followed by `dynamical_<name>_<i>` for each of the
            `years - 1` year-over-year delta frames.
        """
        result = []
        for i in range(self.years):
            result += [name + '_' + str(i) for name in original]
        for i in range(self.years - 1):
            result += ['dynamical_' + name + '_' + str(i) for name in original]
        return result

    def _fit(self, X):
        """Slide a `years`-wide window backwards over the data and stack windows.

        Args:
            X: sequence where X[0] is a dict {year: DataFrame indexed by INN}
               and X[1] is a DataFrame indexed by INN with an 'end_year'
               column. (Any further elements, e.g. categorical columns, are
               ignored here.)

        Returns:
            tuple (DataFrame, None): the stacked feature table and a
            placeholder for categorical features (not computed here).
        """
        yearly_frames = X[0]
        end_year_df = X[1]
        year_list = list(yearly_frames.keys())
        # Start with the latest possible window and slide backwards.
        start_year = np.max(year_list) - self.years + 1
        result_df = None
        first_year = list(yearly_frames.keys())[0]
        result_columns = self.build_columns(yearly_frames[first_year].columns)
        while start_year >= np.min(year_list):
            # Entities whose observation ends right after this window.
            good_inns = end_year_df.loc[end_year_df['end_year'] == start_year + self.years]
            # Absolute features: one copy of the base columns per window year.
            absolute_df = pd.concat(
                [yearly_frames[year] for year in range(start_year, start_year + self.years)]
                + [good_inns],
                axis=1, join='inner')
            # Relative year-over-year change; epsilon avoids division by zero.
            dynamical_df = pd.concat(
                [(yearly_frames[year + 1] - yearly_frames[year]) / (yearly_frames[year].abs() + 1e-6)
                 for year in range(start_year, start_year + self.years - 1)]
                + [good_inns],
                axis=1, join='inner')
            joined_df = pd.concat(
                [absolute_df.drop('end_year', axis=1), dynamical_df.drop('end_year', axis=1)],
                axis=1, join='inner')
            joined_df.columns = result_columns
            if result_df is None:
                result_df = joined_df
            else:
                # DataFrame.append was removed in pandas 2.0; use pd.concat.
                result_df = pd.concat([result_df, joined_df], ignore_index=True)
            start_year -= 1
        return result_df, None

    def fit(self, X, y=None):
        '''
        Args:
            X : [ dict({'year':'DataFrame'}) , DataFrame(index='INN', ['end_year']), categorical_columns ]
        Returns :
            [ DataFrame{[index: INN, transformed features]} , categorical_features ]
        '''
        return self._fit(X)

    def transform(self, X):
        # Identical to fit: the transformation is recomputed from X each call.
        return self._fit(X)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement