Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- import pandas as pd
- from copy import deepcopy, copy
- from tqdm import tqdm
class FeatureExpander:
    """Append log and pairwise product/ratio features to a DataFrame.

    Parameters
    ----------
    logs : bool, default False
        When true, one ``<col>_log`` column is appended per input column.
    squares : bool, default False
        When true, ``<a>_X_<b>`` (product) and ``<a>_/_<b>`` (ratio) columns
        are appended for every ordered pair of input columns, including each
        column paired with itself.

    Attributes
    ----------
    mins : pandas.Series or None
        Per-column minima recorded by ``fit`` when ``logs`` is true; ``None``
        until then.
    """

    def __init__(self, logs=False, squares=False):
        self.logs = logs
        self.squares = squares
        self.mins = None

    @staticmethod
    def _progress(columns):
        """Wrap *columns* in a tqdm progress bar when tqdm can be imported."""
        # The bar is purely cosmetic, so fall back to plain iteration rather
        # than fail if tqdm is unavailable.
        try:
            from tqdm import tqdm
        except ImportError:
            return columns
        return tqdm(columns, position=0)

    def __add_logs(self, X, fit_mode=False):
        """Return a frame of ``<col>_log`` columns, or ``None`` if logs are off.

        In fit mode the per-column minima of *X* are stored in ``self.mins``.
        Each column is shifted by its stored minimum so that values seen
        during ``fit`` map to ``log(x - min + 2) >= log(2)``. Values that are
        still non-finite after the log are replaced by ``np.nan_to_num``.

        Raises
        ------
        AttributeError
            If called in transform mode before any minima were recorded.
        """
        if not self.logs:
            return None
        if fit_mode:
            # axis=0 is pinned explicitly: np.min(DataFrame) collapses to a
            # single scalar on pandas >= 2.0, which breaks per-column lookup.
            self.mins = X.min(axis=0)
        if self.mins is None:
            raise AttributeError(
                "FeatureExpander must be fitted before transform when logs=True"
            )

        log_features = {}
        for col in self._progress(list(X.columns)):
            # Subtract (not add) the minimum so the smallest fitted value
            # lands on 2 and the log argument stays positive.
            shifted = np.asarray(X[col] - self.mins[col] + 2, dtype=float)
            log_features[f"{col}_log"] = np.nan_to_num(np.log(shifted))
        # Build the frame in one go; X itself is never modified.
        return pd.DataFrame(log_features, index=X.index)

    def __add_sqrs(self, X, fit_mode=False):
        """Return a frame of pairwise product/ratio columns, or ``None``.

        For every ordered pair ``(a, b)`` of columns of *X* the result holds
        ``a_X_b = X[a] * X[b]`` followed by ``a_/_b = X[a] / X[b]``.
        *fit_mode* is accepted for symmetry with ``__add_logs`` and is unused.
        """
        if not self.squares:
            return None

        cols = list(X.columns)
        pair_features = {}
        for col1 in self._progress(cols):
            left = X[col1]
            for col2 in cols:
                right = X[col2]
                pair_features[f"{col1}_X_{col2}"] = left * right
                pair_features[f"{col1}_/_{col2}"] = left / right

        if not pair_features:
            # pd.concat refuses an empty mapping; keep the row index instead.
            return pd.DataFrame(index=X.index)
        # A single concat avoids the fragmentation caused by inserting
        # O(n^2) columns one at a time.
        return pd.concat(pair_features, axis=1)

    def __expand(self, X, fit_mode):
        """Concatenate *X* with whichever feature groups are enabled."""
        extras = (self.__add_logs(X, fit_mode), self.__add_sqrs(X, fit_mode))
        parts = [X] + [part for part in extras if part is not None]
        return pd.concat(parts, axis=1)

    def fit(self, X, y=None):
        """Learn the per-column minima from *X* and return the expanded frame.

        Note that this returns the expanded DataFrame, not ``self``. *y* is
        ignored.
        """
        return self.__expand(X, fit_mode=True)

    def transform(self, X):
        """Return *X* expanded using the minima recorded by ``fit``."""
        return self.__expand(X, fit_mode=False)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement