Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def apply_transformation(dataframe, train_target):
    """
    Apply transformations on features and calculate the correlation with the target.

    Each feature column is first shifted by ``abs(min(column)) + 1`` so that every
    value is strictly positive, which keeps ``log`` and the fractional powers
    defined. The shift is applied to a float copy: the caller's dataframe is
    never modified.

    :param dataframe: pandas dataframe of numeric feature columns
    :param train_target: pandas series (or array-like) with one value per row of ``dataframe``
    :return: pandas dataframe with one row per feature (in the column order of
        ``dataframe``) and one column per transformation ("1", "log", "0.25",
        "0.5", "0.75", "2", "3", "4"), holding the correlation coefficient
        between the transformed feature and the target, rounded to 2 decimals
    """
    # 1 means the original values. If the type is a number, it means x^number.
    transformation_type = [1, "log", 0.25, 0.5, 0.75, 2, 3, 4]
    column_labels = [str(x) for x in transformation_type]

    # astype() returns a new frame, so the input stays untouched; working in
    # float also avoids silent int64 overflow for the x^3 / x^4 transformations.
    shifted = dataframe.astype(float)
    # Remove negative values and zeros to avoid math problems (log(0), roots of
    # negatives). The per-column offsets align with the frame on column labels.
    shifted = shifted + (shifted.min().abs() + 1)

    # Convert the target once instead of once per feature per transformation.
    target_values = np.asarray(train_target, dtype=float)

    correlations = {}
    for trans_i in transformation_type:
        if trans_i == "log":
            dataframe_trans = np.log(shifted)
        else:
            dataframe_trans = shifted ** trans_i
        correlations[str(trans_i)] = [
            round(np.corrcoef(dataframe_trans[col].to_numpy(), target_values)[0, 1], 2)
            for col in shifted.columns
        ]
    return pd.DataFrame(correlations, columns=column_labels)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement