Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # Список категориальных признаков
- ohe_features = features_train.select_dtypes(include='object').columns.to_list()
- # Список численных признаков
- num_features = features_train.select_dtypes(exclude='object').columns.to_list()
- num_features_to_remove = ['has_cr_card', 'is_active_member']
- for item in num_features_to_remove:
- num_features.remove(item)
- # Преобразуем категориальные признаки в численные
- encoder_ohe = OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False)
- encoder_ohe.fit(features_train[ohe_features])
- for dataset in ['features_train', 'features_valid', 'features_test']:
- vars()[dataset][encoder_ohe.get_feature_names_out()] = encoder_ohe.transform(vars()[dataset][ohe_features])
- vars()[dataset] = vars()[dataset].drop(ohe_features, axis=1)
- vars()[dataset].columns = vars()[dataset].columns.str.lower()
- # Масштабирование численных признаков
- scaler = StandardScaler()
- scaler.fit(features_train[num_features])
- for dataset in ['features_train', 'features_valid', 'features_test']:
- vars()[dataset][num_features] = scaler.transform(vars()[dataset][num_features])
- for dataset in ['features_train', 'features_valid', 'features_test']:
- print('\n>>>>>>>>>> ' + dataset)
- display(vars()[dataset].head())
- print ()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement