Advertisement
EliiO

OneHotEncoder

Jan 28th, 2023 (edited)
813
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.39 KB | Science | 0 0
  1. # Список категориальных признаков
  2. ohe_features = features_train.select_dtypes(include='object').columns.to_list()
  3.  
  4. # Список численных признаков
  5. num_features = features_train.select_dtypes(exclude='object').columns.to_list()
  6. num_features_to_remove = ['has_cr_card', 'is_active_member']
  7.  
  8. for item in num_features_to_remove:
  9.     num_features.remove(item)
  10.  
  11. # Преобразуем категориальные признаки в численные
  12. encoder_ohe = OneHotEncoder(drop='first', handle_unknown='ignore', sparse_output=False)
  13. encoder_ohe.fit(features_train[ohe_features])
  14.  
  15. for dataset in ['features_train', 'features_valid', 'features_test']:
  16.     vars()[dataset][encoder_ohe.get_feature_names_out()] = encoder_ohe.transform(vars()[dataset][ohe_features])
  17.     vars()[dataset] = vars()[dataset].drop(ohe_features, axis=1)
  18.     vars()[dataset].columns = vars()[dataset].columns.str.lower()
  19.  
  20. # Масштабирование численных признаков
  21. scaler = StandardScaler()
  22. scaler.fit(features_train[num_features])
  23.  
  24. for dataset in ['features_train', 'features_valid', 'features_test']:
  25.     vars()[dataset][num_features] = scaler.transform(vars()[dataset][num_features])
  26.  
  27. for dataset in ['features_train', 'features_valid', 'features_test']:
  28.     print('\n>>>>>>>>>> ' + dataset)
  29.     display(vars()[dataset].head())
  30.     print ()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement