Advertisement
sacr1ficerq

OneHotEncoder code for TwoHotGirls

Apr 8th, 2022
37
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.43 KB | None | 0 0
  1. from sklearn.preprocessing import OneHotEncoder
  2.  
  3. hot_enc = OneHotEncoder(sparse=False)
  4. categorical_train_hot = data_train[:, categorical_features]
  5. hot_enc.fit(categorical_train_hot)
  6. print(hot_enc.categories_)
  7. categorical_train_hot = hot_enc.transform(categorical_train_hot)
  8. categorical_train_hot = np.column_stack((np.ones(len(categorical_train_hot)), categorical_train_hot))
  9. categorical_train_hot = np.delete(categorical_train_hot, [5, 12, 20], axis=1)
  10. print("Categorical train shape: " + str(categorical_train_hot.shape))
  11.  
  12. categorical_weights_hot = calculate_approximate_coefficients(categorical_train_hot, y_v)
  13. print("shape of weights: " + str(categorical_weights_hot.shape))
  14. print("Categorical weights: ", categorical_weights_hot)
  15.  
  16. categorical_test = data_test[:, categorical_features]
  17. categorical_test_hot = np.array(hot_enc.transform(categorical_test))
  18. categorical_test_hot = np.delete(categorical_test_hot, [4, 11, 19], axis=1)
  19. print("Categorical test shape: " + str(categorical_test_hot.shape))
  20.  
  21.  
  22. def apply_categorical_coefficients_hot(input_matrix):
  23.     return apply_coefficients(input_matrix, categorical_weights_hot)
  24.  
  25.  
  26. categorical_error = calculate_quadratic_error(apply_categorical_coefficients_hot, categorical_test_hot, y_test)
  27. print("Categorical error with hot encoding:  " + str(round(categorical_error/10**6, 3)) + " million")
  28. # bruh moment. Ошибка 1 707 миллионов на тестовых данных:-(
  29. # Если не брать экспоненту, ошибка получается не такой большой (всего 31 миллион)
  30. # Проверим на обучающей выборке
  31.  
  32. categorical_error_train_hot = calculate_quadratic_error(apply_categorical_coefficients_hot, np.delete(categorical_train_hot, 0, axis=1), y_v)
  33. categorical_error_train = calculate_quadratic_error(apply_categorical_coefficients, np.delete(categorical_train, 0, axis=1), y_v)
  34.  
  35. print("Error on training data (hot): " + str(round(categorical_error_train_hot/10**6, 3)) + " million")
  36. print("Error on training data (ordinal): " + str(round(categorical_error_train/10**6, 3)) + " million")
  37. print()
  38. # Разница почти такая же. Видимо принимая больше переменных, модель как-то очень странно распределяет между ними
  39. # ответственность за цену, что в итоге приводит к огромной ошибке
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement