Guest User

Untitled

a guest
Jun 14th, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.20 KB | None | 0 0
  1. import pandas as pd
  2. from sklearn.model_selection import train_test_split
  3. import matplotlib.pyplot as plt
  4. from sklearn.preprocessing import Imputer
  5.  
  6. df = pd.read_csv("bouts_out_new.csv")
  7.  
  8.  
  9.  
  10.  
  11.  
  12.  
  13.  
  14.  
  15. # PRE-PROCESSING AND CLEAN-UP
  16.  
  17. # Models can only handle numeric features so I convert the non-numeric features
  18. # into numeric using dummy features
  19.  
  20. clean_df = pd.get_dummies(df)
  21.  
  22. # This results in 39 features
  23. print(clean_df.columns)
  24. print(clean_df.shape)
  25.  
  26.  
  27. # Test both imputed values aswell as a completely clean dataset
  28.  
  29. # Split the dataset, drop result label from data
  30. clean_df.columns
  31.  
  32. target = clean_df.drop(['result_win_B', 'result_win_A', 'result_draw'], axis=1)
  33.  
  34. X_train, X_test, y_train, y_test = train_test_split(
  35. clean_df.drop(['result_win_B', 'result_win_A', 'result_draw'],
  36. axis=1), target, random_state=0)
  37.  
  38.  
  39. import sklearn.feature_selection
  40.  
  41.  
  42. selection = sklearn.feature_selection.SelectKBest(k=20)
  43. selected_features = selection.fit(X_train, y_train)
  44. indices_selected = selected_features.get_support(indices=True)
  45. colnames_selected = [clean_df.columns[i] for i in indices_selected]
  46.  
  47. X_train_selected = X_train[colnames_selected]
  48. X_test_selected = X_test[colnames_selected]
  49.  
  50. colnames_selected
Add Comment
Please, Sign In to add comment