Advertisement
Guest User

Untitled

a guest
May 27th, 2019
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.01 KB | None | 0 0
  1. import pandas as pd
  2. import numpy as np
  3. from sklearn.model_selection import train_test_split
  4. from sklearn.feature_selection import VarianceThreshold
  5. from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
  6. from sklearn.metrics import roc_auc_score
  7.  
  8.  
  9. from mlxtend.feature_selection import SequentialFeatureSelector
  10.  
  11. data = pd.read_csv("truck.csv")
  12. print(data.shape)
  13.  
  14. output = data.iloc[:,402]
  15. print(output)
  16.  
  17. input = data.drop(labels=['Send_Date','PARTITIONING','VEHICLE_ID','All_Fault_in_3_months'],axis=1)
  18.  
  19. print(input)
  20.  
  21.  
  22.  
  23.  
  24. train_features,test_features,train_labels,test_labels = train_test_split(input,output,test_size=0.2,random_state=41)
  25.  
  26. correlated_features = set()
  27. correlated_matrix = input.corr()
  28. for i in range(len(correlated_matrix.columns)):
  29. for j in range(i):
  30. if abs(correlated_matrix.iloc[i,j] > 0.8):
  31. colname = correlated_matrix.columns[i]
  32. correlated_features.add(colname)
  33.  
  34.  
  35. train_features.drop(labels=correlated_features, axis=1, inplace=True)
  36. test_features.drop(labels=correlated_features, axis=1, inplace=True)
  37.  
  38. print(train_features.shape)
  39. print(test_features.shape)
  40.  
  41. feature_Selector = SequentialFeatureSelector(RandomForestClassifier(n_jobs=-1),k_features=50,forward=True,verbose=2,scoring='roc_auc',cv=4)
  42.  
  43. features = feature_Selector.fit(np.array(train_features.fillna(0)),train_labels)
  44. filtered_features= train_features.columns[list(features.k_feature_idx_)]
  45. print(filtered_features)
  46.  
  47.  
  48. #to see how it works
  49.  
  50. classifier = RandomForestClassifier(n_estimators=100,random_state=41,max_depth=6)
  51. classifier.fit(train_features[filtered_features].fillna(0),train_labels)
  52.  
  53. train_prediction = classifier.predict_proba(train_features[filtered_features].fillna(0))
  54. print('Accuracy on training set: {}'.format(roc_auc_score(train_labels, train_prediction[:,1])))
  55.  
  56. test_prediction = classifier.predict_proba(test_features[filtered_features].fillna(0))
  57. print('Accuracy on test set: {}'.format(roc_auc_score(test_labels, test_prediction[:,1])))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement