Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pd
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.feature_selection import VarianceThreshold
- from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
- from sklearn.metrics import roc_auc_score
- from mlxtend.feature_selection import SequentialFeatureSelector
# Load the raw dataset; "truck.csv" is resolved against the current working
# directory.
data = pd.read_csv("truck.csv")
print(data.shape)

# The target is taken by position (column index 402).
# NOTE(review): presumably this is the 'All_Fault_in_3_months' column that is
# dropped from the predictors below -- confirm. If it is a different column,
# the target is still present among the predictors (label leakage).
output = data.iloc[:, 402]
print(output)

# Named `predictors` rather than `input` so the builtin input() is not shadowed.
predictors = data.drop(
    labels=['Send_Date', 'PARTITIONING', 'VEHICLE_ID', 'All_Fault_in_3_months'],
    axis=1,
)
print(predictors)

train_features, test_features, train_labels, test_labels = train_test_split(
    predictors, output, test_size=0.2, random_state=41
)

# Flag every column that is strongly correlated (|r| > 0.8) with at least one
# column that precedes it in the correlation matrix. The absolute value must be
# applied to the coefficient itself -- not to the result of the comparison --
# so that strong negative correlations are caught as well.
# NOTE(review): the matrix is computed on the full data set, so test rows
# influence which columns are removed; consider train_features.corr() instead.
correlated_matrix = predictors.corr()
strong_pairs = np.abs(correlated_matrix.values) > 0.8
# k=-1 keeps only the strict lower triangle, i.e. pairs (i, j) with j < i,
# which ignores the diagonal and counts each pair once.
has_earlier_twin = np.tril(strong_pairs, k=-1).any(axis=1)
correlated_features = list(correlated_matrix.columns[has_earlier_twin])

# Reassign instead of dropping in place: the split frames are derived from
# `predictors`, and in-place mutation of derived frames can trigger
# SettingWithCopyWarning.
train_features = train_features.drop(labels=correlated_features, axis=1)
test_features = test_features.drop(labels=correlated_features, axis=1)
print(train_features.shape)
print(test_features.shape)
# Forward sequential feature selection: pick 50 columns, scoring each
# candidate subset by 4-fold cross-validated ROC AUC of a random forest.
base_estimator = RandomForestClassifier(n_jobs=-1)
feature_Selector = SequentialFeatureSelector(
    base_estimator,
    k_features=50,
    forward=True,
    verbose=2,
    scoring='roc_auc',
    cv=4,
)

# The selector is fitted on a plain array with missing values replaced by 0.
train_matrix = np.array(train_features.fillna(0))
features = feature_Selector.fit(train_matrix, train_labels)

# k_feature_idx_ holds positional indices; translate them to column labels.
selected_positions = list(features.k_feature_idx_)
filtered_features = train_features.columns[selected_positions]
print(filtered_features)
# Sanity check of the selected features: fit a shallow random forest on them
# and report ROC AUC on both the training and the test split.
classifier = RandomForestClassifier(n_estimators=100, random_state=41, max_depth=6)

# Build each imputed, column-filtered frame once and reuse it.
train_selected = train_features[filtered_features].fillna(0)
test_selected = test_features[filtered_features].fillna(0)

classifier.fit(train_selected, train_labels)

# Column 1 of predict_proba is used as the score passed to roc_auc_score.
# NOTE(review): this assumes a binary target whose second class is the
# positive one -- confirm against the label column.
train_prediction = classifier.predict_proba(train_selected)
# The metric is ROC AUC, not accuracy, so the printed label says so.
print('ROC AUC on training set: {}'.format(roc_auc_score(train_labels, train_prediction[:, 1])))

test_prediction = classifier.predict_proba(test_selected)
print('ROC AUC on test set: {}'.format(roc_auc_score(test_labels, test_prediction[:, 1])))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement