Not a member of Pastebin yet? Sign up — it unlocks many cool features!
"""Train and evaluate a random-forest classifier on a pre-clustered dataset.

Reads 'DatasetAfterClustering.csv' (header row skipped), uses the first
three columns as features and the fourth as the class label, then reports
test accuracy and per-feature importances.
"""
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

# Load the dataset; columns 0-2 are features, column 3 is the label.
dataset = np.loadtxt('DatasetAfterClustering.csv', delimiter=',', skiprows=1)
features = dataset[:, 0:3]
labels = dataset[:, 3]

# 50/50 train/test split with a fixed seed for reproducibility.
tr_features, ts_features, tr_labels, ts_labels = train_test_split(
    features, labels, test_size=0.5, random_state=42)

# NOTE: the original passed min_impurity_split=None, a parameter that was
# deprecated in scikit-learn 0.19 and removed in 1.0 (passing it now raises
# TypeError). Dropping it changes nothing: None was the do-nothing default.
model = RandomForestClassifier(
    n_estimators=10, criterion="gini", max_depth=4,
    min_samples_split=2, min_samples_leaf=1,
    min_weight_fraction_leaf=0.0, max_features="sqrt",
    max_leaf_nodes=None, min_impurity_decrease=0.0,
    bootstrap=True, oob_score=False, n_jobs=-1, verbose=0)
model.fit(tr_features, tr_labels)

y = model.predict(ts_features)

# Predicted vs. actual labels side by side for a quick visual sanity check.
# (pd.concat already returns a DataFrame; no extra wrapping needed.)
final = pd.concat([pd.DataFrame(y), pd.DataFrame(ts_labels)], axis=1)
print(final.head())

# Vectorized correct-prediction count replaces the original Python 2 loop
# (which also had a no-op `else: count = count + 0` branch).
count = float(np.sum(y == ts_labels))
accuracy = model.score(ts_features, ts_labels)  # same ratio, via sklearn

print((count / ts_labels.shape[0]) * 100)  # 99.8317712088
print(model.feature_importances_)  # [ 0.92408555  0.02874635  0.0471681 ]
Add Comment
Please sign in to add a comment