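# Compare several scikit-learn classifiers (KNN, random forest, AdaBoost,
# gradient boosting, MLP, decision tree, SVC, multinomial naive Bayes and
# logistic regression) on a CSV dataset, tune a few of them with GridSearchCV,
# then combine them in a hard-voting ensemble and report test-set accuracy,
# a classification report, and 10-fold cross-validation scores.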
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV
from sklearn.neighbors import KNeighborsClassifier
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import GaussianNB, BernoulliNB, MultinomialNB
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier, BaggingClassifier
from sklearn.ensemble import VotingClassifier
from sklearn.metrics import classification_report, accuracy_score
from sklearn import metrics

# Load the dataset; the target column is 'mobileOp'.
df = pd.read_csv('/Users/arbade/Desktop/Datasets/realData.csv', encoding="utf-8")

X = df.drop(columns=['mobileOp'])
y = df['mobileOp']

seed = 42
num_trees = 25
# shuffle=True is required when a random_state is passed to KFold.
kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)

# Hold out 20% of the data for the final test-set scores.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=seed)

# k-nearest neighbours, tuning the number of neighbours by grid search.
knn = KNeighborsClassifier(n_neighbors=3, metric='minkowski')

params_knn = {"n_neighbors": np.arange(1, 50)}

knn_gs = GridSearchCV(knn, params_knn, cv=kfold)
knn_gs.fit(X_train, y_train)
knn_best = knn_gs.best_estimator_
prediction = knn_best.predict(X_test)
print(prediction)

# Random forest, tuning the number of trees.
rf = RandomForestClassifier(n_estimators=num_trees, random_state=seed, max_features="sqrt")

params_rf = {"n_estimators": [50, 100]}

rf_gs = GridSearchCV(rf, params_rf, cv=kfold)
rf_gs.fit(X_train, y_train)
rf_best = rf_gs.best_estimator_

print(rf_gs.best_params_)

# AdaBoost, tuning the number of estimators.
adaBoost = AdaBoostClassifier(n_estimators=num_trees, random_state=seed)

params_adaBoost = {"n_estimators": [50, 100]}

adaBoost_gs = GridSearchCV(adaBoost, params_adaBoost, cv=kfold)
adaBoost_gs.fit(X_train, y_train)
adaBoost_best = adaBoost_gs.best_estimator_

# Fit the untuned model as well (only adaBoost_best is actually scored below).
adaBoost.fit(X_train, y_train)

# Gradient boosting, tuning the number of estimators.
grBoost = GradientBoostingClassifier(n_estimators=num_trees, random_state=seed)
params_grBoost = {"n_estimators": [50, 100]}

grBoost_gs = GridSearchCV(grBoost, params_grBoost, cv=kfold)
grBoost_gs.fit(X_train, y_train)
grBoost_best = grBoost_gs.best_estimator_

# Fit the untuned model as well (only grBoost_best is actually scored below).
grBoost.fit(X_train, y_train)

# Remaining base classifiers, fitted with fixed hyper-parameters.
mlp = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(15, 15), random_state=seed)
mlp.fit(X_train, y_train)
dtc = DecisionTreeClassifier(max_depth=10, random_state=seed, criterion='entropy')
dtc.fit(X_train, y_train)
svc = SVC(gamma='scale', kernel='rbf', probability=True, random_state=seed)
svc.fit(X_train, y_train)
# Note: MultinomialNB expects non-negative (count-like) features.
nb = MultinomialNB()
nb.fit(X_train, y_train)
# With the saga solver, scikit-learn uses the multinomial formulation for
# multi-class targets by default, so multi_class is not passed explicitly.
log_reg = LogisticRegression(penalty='l1', solver='saga', max_iter=100, C=1e5,
                             random_state=seed, dual=False, intercept_scaling=1,
                             verbose=0, n_jobs=3, class_weight=None)
# The logistic regression must be fitted before it can be scored below.
log_reg.fit(X_train, y_train)

  107. print("KNN Classifier: {}".format(knn_best.score(X_test, y_test)))
  108.  
  109. print("Random Forest: {}".format(rf_best.score(X_test, y_test)))
  110.  
  111. print("Logistic Regression: {}".format(log_reg.score(X_test, y_test)))
  112.  
  113. print("SVC Classifier: {}".format(svc.score(X_test, y_test)))
  114.  
  115. print("Naive-Bayes Classifier: {}".format(nb.score(X_test, y_test)))
  116.  
  117. print("Desicion-Tree: {}".format(dtc.score(X_test, y_test)))
  118.  
  119. print("Multi-Layer Perceptron: {}".format(mlp.score(X_test, y_test)))
  120.  
  121. print("AdaBoost: {}".format(adaBoost_best.score(X_test, y_test)))
  122.  
  123. print("GradientBoosting Classifier: {}".format(grBoost_best.score(X_test, y_test)))
  124.  
  125.  
  126.  
  127.  
  128.  
# Combine all tuned/fitted classifiers in a hard-voting ensemble.
estimators = [("knn", knn_best), ("rf", rf_best), ("log_reg", log_reg), ("nb", nb),
              ("svc", svc), ("dtc", dtc), ("mlp", mlp),
              ("adaBoost", adaBoost_best), ("grBoost", grBoost_best)]

ensemble = VotingClassifier(estimators, voting="hard")

ensemble.fit(X_train, y_train)

a = ensemble.score(X_test, y_test)
ensPred = ensemble.predict(X_test)
# 10-fold cross-validation of the full ensemble on the whole dataset.
results = model_selection.cross_val_score(ensemble, X, y, cv=kfold)

print("Accuracy: %0.2f (+/- %0.2f)" % (results.mean(), results.std()))
print("std: %0.2f%%" % (results.std() * 100))

print("Ensemble test accuracy: {}".format(accuracy_score(y_test, ensPred)))

#print('Ensemble Score: ' + repr(a) + '%')
print('Average Score: ' + repr(results.mean() * 100) + '%')
print(classification_report(y_test, ensPred))
#print(accuracy_score(y_test, ensPred))
#print("Accuracy:", metrics.accuracy_score(y_test, ensPred))