Advertisement
Guest User

Untitled

a guest
Nov 12th, 2019
86
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 1.70 KB | None | 0 0
import numpy as np
import pandas as pd
from sklearn import datasets, linear_model
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split

  8. #data pre-processing
  9. le = pp.LabelEncoder()
  10. metadata['dx_type'].iloc[:] = le.fit_transform(metadata['dx_type']).astype(float)
  11. metadata['sex'].iloc[:] = le.fit_transform(metadata['sex']).astype(float)
  12. metadata['localization'].iloc[:] = le.fit_transform(metadata['localization']).astype(float)
  13.  
  14. labels = metadata['dx']
  15. features = metadata.drop(['lesion_id', 'image_id', 'dx'], axis=1)
  16.  
  17. #train + test
  18. X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
  19. X_train = X_train.fillna(-1.0)
  20. X_test = X_test.fillna(-1.0)
  21. y_train = y_train.fillna(-1.0)
  22. y_test = y_test.fillna(-1.0)
  23.  
  24. #random forest classifier
  25. model = RandomForestClassifier()
  26. model.fit(X_train, y_train)
  27. ypred = model.predict(X_test)
  28.  
  29. #save labels, ypred
  30. reverse = dict()
  31. label_map = y_test.index
  32. for l in label_map:
  33. reverse[y_test.loc[l]] = l
  34. labels_named = [reverse[number] for number in y_test]
  35. ypred_named = [reverse[number] for number in ypred]
  36.  
  37. pred_df = pd.DataFrame()
  38. pred_df["y_true"] = labels_named
  39. pred_df["y_pred"] = ypred_named
  40. pred_df.to_csv("../Figures/prat_classifications.csv")
  41.  
  42. #confusion matrix
  43. print('Confusion Matrix')
  44. print(confusion_matrix(y_test, ypred))
  45. print('\n\n\nClassification Report')
  46. print(classification_report(y_test, ypred, target_names=metadata['dx'].unique()))
  47. num_right = np.sum(y_test == ypred)
  48. print(f"\nAccuracy {num_right / ypred.shape[0]}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement