Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
from sklearn import datasets, linear_model
from sklearn import preprocessing as pp
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import train_test_split
# Train a random-forest classifier that predicts the `dx` column from the
# remaining metadata columns, save the per-sample predictions to CSV, and
# print a confusion matrix, a classification report and the accuracy.
# NOTE(review): `metadata` is expected to be a pandas DataFrame created before
# this point; it is not defined in this script - confirm where it comes from.

SEED = 42  # shared by the split and the forest so reruns give the same output

# data pre-processing
# Assign whole columns back on the frame. Writing through
# `metadata[col].iloc[:] = ...` is a chained assignment that may only modify a
# temporary copy, leaving the original strings in place.
for column in ('dx_type', 'sex', 'localization'):
    encoder = pp.LabelEncoder()
    metadata[column] = encoder.fit_transform(metadata[column]).astype(float)

labels = metadata['dx']
features = metadata.drop(['lesion_id', 'image_id', 'dx'], axis=1)

# train + test
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.2, random_state=SEED
)
# Only the features are imputed (missing values become the sentinel -1.0).
# The targets are class names and must not be filled with a numeric value.
X_train = X_train.fillna(-1.0)
X_test = X_test.fillna(-1.0)

# random forest classifier
model = RandomForestClassifier(random_state=SEED)
model.fit(X_train, y_train)
ypred = model.predict(X_test)
# Sorted class labels learned during fit; used to pin the row/column order of
# the confusion matrix and the report to the same sequence.
class_names = model.classes_

# save labels, ypred
# `dx` is never label-encoded, so y_test and ypred already hold the class
# names and can be written out directly. `to_numpy()` drops the original row
# index so both columns line up positionally.
pred_df = pd.DataFrame({"y_true": y_test.to_numpy(), "y_pred": ypred})
pred_df.to_csv("../Figures/prat_classifications.csv")

# confusion matrix
print('Confusion Matrix')
print(confusion_matrix(y_test, ypred, labels=class_names))
print('\n\n\nClassification Report')
# No `target_names`: the string labels themselves are used as row names, which
# keeps every name attached to its own class.
print(classification_report(y_test, ypred, labels=class_names))
print(f"\nAccuracy {accuracy_score(y_test, ypred)}")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement