Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #Kevin Tran
- #3-21-18
- #Problem 1 sklearn svm and class
- import numpy as np
- from sklearn import preprocessing, cross_validation, neighbors, svm
- import pandas as pd
- import random
- from svm import Support_Vector_Machine
- from sklearn.metrics import classification_report
- from sklearn.metrics import confusion_matrix
# ---------------------------------------------------------------------------
# Part 1: scikit-learn SVC trained on a column subset of cancer.txt
# ---------------------------------------------------------------------------
# Load the data set. Cells holding '?' are replaced by a large negative
# sentinel (-99999) so they no longer appear as the literal string '?'.
df = pd.read_csv('cancer.txt')
df.replace('?', -99999, inplace=True)

# Drop the first column, then drop seven more columns by position. Of what
# is left, the column at position 2 is used as the target below and the
# remaining columns are used as features.
# `axis` is passed by keyword: DataFrame.drop only accepts it as a keyword
# argument from pandas 2.0 onwards (the positional form used to be `drop(x, 1)`).
df.drop(df.columns[0], axis=1, inplace=True)
df.drop(df.columns[[1, 2, 4, 5, 6, 7, 8]], axis=1, inplace=True)

target_column = df.columns[2]
X = np.array(df.drop([target_column], axis=1))
y = np.array(df[target_column])

# Hold out 40% of the rows for evaluation.
# NOTE(review): `sklearn.cross_validation` was removed in scikit-learn 0.20;
# on current releases the same function is `sklearn.model_selection.train_test_split`.
# The file-level import has to change together with this call.
X_train, X_test, y_train, y_test = cross_validation.train_test_split(
    X, y, test_size=0.4
)

# Fit a default-parameter support vector classifier and score it on the
# held-out rows.
clf = svm.SVC()
clf.fit(X_train, y_train)
confidence = clf.score(X_test, y_test)

# Predict once and reuse the result for both the printed table and the
# metrics below instead of calling predict() twice on the same input.
pred = clf.predict(X_test)

print("Predicted Table of X_Test:")
print(pred, "\n")
print("SKLEARN confidence score: ", confidence, "\n")
print("Begin in class SVM\n")
print(confusion_matrix(y_test, pred))
print(classification_report(y_test, pred))
#
# Email said to use example as the test
#
# ---------------------------------------------------------------------------
# Part 2: the in-class Support_Vector_Machine trained on cancer.txt
# ---------------------------------------------------------------------------
# Reload the file, replace '?' cells with the -99999 sentinel and drop the
# first column. `axis` is passed by keyword because DataFrame.drop only
# accepts it as a keyword argument from pandas 2.0 onwards.
df = pd.read_csv("cancer.txt")
df.replace('?', -99999, inplace=True)
df.drop(df.columns[0], axis=1, inplace=True)

# Work on plain Python lists of floats, shuffled in place so the split
# below is random.
full_data = df.astype(float).values.tolist()
random.shuffle(full_data)

test_size = 0.4
train_set = {2: [], 4: []}
test_set = {2: [], 4: []}

# Use an explicit split index rather than negative slicing: with a negative
# slice, a test count of 0 would turn `full_data[:-0]` into an empty
# training set.
split_index = len(full_data) - int(test_size * len(full_data))
train_data = full_data[:split_index]
test_data = full_data[split_index:]

# Group rows by their last value (the label) and store the remaining values
# as the feature vector. The label is a float after astype(float); 2.0 and
# 4.0 compare and hash equal to the integer keys 2 and 4, so the lookups
# succeed. Any other label value raises KeyError.
for row in train_data:
    train_set[row[-1]].append(row[:-1])
for row in test_data:
    test_set[row[-1]].append(row[:-1])

data_dict = train_set

# Named `class_svm` so the `svm` module imported from sklearn at the top of
# the file is not rebound to this instance.
class_svm = Support_Vector_Machine()
class_svm.fit(data=data_dict)

# NOTE(review): these sample points are not used by the code below.
# Presumably they are the "example" mentioned in the comment above; confirm
# whether they or the held-out rows are the intended test input.
predict_us = [
    [0, 10],
    [1, 3],
    [3, 4],
    [3, 5],
    [5, 5],
    [5, 6],
    [6, -5],
    [5, 8],
]

# Run the classifier on every held-out feature vector. Iterating `test_set`
# directly yields only its keys (the labels 2 and 4), so the rows stored
# under each label have to be walked explicitly.
for label in test_set:
    for features in test_set[label]:
        class_svm.predict(features)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement