Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas
import pandas as pd
import pyodbc as cn
from pandas.plotting import andrews_curves
from pandas.plotting import scatter_matrix
from sklearn import model_selection
from sklearn import preprocessing
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier
class Program:
    """Load ``dbo.NutsComplete`` from SQL Server and explore/classify it.

    The table is read once in ``__init__`` and kept in ``self.dataset``.
    The remaining methods print summaries, draw plots, or fit scikit-learn
    classifiers on a train/validation split of that frame.
    """

    # Fraction of rows held out for validation by the train/validation split.
    validation_size = 0.5
    # Seed shared by the train/validation split and the cross-validation folds.
    seed = 7

    def __init__(self):
        """Connect to the database and load ``dbo.NutsComplete``.

        Each connection setting can be overridden with an environment
        variable (``FACIL_DB_SERVER``, ``FACIL_DB_NAME``, ``FACIL_DB_USER``,
        ``FACIL_DB_PASSWORD``, ``FACIL_DB_DRIVER``); when a variable is not
        set, the value that was previously hard-coded is used.

        Only ``dbo.NutsComplete`` is fetched: it is the only table this class
        stores or reads afterwards.
        """
        env = os.environ
        server = env.get('FACIL_DB_SERVER', 'facil.database.windows.net')
        database = env.get('FACIL_DB_NAME', 'main')
        username = env.get('FACIL_DB_USER', 'facildatabase')
        # NOTE(review): this fallback keeps a real credential in source
        # control; rotate it and supply FACIL_DB_PASSWORD instead.
        password = env.get('FACIL_DB_PASSWORD', 'DifficultPassword69.')
        driver = env.get('FACIL_DB_DRIVER', '{ODBC Driver 11 for SQL Server}')

        cnxn = cn.connect(
            'DRIVER=' + driver
            + ';SERVER=' + server
            + ';DATABASE=' + database
            + ';UID=' + username
            + ';PWD=' + password
        )
        try:
            print(cnxn)
            nuts_complete = pd.read_sql('SELECT * from dbo.NutsComplete', con=cnxn)
        finally:
            # Release the connection even when the query fails.
            cnxn.close()

        for column in ('Has_Washer', 'plantNumber'):
            nuts_complete[column] = nuts_complete[column].astype('category')
        self.dataset = nuts_complete

    def info(self):
        """Print the dataset's shape and its ``describe()`` summary."""
        print(self.dataset.shape)
        print(self.dataset.describe())

    def show_sample(self, amt=10):
        """Print the first ``amt`` rows of the dataset."""
        print(self.dataset.head(amt))

    def show_whisker_plots(self):
        """Show one box-and-whisker subplot per numeric column.

        The layout is sized from the number of numeric columns; a fixed
        one-by-four grid makes pandas raise when more than four are plotted.
        """
        numeric = self.dataset.select_dtypes(include='number')
        numeric.plot(
            kind="box",
            subplots=True,
            layout=(1, max(1, numeric.shape[1])),
            sharex=False,
            sharey=False,
        )
        plt.show()

    def show_histogram(self):
        """Show the histograms produced by ``DataFrame.hist``."""
        self.dataset.hist()
        plt.show()

    def show_matrix(self):
        """Show a scatter-plot matrix of the dataset."""
        scatter_matrix(self.dataset)
        plt.show()

    def show_curves(self, class_column='class'):
        """Show Andrews curves grouped by ``class_column``.

        ``class_column`` defaults to ``'class'``, the name that was
        previously hard-coded, so existing calls behave as before.
        """
        andrews_curves(self.dataset, class_column)
        plt.show()

    def __split_validation_dataset(self):
        """Split the dataset into train and validation parts.

        Returns the four arrays from ``train_test_split`` in the order
        ``X_train, X_validation, Y_train, Y_validation``.
        """
        array = self.dataset.values
        # NOTE(review): assumes the first four columns are the features and
        # the fifth is the label -- confirm against dbo.NutsComplete.
        X = array[:, 0:4]
        Y = array[:, 4]
        return model_selection.train_test_split(
            X, Y, test_size=self.validation_size, random_state=self.seed
        )

    def test_models(self):
        """Cross-validate six classifiers and compare their accuracy.

        Prints the mean and standard deviation of the 10-fold accuracy for
        each model, then shows a box plot of the per-fold scores.
        """
        X_train, X_validation, Y_train, Y_validation = self.__split_validation_dataset()
        scoring = 'accuracy'
        models = [
            ('LR', LogisticRegression()),
            ('LDA', LinearDiscriminantAnalysis()),
            ('KNN', KNeighborsClassifier()),
            ('CART', DecisionTreeClassifier()),
            ('NB', GaussianNB()),
            ('SVM', SVC()),
        ]
        # scikit-learn rejects random_state on KFold unless shuffle=True, so
        # shuffling is enabled to keep the folds seeded. With a fixed integer
        # seed every model is scored on the same folds.
        kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=self.seed)

        # evaluate each model in turn
        results = []
        names = []
        for name, model in models:
            cv_results = model_selection.cross_val_score(
                model, X_train, Y_train, cv=kfold, scoring=scoring
            )
            results.append(cv_results)
            names.append(name)
            msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
            print(msg)

        # visualize test data
        fig = plt.figure()
        fig.suptitle('Alg comparison')
        ax = fig.add_subplot(111)
        plt.boxplot(results)
        ax.set_xticklabels(names)
        plt.show()

    def predict_with_knn(self):
        """Fit a k-nearest-neighbours classifier and report validation results.

        Prints the accuracy score, the confusion matrix and the
        classification report for the validation split.
        """
        knn = KNeighborsClassifier()
        x_train, x_validation, y_train, y_validation = self.__split_validation_dataset()
        knn.fit(x_train, y_train)
        predictions = knn.predict(x_validation)
        print(accuracy_score(y_validation, predictions))
        print(confusion_matrix(y_validation, predictions))
        print(classification_report(y_validation, predictions))
def main():
    """Build a Program, print its KNN validation metrics, then show histograms."""
    program = Program()
    # The multi-model comparison (program.test_models()) is left disabled here.
    program.predict_with_knn()
    program.show_histogram()
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement