Advertisement
Guest User

Untitled

a guest
Mar 23rd, 2018
163
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.97 KB | None | 0 0
  1. import pandas
  2. from pandas.plotting import scatter_matrix
  3. from pandas.plotting import andrews_curves
  4. import matplotlib.pyplot as plt
  5. from sklearn import model_selection
  6. from sklearn.metrics import classification_report
  7. from sklearn.metrics import confusion_matrix
  8. from sklearn.metrics import accuracy_score
  9. from sklearn.linear_model import LogisticRegression
  10. from sklearn.tree import DecisionTreeClassifier
  11. from sklearn.neighbors import KNeighborsClassifier
  12. from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
  13. from sklearn.naive_bayes import GaussianNB
  14. from sklearn.svm import SVC
  15. import pyodbc as cn
  16. import numpy as np
  17. import pandas as pd
  18. from sklearn import preprocessing
  19.  
  20.  
  21. class Program:
  22.  
  23.  
  24.     #url = "https://archive.ics.uci.edu/ml/machine-learning-databases/iris/iris.data"
  25.     # url = "C:\\Users\\rajmu\\Desktop\\iris.data"
  26.     #names = ['sepal-length', 'sepal-width', 'petal-length', 'petal-width', 'class']
  27.     validation_size = -1
  28.     seed = -1
  29.  
  30.     def __init__(self):
  31.         #self.dataset = pandas.read_csv(Program.url, names=Program.names)
  32.  
  33.         #Connecting to database
  34.         server = 'facil.database.windows.net'
  35.         database = 'main'
  36.         username = 'facildatabase'
  37.         password = 'DifficultPassword69.'
  38.         driver = '{ODBC Driver 11 for SQL Server}'
  39.         cnxn = cn.connect('DRIVER=' + driver + ';SERVER=' + server + ';DATABASE=' + database + ';UID=' + username + ';PWD=' + password)
  40.         print(cnxn)
  41.  
  42.         ##Importing Data to dataframes form daatabase
  43.         nutsComplete = pd.read_sql('SELECT * from dbo.NutsComplete', con = cnxn)
  44.         boltsComplete = pd.read_sql('SELECT * from dbo.BoltsComplete', con = cnxn)
  45.         contracts = pd.read_sql('SELECT * from dbo.contracts', con = cnxn)
  46.         coatings = pd.read_sql('SELECT * from dbo.coatings', con = cnxn)
  47.         geometryCoating = pd.read_sql('SELECT * from dbo.geometryCoating', con = cnxn)
  48.         countryCodes = pd.read_sql('SELECT * from dbo.countryCodes', con = cnxn)
  49.         rfqPortal = pd.read_sql('SELECT * from dbo.rfqPortal', con = cnxn)
  50.  
  51.         headers = list(nutsComplete)
  52.  
  53.         nutsComplete['Has_Washer'] = nutsComplete['Has_Washer'].astype('category')
  54.         nutsComplete['plantNumber'] = nutsComplete['plantNumber'].astype('category')
  55.  
  56.         self.dataset = nutsComplete
  57.  
  58.     def info(self):
  59.         print(self.dataset.shape)
  60.         print(self.dataset.describe())
  61.  
  62.     def show_sample(self, amt=10):
  63.         print(self.dataset.head(amt))
  64.  
  65.     def show_whisker_plots(self):
  66.         self.dataset.plot(kind="box", subplots=True, layout=(1, 4), sharex=False, sharey=False)
  67.         plt.show()
  68.  
  69.     def show_histogram(self):
  70.         # histograms
  71.         self.dataset.hist()
  72.         plt.show()
  73.  
  74.     def show_matrix(self):
  75.         scatter_matrix(self.dataset)
  76.         plt.show()
  77.  
  78.     def show_curves(self):
  79.         andrews_curves(self.dataset, 'class')
  80.         plt.show()
  81.  
  82.     def __split_validation_dataset(self):
  83.         array = self.dataset.values
  84.         X = array[:, 0:4]
  85.         Y = array[:, 4]
  86.         Program.validation_size = 0.5
  87.         Program.seed = 7
  88.         return model_selection.train_test_split(X, Y, test_size=Program.validation_size, random_state=Program.seed)
  89.  
  90.     def test_models(self):
  91.         X_train, X_validation, Y_train, Y_validation = Program.__split_validation_dataset(self)
  92.         scoring = 'accuracy'
  93.         models = []
  94.         models.append(('LR', LogisticRegression()))
  95.         models.append(('LDA', LinearDiscriminantAnalysis()))
  96.         models.append(('KNN', KNeighborsClassifier()))
  97.         models.append(('CART', DecisionTreeClassifier()))
  98.         models.append(('NB', GaussianNB()))
  99.         models.append(('SVM', SVC()))
  100.         # evaluate each model in turn
  101.         results = []
  102.         names = []
  103.         for name, model in models:
  104.             kfold = model_selection.KFold(n_splits=10, random_state=Program.seed)
  105.             cv_results = model_selection.cross_val_score(model, X_train, Y_train, cv=kfold, scoring=scoring)
  106.             results.append(cv_results)
  107.             names.append(name)
  108.             msg = "%s: %f (%f)" % (name, cv_results.mean(), cv_results.std())
  109.             print(msg)
  110.         # visualize test data
  111.         fig = plt.figure()
  112.         fig.suptitle('Alg comparison')
  113.         ax = fig.add_subplot(111)
  114.         plt.boxplot(results)
  115.         ax.set_xticklabels(names)
  116.         plt.show()
  117.  
  118.     def predict_with_knn(self):
  119.         knn = KNeighborsClassifier()
  120.         x_train, x_validation, y_train, y_validation = Program.__split_validation_dataset(self)
  121.         knn.fit(x_train, y_train)
  122.         predictions = knn.predict(x_validation)
  123.         print(accuracy_score(y_validation, predictions))
  124.         print(confusion_matrix(y_validation, predictions))
  125.         print(classification_report(y_validation, predictions))
  126.  
  127.  
  128. def main():
  129.     p = Program()
  130.     #p.test_models()
  131.     p.predict_with_knn()
  132.     p.show_histogram()
  133.  
  134.  
  135. if __name__ == "__main__":
  136.     main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement