#!/usr/bin/env python
# coding: utf-8

# In[1]:


import numpy as np
import pandas as pd
import sklearn as sk
import matplotlib.pyplot as plt
import math

# Initialize values and load the data
data_path = r'F:\AIUB\DM\Project Supervised\glass_data.csv'
data = pd.read_csv(data_path, index_col='ID')

features = ['RI', 'NA2O', 'MGO', 'AL2O3', 'SIO2', 'K2O', 'CAO', 'BAO', 'FE2O3']
oxide_columns = features[1:]
class_col = 'TYPE'

# Keys:
# FP = Float Processed
# NFP = Non Float Processed
labels = ['Building Windows FP', 'Building Windows NFP', 'Vehicle Windows FP',
          'Vehicle Windows NFP', 'Containers', 'Tableware', 'Headlamps']


# In[2]:


# Replace the TYPE column values from foreign keys to their actual names

def getLabelNameFromReference(ref):
    if ref < 1 or ref > 7:
        raise ValueError('Invalid reference number for Glass Type column: {}'.format(ref))

    return labels[ref - 1]

data['TYPE'] = data['TYPE'].map(getLabelNameFromReference)
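
# Equivalent one-liner (a sketch, not in the original): the code-to-name
# mapping can also be built directly from the labels list with a dict.
# data['TYPE'] = data['TYPE'].map(dict(enumerate(labels, start=1)))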


# In[3]:


# All instances have very similar values for the refractive index (RI).
# By scaling the column by 1000 we magnify these small differences so that
# RI contributes meaningfully to the Manhattan distance.

data['RI'] *= 1000

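# An alternative to the ad-hoc rescaling above (a sketch, not in the
# original notebook): standardising every feature with sklearn's
# StandardScaler puts all columns on a comparable scale for the Manhattan
# distance. `scaled_data` is a hypothetical name used only here.

# from sklearn.preprocessing import StandardScaler
# scaled_data = data.copy()
# scaled_data[features] = StandardScaler().fit_transform(data[features])
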
# In[4]:


# Train the classifier

from sklearn.neighbors import KNeighborsClassifier

k_neighbours = 3
predictor = KNeighborsClassifier(n_neighbors=k_neighbours, metric='manhattan', weights='distance')
# cross_val_score below clones and refits the estimator internally, so an
# explicit fit is only needed for standalone predictions:
# predictor.fit(data[features], data[class_col])


# In[5]:


# Make predictions

# predictor.predict(data[features][:3])

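# Sketch of a standalone fit/predict round trip on a held-out split (not in
# the original notebook; left commented out, like the cells above, so it
# does not interfere with the cross-validation below):

# from sklearn.model_selection import train_test_split
# X_train, X_test, y_train, y_test = train_test_split(
#     data[features], data[class_col], test_size=0.2, stratify=data[class_col])
# predictor.fit(X_train, y_train)
# print('Sample predictions:', predictor.predict(X_test[:3]))
# print('Hold-out accuracy:', predictor.score(X_test, y_test))
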

# ## Classifier Evaluation:
#

# In[6]:


# Run k-fold cross-validation several times, reshuffling the data each time
from sklearn.model_selection import cross_val_score

k = 8
repeat = 5

print('{} fold cross validation:'.format(k))

grand_sum = 0
for _ in range(repeat):
    data = data.sample(frac=1).reset_index(drop=True)

    scores = cross_val_score(predictor, data[features], data[class_col], scoring='accuracy', cv=k, n_jobs=-1)
    grand_sum += sum(scores)

    print('Results: -----------------')
    print('Scores:', ', '.join([str(round(x, 3)) for x in scores]))
    print('Average:', round(np.average(scores), 4))
    print()

grand_avg_accuracy = grand_sum / (k * repeat)

print('Grand Average:', round(grand_avg_accuracy, 3))
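
# The shuffle-and-repeat loop above can also be written with sklearn's
# built-in repeated splitter (a sketch, equivalent in spirit; commented out
# to avoid duplicating the run above):

# from sklearn.model_selection import RepeatedStratifiedKFold
# rskf = RepeatedStratifiedKFold(n_splits=k, n_repeats=repeat)
# all_scores = cross_val_score(predictor, data[features], data[class_col],
#                              scoring='accuracy', cv=rskf, n_jobs=-1)
# print('Grand Average:', round(all_scores.mean(), 3))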


# In[7]:


# Helper functions
def getConfidenceInterval(accuracy):
    """accuracy is a proportion in [0, 1]"""
    zcl = 1.64  # z value for a 90% confidence level
    std_error = math.sqrt((accuracy * (1 - accuracy)) / len(data))

    return accuracy - std_error * zcl, accuracy + std_error * zcl

lower, upper = getConfidenceInterval(grand_avg_accuracy)
lower = round(lower, 3)
upper = round(upper, 3)
print('Predictive accuracy lies within the range: [{}, {}] (90% confidence)'.format(lower, upper))
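
# Where the hard-coded 1.64 comes from (a sketch, assuming scipy is
# available): the z value for any confidence level can be derived from the
# normal distribution's quantile function.

# from scipy.stats import norm
# confidence = 0.90
# zcl = norm.ppf(1 - (1 - confidence) / 2)  # ~1.645 for a 90% level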


# In[8]:


# Construct a confusion matrix using the entire data set

# from sklearn.metrics import confusion_matrix

# cm = confusion_matrix(data[class_col], predictor.predict(data[features]), labels=labels)
# cm = pd.DataFrame(cm, index=labels, columns=labels)

# cm
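
# A sketch that avoids scoring on training data (not in the original):
# cross_val_predict yields out-of-fold predictions, so the resulting matrix
# reflects generalisation rather than memorisation. `oof_pred` is a
# hypothetical name used only here.

# from sklearn.metrics import confusion_matrix
# from sklearn.model_selection import cross_val_predict

# oof_pred = cross_val_predict(predictor, data[features], data[class_col], cv=k)
# cm = pd.DataFrame(confusion_matrix(data[class_col], oof_pred, labels=labels),
#                   index=labels, columns=labels)
# cm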


# In[ ]: