Advertisement
Ritam_C

ds3l4q3corrected

Oct 1st, 2021
162
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.94 KB | None | 0 0
  1. '''Name: Ritam Chakraborty
  2.   Roll: B20127
  3.   Phone: 7439257709'''
  4.  
  5. # import libraries
  6. import pandas as pd
  7. import numpy as np
  8. from numpy.linalg import det, inv
  9. # for computing confusion matrix and accuracy score
  10. from sklearn.metrics import confusion_matrix, accuracy_score
  11. # for making comparator to sort the distances and corresponding classes based on distances
  12.  
  13. def prob(x, inv_cov_mat, mu_mat, Pci):
  14.     diff = x-mu_mat
  15.     diff_T = diff.T
  16.     dotprod = np.dot(diff.T, inv_cov_mat)
  17.     prod = np.dot(dotprod, diff)
  18.     return np.log(Pci)+0.5*np.log(det(inv_cov_mat))-11.5*np.log(2*np.pi)-0.5*prod
  19.  
  20. df_train = pd.read_csv("SteelPlateFaults-train.csv")
  21. df_test = pd.read_csv("SteelPlateFaults-test.csv")
  22. # remove the following attributes since they make the covariance matrix singular
  23. delete_items = ["TypeOfSteel_A300", "TypeOfSteel_A400", "X_Minimum", "Y_Minimum"]
  24. for items in delete_items:
  25.     df_train.__delitem__(items)
  26.     df_test.__delitem__(items)
  27.  
  28. # separate the classes from the training dataset
  29. df_class1 = df_train[df_train["Class"] == 1].copy()
  30. df_class1.__delitem__("Class")
  31. df_class0 = df_train[df_train["Class"] == 0].copy()
  32. df_class0.__delitem__("Class")
  33.  
  34. # compute mean vectors from both classes
  35. mu1 = df_class1.mean().to_numpy()
  36. mu0 = df_class0.mean().to_numpy()
  37. inv1 = inv(df_class1.cov())
  38. inv0 = inv(df_class0.cov())
  39. pd.DataFrame(mu0).to_csv("mean0.csv")
  40. pd.DataFrame(mu1).to_csv("mean1.csv")
  41. df_class1.cov().to_csv("cov1")
  42. df_class0.cov().to_csv("cov0")
  43.  
  44. # predict based on bayes classifier
  45. pred = []
  46. for ind in df_test.index:
  47.     p1 = prob(df_test.iloc[ind, :23].to_numpy(), inv1, mu1, 509/(509+273))
  48.     p0 = prob(df_test.iloc[ind, :23].to_numpy(), inv0, mu0, 273/(509+273))
  49.     if p1 > p0:
  50.         pred.append(1)
  51.     else:
  52.         pred.append(0)
  53.  
  54. #print the confusion matrix and accuracy scores
  55. print("Confusion matrix:")
  56. print(confusion_matrix(df_test["Class"], pred))
  57. print("Accuracy score:")
  58. print(accuracy_score(df_test["Class"], pred))
  59.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement