Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- '''Name: Ritam Chakraborty
- Roll: B20127
- Phone: 7439257709'''
- # import libraries
- import pandas as pd
- import numpy as np
- from numpy.linalg import det, inv
- # for computing confusion matrix and accuracy score
- from sklearn.metrics import confusion_matrix, accuracy_score
- # log-discriminant (log prior + log Gaussian likelihood) used by the Bayes classifier below
def prob(x, inv_cov_mat, mu_mat, Pci):
    """Return the log of (class prior * multivariate Gaussian density) at ``x``.

    The value is
    ``log(Pci) + 0.5*log|inv_cov_mat| - (d/2)*log(2*pi) - 0.5*(x-mu)^T inv_cov_mat (x-mu)``
    where ``d`` is the number of features in ``x``.  Scores from different
    classes can be compared directly; the larger score wins.

    Args:
        x: Feature vector of the sample being scored.
        inv_cov_mat: Inverse of the class covariance matrix (d x d).
        mu_mat: Class mean vector, same length as ``x``.
        Pci: Prior probability of the class.

    Returns:
        The log-discriminant score.  It is ``nan`` when the determinant of
        ``inv_cov_mat`` is negative and ``-inf`` when it is zero, matching
        what taking the log of the determinant directly would produce.
    """
    diff = x - mu_mat
    # Squared Mahalanobis distance between x and the class mean.
    mahalanobis_sq = np.dot(np.dot(diff.T, inv_cov_mat), diff)

    # slogdet works in log space, so the determinant of a high-dimensional
    # matrix cannot underflow to 0 or overflow to inf before the log is taken.
    sign, log_abs_det = np.linalg.slogdet(inv_cov_mat)
    log_det = log_abs_det if sign >= 0 else np.nan

    # The normalisation term depends on the actual dimensionality of the
    # data (it equals 11.5*log(2*pi) only for 23 features).
    n_features = np.size(diff)

    return (
        np.log(Pci)
        + 0.5 * log_det
        - 0.5 * n_features * np.log(2 * np.pi)
        - 0.5 * mahalanobis_sq
    )
# Load the training and test splits.
df_train = pd.read_csv("SteelPlateFaults-train.csv")
df_test = pd.read_csv("SteelPlateFaults-test.csv")

# remove the following attributes since they make the covariance matrix singular
delete_items = ["TypeOfSteel_A300", "TypeOfSteel_A400", "X_Minimum", "Y_Minimum"]
df_train = df_train.drop(columns=delete_items)
df_test = df_test.drop(columns=delete_items)

# separate the classes from the training dataset (features only, label dropped)
df_class1 = df_train[df_train["Class"] == 1].drop(columns="Class")
df_class0 = df_train[df_train["Class"] == 0].drop(columns="Class")

# compute mean vectors and covariance matrices for both classes
mu1 = df_class1.mean().to_numpy()
mu0 = df_class0.mean().to_numpy()
cov1 = df_class1.cov()
cov0 = df_class0.cov()
inv1 = inv(cov1)
inv0 = inv(cov0)

# save the estimated parameters
pd.DataFrame(mu0).to_csv("mean0.csv")
pd.DataFrame(mu1).to_csv("mean1.csv")
cov1.to_csv("cov1")
cov0.to_csv("cov0")

# class priors estimated from the training class frequencies, so they stay
# correct if the training file changes
n_class1 = len(df_class1)
n_class0 = len(df_class0)
prior1 = n_class1 / (n_class1 + n_class0)
prior0 = n_class0 / (n_class1 + n_class0)

# predict based on bayes classifier
# Select test features by the training feature names rather than by a fixed
# positional slice, and iterate over rows directly so the result does not
# depend on the index labels of df_test.
test_features = df_test[df_class1.columns].to_numpy()
pred = [
    1 if prob(x, inv1, mu1, prior1) > prob(x, inv0, mu0, prior0) else 0
    for x in test_features
]

# print the confusion matrix and accuracy scores
print("Confusion matrix:")
print(confusion_matrix(df_test["Class"], pred))
print("Accuracy score:")
print(accuracy_score(df_test["Class"], pred))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement