Advertisement
Guest User

WINEQUALITY_RED

a guest
Nov 15th, 2019
110
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 1.36 KB | None | 0 0
  1. import matplotlib.pyplot as plt
  2. import pandas
  3. from pandas.plotting import scatter_matrix
  4.  
  5. from sklearn import model_selection
  6. from sklearn.model_selection import train_test_split
  7.  
  8. from sklearn.naive_bayes import GaussianNB
  9. from sklearn import metrics
  10.  
  # Path of the red-wine quality CSV that is loaded below.
  url = "C:/Users/Martin Njuguna/Documents/winequality-red.csv"

  # Labels assigned to the CSV columns, in file order: eleven measurements
  # followed by the 'quality' score.  'chlorides' and 'free sulfur dioxide'
  # are two separate columns; a misplaced comma previously produced the
  # labels 'chlorides free' and 'sulfur dioxide' instead.
  names = [
      'fixed acidity',
      'volatile acidity',
      'citric acid',
      'residual sugar',
      'chlorides',
      'free sulfur dioxide',
      'total sulfur dioxide',
      'density',
      'pH',
      'sulphates',
      'alcohol',
      'quality',
  ]

  # NOTE(review): because names= is given and header= is not, pandas treats
  # every row of the file (including the first) as data.  If this CSV still
  # carries its own header row, or uses ';' as the separator, header=0 and/or
  # sep=';' are needed here -- confirm against the actual file.
  dataset = pandas.read_csv(url, names=names)
  14.  
  # --- Text summaries -------------------------------------------------------
  # (rows, columns) of the loaded frame.
  dimensions = dataset.shape
  print(dimensions)

  # First five rows, then the complete frame.
  preview = dataset.head(5)
  print(preview)
  print(dataset)

  # Per-column descriptive statistics.
  statistics = dataset.describe()
  print(statistics)

  # Number of rows for each distinct value of the 'quality' column.
  rows_per_quality = dataset.groupby('quality').size()
  print(rows_per_quality)

  # --- Plots ----------------------------------------------------------------
  # One box plot per column, each on its own axes inside a 5x5 grid, with no
  # axis shared between the subplots.
  box_plot_options = {
      'kind': 'box',
      'subplots': True,
      'layout': (5, 5),
      'sharex': False,
      'sharey': False,
  }
  dataset.plot(**box_plot_options)
  plt.show()

  # Histogram of every column.
  dataset.hist()
  plt.show()

  # Pairwise scatter plots of all columns.
  scatter_matrix(dataset)
  plt.show()
  29.  
  # Work on the raw values of the frame: columns 0-10 are the eleven
  # measurements, column 11 is the 'quality' score (see `names`).
  array = dataset.values
  X = array[:, 0:11]  # feature matrix: the eleven measurement columns
  # Target is the last column, 'quality'.  This used to read column 4, which
  # is one of the feature columns already contained in X, so the model was
  # asked to predict an input measurement rather than the quality label.
  Y = array[:, 11]

  # Hold out 20% of the rows for testing and train on the remaining 80%.
  # A fixed random_state makes the split reproducible between runs.
  validation_size = 0.20
  seed = 7
  X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
      X, Y, test_size=validation_size, random_state=seed
  )

  print("X_train", X_train)
  print("X_test", X_test)
  print("Y_train", Y_train)
  print("Y_test", Y_test)

  # Fit a Gaussian naive Bayes classifier on the training portion.
  model = GaussianNB()
  model = model.fit(X_train, Y_train)

  # Fraction of held-out rows whose quality value is predicted exactly.
  y_predicted = model.predict(X_test)
  print(metrics.accuracy_score(Y_test, y_predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement