Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Gaussian Naive Bayes classification of red-wine quality.
#
# Loads the wine-quality CSV, prints summary statistics, shows exploratory
# plots (box plots, histograms, scatter matrix), then trains a GaussianNB
# model on an 80/20 train/test split and prints its accuracy on the test set.
import matplotlib.pyplot as plt
import pandas
from pandas.plotting import scatter_matrix
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

url = "C:/Users/Martin Njuguna/Documents/winequality-red.csv"

# Eleven input features followed by the label column ('quality').
# 'chlorides' and 'free sulfur dioxide' are two separate columns; a misplaced
# comma previously labelled them 'chlorides free' and 'sulfur dioxide'.
names = [
    'fixed acidity',
    'volatile acidity',
    'citric acid',
    'residual sugar',
    'chlorides',
    'free sulfur dioxide',
    'total sulfur dioxide',
    'density',
    'pH',
    'sulphates',
    'alcohol',
    'quality',
]

# NOTE(review): with names= and no header=, pandas treats the first line of
# the file as data. If this CSV still has its header row, pass header=0;
# if it is ';'-separated (as the UCI original is), also pass sep=';'.
# Confirm against the actual file.
dataset = pandas.read_csv(url, names=names)

# --- Summary output ---------------------------------------------------------
print(dataset.shape)
print(dataset.head(5))
print(dataset)
print(dataset.describe())
print(dataset.groupby('quality').size())

# --- Exploratory plots ------------------------------------------------------
dataset.plot(kind='box', subplots=True, layout=(5, 5), sharex=False, sharey=False)
plt.show()
dataset.hist()
plt.show()
scatter_matrix(dataset)
plt.show()

# --- Features / label -------------------------------------------------------
array = dataset.values
# Derive the label position from the column list instead of a magic number.
# The label used to be taken from column 4 (chlorides), which is a feature
# and is also part of X, so the model was never predicting quality.
target_index = names.index('quality')
X = array[:, 0:target_index]  # the 11 feature columns
Y = array[:, target_index]    # the quality label

# Split the data into 80%, 20% for training and testing.
validation_size = 0.20
seed = 7  # fixed random_state so the split is the same on every run
X_train, X_test, Y_train, Y_test = model_selection.train_test_split(
    X, Y, test_size=validation_size, random_state=seed
)
print("X_train", X_train)
print("X_test", X_test)
print("Y_train", Y_train)
print("Y_test", Y_test)

# --- Train and evaluate -----------------------------------------------------
model = GaussianNB()
model = model.fit(X_train, Y_train)
y_predicted = model.predict(X_test)
print(metrics.accuracy_score(Y_test, y_predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement