Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Explanatory modeling: load the red-wine quality CSV, print and plot a few
# summaries of it, then fit a Gaussian Naive Bayes classifier on an 80/20
# train/test split and print its accuracy on the held-out rows.
import matplotlib.pyplot as plt  # explanatory modeling
import pandas
from pandas.plotting import scatter_matrix
from sklearn import model_selection
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

# Hard-coded location of the input CSV on the author's machine.
url = "c:/Users/bmaxwel/Documents/winequality-red.csv"

# Column labels applied to the CSV: eleven feature columns followed by the
# 'quality' column that is used as the prediction target below.
# NOTE(review): 'volatile acidity ' has a trailing space; it is kept
# byte-for-byte because it is a runtime label -- confirm whether intended.
names = [
    'fixed acidity', 'volatile acidity ', 'citric acid', 'residual sugar',
    'chlorides', 'free sulfur dioxide', 'total sulfur dioxide',
    'density', 'pH', 'sulphates', 'alcohol', 'quality',
]

# NOTE(review): with names= and no header=0, pandas treats the first line of
# the file as data. If this CSV ships with its own header row (or a non-comma
# separator), pass header=0 / sep=... here -- verify against the actual file.
dataset = pandas.read_csv(url, names=names)

# Textual overview of the data.
print(dataset.shape)
# head must be *called*; printing the attribute only shows the bound method.
print(dataset.head())
print(dataset)
print(dataset.describe())

# One box plot per column, each with its own axes, laid out on a 6x6 grid.
dataset.plot(kind='box', subplots=True, layout=(6, 6), sharex=False, sharey=False)
plt.show()

# Per-column histograms.
dataset.hist()
plt.show()

# Pairwise scatter plots of all columns.
scatter_matrix(dataset)
plt.show()

# Split the raw values into features (first 11 columns) and target (12th).
array = dataset.values
x = array[:, 0:11]
y = array[:, 11]

# Hold out 20% of the rows for testing; the fixed seed makes the split
# reproducible between runs.
validation_size = 0.20
seed = 7
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=validation_size, random_state=seed
)
print("x_train", x_train)
print("x_test", x_test)
print("y_train", y_train)
print("y_test", y_test)

# Fit Gaussian Naive Bayes on the training rows and report the fraction of
# held-out rows whose quality label is predicted exactly.
model = GaussianNB()
model = model.fit(x_train, y_train)
y_predicted = model.predict(x_test)
print(metrics.accuracy_score(y_test, y_predicted))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement