Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import pandas as pd
import numpy as np
from sklearn import preprocessing
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Script: rescale the wine-quality features, fit a k-nearest-neighbours
# classifier for each candidate k, and report which k scores best on the
# held-out test split.

# Candidate neighbour counts; the accuracy arrays below are sized from this
# so the two can never drift apart.
K_VALUES = range(1, 10)

wineData = pd.read_csv('winequality.csv')
print(wineData.columns)

# Set the target column aside so that only the feature columns are rescaled.
quality = wineData['Quality']
del wineData['Quality']

# NOTE(review): the scaler is fitted on every row before the train/test
# split, so test-set statistics influence the scaling. Fitting it on the
# training rows only would avoid that leakage -- confirm whether the printed
# preview below must stay based on the fully scaled table before changing it.
wineDataArray = wineData.values
min_max_scaler = preprocessing.MinMaxScaler()
wineData_scaled = min_max_scaler.fit_transform(wineDataArray)
wineData = pd.DataFrame(wineData_scaled, columns=wineData.columns)
wineData['Quality'] = quality
print(wineData.head())

train_accuracy = np.empty(len(K_VALUES))
test_accuracy = np.empty(len(K_VALUES))

X = wineData.drop(columns='Quality')
y = wineData['Quality']
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=10, stratify=y
)

best_accuracy = 0
bestk = 0
for position, k in enumerate(K_VALUES):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(X_train, y_train)
    train_accuracy[position] = knn.score(X_train, y_train)
    test_accuracy[position] = knn.score(X_test, y_test)
    # Choose k by held-out accuracy. Training accuracy is not a useful
    # criterion for KNN: with k=1 every training point is its own nearest
    # neighbour, so it would win almost regardless of how well it
    # generalises. `>=` keeps the original tie-break (the larger k wins).
    if test_accuracy[position] >= best_accuracy:
        best_accuracy = test_accuracy[position]
        bestk = k

print(best_accuracy)
print(bestk)
print(train_accuracy)
print(test_accuracy)
print('k=', bestk, 'produced the best accuracy')  # part 4
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement