Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import pandas as pand
- import numpy as np
- from sklearn.model_selection import train_test_split
- from sklearn.neighbors import KNeighborsClassifier
- from sklearn.neighbors import KNeighborsRegressor
- from sklearn.metrics import confusion_matrix
- from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
- from sklearn.preprocessing import StandardScaler as SS
def infoCsv(filename):
    """Load a CSV file and print a quick overview of its contents.

    Prints, in order: the frame's shape, the ``DataFrame.info()`` summary,
    the first rows (``head()``) and the descriptive statistics
    (``describe()``).

    Args:
        filename: Path or other source accepted by ``pandas.read_csv``.

    Returns:
        The loaded ``pandas.DataFrame``.
    """
    dataFrame = pand.read_csv(filename)
    print(dataFrame.shape)
    # DataFrame.info() writes its report itself and returns None, so wrapping
    # it in print() would emit a stray "None" line after the summary.
    dataFrame.info()
    print(dataFrame.head())
    print(dataFrame.describe())
    return dataFrame
def splitBase(dataFrame, random_seed=14):
    """Split a data set into a train half and a test half.

    Args:
        dataFrame: The data to split (anything ``train_test_split`` accepts).
        random_seed: Seed forwarded to ``train_test_split`` as
            ``random_state`` so that the shuffle is reproducible between
            runs. Defaults to 14.

    Returns:
        A ``(train, test)`` tuple, each holding 50% of the rows.
    """
    # The seed used to be assigned to a local and never used, which made
    # every run produce a different split; it is now actually applied.
    train, test = train_test_split(
        dataFrame,
        train_size=0.5,
        test_size=0.5,
        random_state=random_seed,
    )
    return train, test
def extractColumns(dfTest, dfTrain):
    """Separate features from the ``Species`` target for both data sets.

    The ``Id`` and ``Species`` columns are removed from the feature frames.
    The input frames are left untouched: new frames are built with
    ``DataFrame.drop`` instead of deleting columns in place, so callers can
    keep using ``dfTest`` / ``dfTrain`` afterwards.

    Args:
        dfTest: Test DataFrame containing ``Id`` and ``Species`` columns.
        dfTrain: Train DataFrame containing ``Id`` and ``Species`` columns.

    Returns:
        A ``(xTest, yTest, xTrain, yTrain)`` tuple.

    Raises:
        KeyError: If ``Id`` or ``Species`` is missing from either frame.
    """
    dropped = ["Id", "Species"]

    yTrain = dfTrain["Species"]
    xTrain = dfTrain.drop(columns=dropped)

    yTest = dfTest["Species"]
    xTest = dfTest.drop(columns=dropped)

    return xTest, yTest, xTrain, yTrain
def extractColumnsRegr(dfTest, dfTrain):
    """Separate features from the ``mpg`` target for both data sets.

    The ``name`` and ``mpg`` columns are removed from the feature frames.
    The input frames are left untouched: new frames are built with
    ``DataFrame.drop`` instead of deleting columns in place, so callers can
    keep using ``dfTest`` / ``dfTrain`` afterwards.

    Args:
        dfTest: Test DataFrame containing ``name`` and ``mpg`` columns.
        dfTrain: Train DataFrame containing ``name`` and ``mpg`` columns.

    Returns:
        A ``(xTest, yTest, xTrain, yTrain)`` tuple.

    Raises:
        KeyError: If ``name`` or ``mpg`` is missing from either frame.
    """
    dropped = ["name", "mpg"]

    yTrain = dfTrain["mpg"]
    xTrain = dfTrain.drop(columns=dropped)

    yTest = dfTest["mpg"]
    xTest = dfTest.drop(columns=dropped)

    return xTest, yTest, xTrain, yTrain
def learnAndTrain(Xtest, Ytest, Xtrain, Ytrain):
    """Fit a 3-nearest-neighbours classifier and report how it performs.

    Prints the classifier's score on the training data, its score on the
    test data, and the confusion matrix of the test predictions.

    Args:
        Xtest: Feature matrix of the test set.
        Ytest: Target labels of the test set.
        Xtrain: Feature matrix of the training set.
        Ytrain: Target labels of the training set.
    """
    classifier = KNeighborsClassifier(3).fit(Xtrain, Ytrain)

    train_score = classifier.score(Xtrain, Ytrain)
    print("Score train : ", train_score)

    predicted_labels = classifier.predict(Xtest)
    test_score = classifier.score(Xtest, Ytest)
    print("Score test : ", test_score)

    print(confusion_matrix(Ytest, predicted_labels))
def learnAndTrainRegr(Xtest, Ytest, Xtrain, Ytrain):
    """Fit a 3-nearest-neighbours regressor and print its test metrics.

    Prints the R2 score, the mean absolute error and the mean squared error
    of the predictions made on the test set.

    Args:
        Xtest: Feature matrix of the test set.
        Ytest: True target values of the test set.
        Xtrain: Feature matrix of the training set.
        Ytrain: True target values of the training set.
    """
    Knm = KNeighborsRegressor(3)
    model = Knm.fit(Xtrain, Ytrain)
    predictTest = model.predict(Xtest)
    # All sklearn metrics take (y_true, y_pred). R2 is not symmetric, so the
    # ground truth must come first or the reported score is wrong.
    print("R2 score : ", r2_score(Ytest, predictTest))
    print("MAE : ", mean_absolute_error(Ytest, predictTest))
    print("MSE : ", mean_squared_error(Ytest, predictTest))
def main():
    """Run the classification demo, then the regression demo.

    Classification: loads ``iris.csv``, splits it in two halves, and trains
    and evaluates a KNN classifier on the ``Species`` column.

    Regression: loads ``auto-mpg.data``, splits it in two halves,
    standardises the features, and trains and evaluates a KNN regressor on
    the ``mpg`` column.

    Returns:
        0, always.
    """
    print("Classification")
    print("=======================================================")
    dfClassif = infoCsv("iris.csv")
    train, test = splitBase(dfClassif)
    X_test, Y_test, X_train, Y_train = extractColumns(test, train)
    learnAndTrain(X_test, Y_test, X_train, Y_train)

    print("=======================================================")
    print("Régression")
    dfRegr = infoCsv("auto-mpg.data")
    trainRegr, testRegr = splitBase(dfRegr)
    X_test, Y_test, X_train, Y_train = extractColumnsRegr(testRegr, trainRegr)

    scaler = SS()
    # Learn the scaling statistics from the training data only, then apply
    # the same transformation to the test data. Fitting a second time on the
    # test set would scale the two sets differently and leak test statistics.
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)
    learnAndTrainRegr(X_test, Y_test, X_train, Y_train)
    return 0
# Script entry point: run the demo only when executed directly, not on import.
if "__main__" == __name__:
    main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement