Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- This code has been adapted from the Udacity Coding Resource page for the Machine Learning 1 project.
- """
- # Import libraries
- # numpy is used for numerical function
- # pylab is used for plotting
- # sklearn is the sci-kit library
- from numpy import *
- import pylab as pl
- from sklearn.utils import shuffle
- from sklearn.metrics import mean_squared_error
- from sklearn import datasets
- from sklearn.tree import DecisionTreeRegressor
- from sklearn.neighbors import KNeighborsRegressor
- from pybrain.structure import FeedForwardNetwork
- from pybrain.tools.shortcuts import buildNetwork
- from pybrain.datasets import SupervisedDataSet
- from pybrain.supervised.trainers import BackpropTrainer
- from sklearn.svm import SVR
def DecisionTreeF(x, iDepth, X_trainData, y_trainData, X_testData, y_testData):
    """Fit a decision-tree regressor of depth iDepth on the training data,
    print its prediction for the single sample x, and print the MSE on the
    training and testing sets.

    x            -- one feature vector to predict for
    iDepth       -- max_depth passed to DecisionTreeRegressor
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # Set up a Decision Tree Regressor limited to depth iDepth
    regressor = DecisionTreeRegressor(max_depth=iDepth)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for Decision Trees = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def kNN(x, iNeigbours, X_trainData, y_trainData, X_testData, y_testData):
    """Fit a k-nearest-neighbours regressor with iNeigbours neighbours,
    print its prediction for the single sample x, and print the MSE on the
    training and testing sets.

    x            -- one feature vector to predict for
    iNeigbours   -- n_neighbors passed to KNeighborsRegressor
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # Set up a k-NN regressor (the original comment wrongly said
    # "Decision Tree Regressor" — copy-paste leftover).
    regressor = KNeighborsRegressor(n_neighbors=iNeigbours)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for k-NN = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def Boosting(x, iDegree, X_trainData, y_trainData, X_testData, y_testData):
    """Fit an RBF-kernel SVR, print its prediction for the single sample x,
    and print the MSE on the training and testing sets.

    NOTE(review): despite the name, this trains a Support Vector Regressor,
    not a boosting ensemble; the name is kept so existing callers still work.
    Also note that `degree` is ignored by the 'rbf' kernel in scikit-learn
    (it only matters for kernel='poly').

    x            -- one feature vector to predict for
    iDegree      -- degree passed to SVR (no effect with the rbf kernel)
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    regressor = SVR(kernel='rbf', degree=iDegree)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for Boosting = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def NN(x, iInputFeatures, iOutputs, X_trainData, y_trainData, X_testData, y_testData):
    """Train a fully connected feed-forward network (PyBrain) with
    backpropagation, print its prediction for the single sample x, and print
    the final training error plus the MSE on the testing set.

    x              -- one feature vector to predict for
    iInputFeatures -- number of input nodes / dataset features
    iOutputs       -- number of output nodes / target dimensions
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # BUG FIX: honor the iInputFeatures / iOutputs parameters; the original
    # hard-coded 13 inputs and 1 output and ignored both arguments.
    # Hidden layers of 9, 6 and 3 nodes; the network is fully connected.
    net = buildNetwork(iInputFeatures, 9, 6, 3, iOutputs)
    # The dataset mirrors the network's input/output dimensions
    ds = SupervisedDataSet(iInputFeatures, iOutputs)
    train_err = 0
    test_err = 0
    # We will train the network for 50 epochs
    max_epochs = 50
    # Convert the training arrays into a SupervisedDataSet.
    # BUG FIX: start at index 0 — the original range(1, ...) silently
    # dropped the first training sample.
    for j in range(len(X_trainData)):
        ds.addSample(X_trainData[j], y_trainData[j])
    # Set up a trainer that will use backpropagation for training
    trainer = BackpropTrainer(net, ds)
    # BUG FIX: range(1, max_epochs) only ran 49 epochs; run the full 50.
    for k in range(max_epochs):
        train_err = trainer.train()
    # Activate the network on every test sample to collect predictions
    y = zeros(len(X_testData))
    for j in range(len(X_testData)):
        y[j] = net.activate(X_testData[j])
    # Calculate MSE over all samples in the test set
    test_err = mean_squared_error(y, y_testData)
    y2 = net.activate(x)
    print("Prediction for Neural Networks = " + str(y2))
    # Final training error from the last epoch
    print("Training Error = " + str(train_err))
    # MSE on the testing set
    print("Testing Error = " + str(test_err))
# Load the Boston housing dataset.
# NOTE(review): load_boston was removed from scikit-learn 1.2+; this script
# requires an older scikit-learn (and Python 2-era PyBrain) to run.
boston = datasets.load_boston()
# Shuffle before splitting so train/test are sampled uniformly from the data
X, y = shuffle(boston.data, boston.target)
# Split into training and testing sets in the ratio 7:3
offset = int(0.7 * len(X))
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
# One query sample (the 13 Boston-housing features) to predict for
x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
print("Predictions for data set:" + str(x))
# Run each learner: decision tree (depth 8), k-NN (k=5), SVR, and a
# 13-input / 1-output neural network.
DecisionTreeF(x, 8, X_train, y_train, X_test, y_test)
kNN(x, 5, X_train, y_train, X_test, y_test)
Boosting(x, 40, X_train, y_train, X_test, y_test)
NN(x, 13, 1, X_train, y_train, X_test, y_test)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement