Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- This code has been adapted from the Udacity Coding Resource page for the Machine Learning 1 project.
- """
- # Import libraries
- # numpy is used for numerical function
- # pylab is used for plotting
- # sklearn is the sci-kit library
- from numpy import *
- import pylab as pl
- from sklearn.utils import shuffle
- from sklearn.metrics import mean_squared_error
- from sklearn import datasets
- from sklearn.tree import DecisionTreeRegressor
- from sklearn.neighbors import KNeighborsRegressor
- from pybrain.structure import FeedForwardNetwork
- from pybrain.tools.shortcuts import buildNetwork
- from pybrain.datasets import SupervisedDataSet
- from pybrain.supervised.trainers import BackpropTrainer
- from sklearn.svm import SVR
def DecisionTreeF(x, iDepth, X_trainData, y_trainData, X_testData, y_testData):
    """Fit a decision-tree regressor of depth iDepth on the training data,
    print its prediction for the single sample x, and print the MSE on the
    training and testing sets.

    x            -- one feature vector to predict for
    iDepth       -- max_depth passed to DecisionTreeRegressor
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # Set up a Decision Tree Regressor limited to depth iDepth
    regressor = DecisionTreeRegressor(max_depth=iDepth)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for Decision Trees = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def kNN(x, iNeigbours, X_trainData, y_trainData, X_testData, y_testData):
    """Fit a k-nearest-neighbours regressor with iNeigbours neighbours,
    print its prediction for the single sample x, and print the MSE on the
    training and testing sets.

    x            -- one feature vector to predict for
    iNeigbours   -- n_neighbors passed to KNeighborsRegressor
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # Set up a k-NN regressor (the original comment wrongly said
    # "Decision Tree Regressor" — copy-paste leftover).
    regressor = KNeighborsRegressor(n_neighbors=iNeigbours)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for k-NN = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def Boosting(x, iDegree, X_trainData, y_trainData, X_testData, y_testData):
    """Fit an RBF-kernel SVR, print its prediction for the single sample x,
    and print the MSE on the training and testing sets.

    NOTE(review): despite the name, this trains a Support Vector Regressor,
    not a boosting ensemble; the name is kept so existing callers still work.
    Also note that `degree` is ignored by the 'rbf' kernel in scikit-learn
    (it only matters for kernel='poly').

    x            -- one feature vector to predict for
    iDegree      -- degree passed to SVR (no effect with the rbf kernel)
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    regressor = SVR(kernel='rbf', degree=iDegree)
    regressor.fit(X_trainData, y_trainData)
    # Use the model to predict the output of a particular sample
    y = regressor.predict(x)
    print("Prediction for Boosting = " + str(y))
    # BUG FIX: compute the errors from the parameters, not the module-level
    # y_train / y_test globals the original accidentally referenced.
    train_err = mean_squared_error(y_trainData, regressor.predict(X_trainData))
    print("Training Error = " + str(train_err))
    # Find the MSE on the testing set
    test_err = mean_squared_error(y_testData, regressor.predict(X_testData))
    print("Testing Error = " + str(test_err))
def NN(x, iInputFeatures, iOutputs, X_trainData, y_trainData, X_testData, y_testData):
    """Train a fully connected feed-forward network (PyBrain) with
    backpropagation, print its prediction for the single sample x, and print
    the final training error plus the MSE on the testing set.

    x              -- one feature vector to predict for
    iInputFeatures -- number of input nodes / dataset features
    iOutputs       -- number of output nodes / target dimensions
    X_trainData, y_trainData -- training features / targets
    X_testData,  y_testData  -- testing features / targets
    """
    # BUG FIX: honor the iInputFeatures / iOutputs parameters; the original
    # hard-coded 13 inputs and 1 output and ignored both arguments.
    # Hidden layers of 9, 6 and 3 nodes; the network is fully connected.
    net = buildNetwork(iInputFeatures, 9, 6, 3, iOutputs)
    # The dataset mirrors the network's input/output dimensions
    ds = SupervisedDataSet(iInputFeatures, iOutputs)
    train_err = 0
    test_err = 0
    # We will train the network for 50 epochs
    max_epochs = 50
    # Convert the training arrays into a SupervisedDataSet.
    # BUG FIX: start at index 0 — the original range(1, ...) silently
    # dropped the first training sample.
    for j in range(len(X_trainData)):
        ds.addSample(X_trainData[j], y_trainData[j])
    # Set up a trainer that will use backpropagation for training
    trainer = BackpropTrainer(net, ds)
    # BUG FIX: range(1, max_epochs) only ran 49 epochs; run the full 50.
    for k in range(max_epochs):
        train_err = trainer.train()
    # Activate the network on every test sample to collect predictions
    y = zeros(len(X_testData))
    for j in range(len(X_testData)):
        y[j] = net.activate(X_testData[j])
    # Calculate MSE over all samples in the test set
    test_err = mean_squared_error(y, y_testData)
    y2 = net.activate(x)
    print("Prediction for Neural Networks = " + str(y2))
    # Final training error from the last epoch
    print("Training Error = " + str(train_err))
    # MSE on the testing set
    print("Testing Error = " + str(test_err))
# Load the Boston housing dataset.
# NOTE(review): load_boston was removed from scikit-learn 1.2+; this script
# requires an older scikit-learn (and Python 2-era PyBrain) to run.
boston = datasets.load_boston()
# Shuffle before splitting so train/test are sampled uniformly from the data
X, y = shuffle(boston.data, boston.target)
# Split into training and testing sets in the ratio 7:3
offset = int(0.7 * len(X))
X_train, y_train = X[:offset], y[:offset]
X_test, y_test = X[offset:], y[offset:]
# One query sample (the 13 Boston-housing features) to predict for
x = [11.95, 0.00, 18.100, 0, 0.6590, 5.6090, 90.00, 1.385, 24, 680.0, 20.20, 332.09, 12.13]
print("Predictions for data set:" + str(x))
# Run each learner: decision tree (depth 8), k-NN (k=5), SVR, and a
# 13-input / 1-output neural network.
DecisionTreeF(x, 8, X_train, y_train, X_test, y_test)
kNN(x, 5, X_train, y_train, X_test, y_test)
Boosting(x, 40, X_train, y_train, X_test, y_test)
NN(x, 13, 1, X_train, y_train, X_test, y_test)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement