#!/usr/bin/python
# -*- coding: utf-8 -*-
import math
import numpy as np
import matplotlib.pyplot as plt

###############################################################################
# Question 1 #
###############################################################################
# (a)
showresults = 0

def load_data(filename):
    """Load the data from the given file, returning a matrix X and a vector y."""
    my_data = np.genfromtxt(filename, delimiter=',')
    return my_data[:, 0:2], my_data[:, 2]

X, y = load_data('ex_data.csv')
m = len(y)
# Prepend a column of ones so theta[0] acts as the bias (intercept) term
# in the dot product with theta.
X = np.column_stack([np.ones(m), X])
if showresults:
    print("Matrix X")
    print(X)
    print("Vector y")
    print(y)
## now plot the data
pos = np.where(y == 1)
neg = np.where(y == 0)
plt.scatter(X[pos, 1], X[pos, 2], marker='o', c='g')
plt.scatter(X[neg, 1], X[neg, 2], marker='x', c='r')
plt.xlabel('Exam score 1')
plt.ylabel('Exam score 2')
plt.legend(['Admitted', 'Not Admitted'])
plt.grid(True)
if showresults:
    plt.show()
# TODO: min-max feature scaling? (a sketch follows below)
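# A minimal sketch for the min-max TODO above: rescale each feature column
# into [0, 1]. It is defined but not applied, since the rest of the script
# works on the raw scores; the name minmax_scale is my own choice, not part
# of the assignment.
def minmax_scale(X):
    cols = X[:, 1:]  # leave the bias column of ones untouched
    span = cols.max(axis=0) - cols.min(axis=0)
    return np.column_stack([X[:, 0], (cols - cols.min(axis=0)) / span])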
# (b)
def Hypothesis(theta, x):
    """Logistic hypothesis h_theta(x) = sigmoid(theta . x), vectorized via np.dot."""
    return Sigmoid(np.dot(x, theta))

def Sigmoid(x):
    # np.exp works elementwise, so this handles arrays as well as scalars
    # and no separate np.vectorize step is needed.
    return 1.0 / (1.0 + np.exp(-x))

if showresults != 1:  # quick sanity checks, run while showresults is 0
    thetatest = np.zeros(3)
    print("Hypothesis theta, X[0]:")
    print(Hypothesis(thetatest, X[0]))
    print("Hypothesis theta, X[13]:")
    print(Hypothesis(thetatest, X[13]))
if showresults:
    print("sigmoid X:")
    print(Sigmoid(X))
    print("sigmoid 1:")
    print(Sigmoid(1))
    print("sigmoid 0:")
    print(Sigmoid(0))
    print("sigmoid -1:")
    print(Sigmoid(-1))
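# An optional numerical note: for large positive x, np.exp(-x) underflows
# harmlessly, but for large negative x it overflows. A common stable variant,
# sketched here under my own name stable_sigmoid (not part of the assignment),
# branches on the sign of x:
def stable_sigmoid(x):
    x = np.asarray(x, dtype=float)
    out = np.empty_like(x)
    pos = x >= 0
    out[pos] = 1.0 / (1.0 + np.exp(-x[pos]))
    expx = np.exp(x[~pos])  # safe: here x < 0, so exp(x) <= 1
    out[~pos] = expx / (1.0 + expx)
    return out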
# (c)
def cost(X, y, theta):
    """Cross-entropy cost J(theta) = (1/m) * sum of -y*log(h) - (1-y)*log(1-h)."""
    m = len(y)
    sumOfErrors = 0
    for i in range(m):
        xi = X[i]
        hi = Hypothesis(theta, xi)
        # Clip h away from exactly 0 and 1 so the logs below stay finite.
        hi = min(max(hi, 1e-15), 1.0 - 1e-15)
        error_i = -(y[i] * math.log(hi)) - ((1 - y[i]) * math.log(1 - hi))
        sumOfErrors += error_i
    return sumOfErrors / m

thetatest = [0, 0, 0]
debug = cost(X, y, thetatest)
print("Cost:")
print(debug)
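# For comparison, a fully vectorized cost in one matrix pass; it should return
# the same value as the loop version above, e.g. for np.zeros(3). The name
# cost_vec is my own, not part of the assignment.
def cost_vec(X, y, theta):
    h = np.clip(Sigmoid(X @ theta), 1e-15, 1.0 - 1e-15)
    return -np.mean(y * np.log(h) + (1 - y) * np.log(1 - h))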
def grad(X, y, theta, j):
    """Partial derivative of the cost with respect to theta[j]."""
    m = len(y)
    total = 0
    for i in range(m):
        xi = X[i]
        hi = Hypothesis(theta, xi)
        # The gradient term is (h_i - y_i) * x_ij, not h_i * -y[i].
        total += (hi - y[i]) * xi[j]
    return total / m

if showresults:
    debug = [grad(X, y, thetatest, 0), grad(X, y, thetatest, 1), grad(X, y, thetatest, 2)]
    print("Gradient of the cost:")
    print(debug)
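# A vectorized counterpart that returns the whole gradient vector at once,
# equivalent to [grad(X, y, theta, j) for j in range(len(theta))]. The name
# grad_vec is my own, not part of the assignment.
def grad_vec(X, y, theta):
    h = Sigmoid(X @ theta)
    return X.T @ (h - y) / len(y)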
# (d)
def GD(costf, gradf, theta0, lr, steps):
    """
    Batch gradient descent.

    Args:
        costf: cost function (only needed for printing intermediate results)
        gradf: gradient of the cost function
        theta0: initial value for the parameters theta
        lr: learning rate
        steps: total number of iterations to perform
    Returns the final value for theta.
    """
    theta = theta0
    for i in range(steps):
        newtheta = []
        for j in range(len(theta)):
            newtheta.append(theta[j] - lr * gradf(X, y, theta, j))
        theta = newtheta
        if i % max(steps // 10, 1) == 0:  # report roughly ten times per run
            print("************************")
            print("Iteration: ", i)
            print("Theta")
            print(theta)
            print("cost(X, y, theta)")
            print(costf(X, y, theta))
            print("grad(X, y, theta, j)")
            print([gradf(X, y, theta, 0), gradf(X, y, theta, 1), gradf(X, y, theta, 2)])
    return theta
# (e)
train = GD(cost, grad, np.zeros(3), 0.001, 100)
print(train)
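# A quick evaluation sketch: classify with a 0.5 threshold on the learned
# theta and report training accuracy. With only 100 steps at this learning
# rate, theta is unlikely to have converged, so treat this as a sanity check.
pred = Sigmoid(X @ np.array(train)) >= 0.5
print("Training accuracy: {:.1f}%".format(100.0 * np.mean(pred == y)))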