import numpy as np
# Constants
FILENAME = 'ex2data1.txt'
# Dataset split ratios
TRAIN_RATIO = 0.7
CROSS_VALIDATION_RATIO = 0.1
# The test set is whatever remains
# Learning rate
ALPHA = 0.001
# Number of iterations
NUM_ITERS = 40000
# The size of a batch (when mini-batch descent is used)
BATCH_SIZE = 40
# Highest polynomial degree to add (1 adds nothing)
POLYNOMIAL = 1
# Lambda for regularization
LAMBDA = 0.5
def get_data():
    # Load the comma-separated file: every column but the last is a feature,
    # the last column is the 0/1 label
    data = np.loadtxt(FILENAME, delimiter=',')
    x = data[:, :-1]
    y = data[:, [-1]]  # fancy indexing keeps y as a column vector
    return (x, y)
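# main() below splits the data with contiguous slices, which assumes the rows
# of the file are already in random order. A minimal sketch of shuffling them
# first; shuffle_data is an assumed helper and is not called by the original flow.
def shuffle_data(x, y):
    order = np.random.permutation(np.shape(x)[0])
    return x[order, :], y[order, :]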
def normalize_data(X):
    # z-score normalization: center each feature column and scale it to unit variance
    for i in range(np.shape(X)[1]):
        std = np.std(X[:, i])
        mean = np.mean(X[:, i])
        X[:, i] = (X[:, i] - mean) / std
    return X
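# Sketch only: the same z-score normalization without the explicit column loop,
# letting numpy compute per-column means and deviations with axis=0.
def normalize_data_vectorized(X):
    return (X - np.mean(X, axis=0)) / np.std(X, axis=0)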
def add_polynomials(X):
    # For every original feature column, append its powers 2..POLYNOMIAL;
    # with POLYNOMIAL = 1 the matrix is returned unchanged
    for i in range(np.shape(X)[1]):
        newX = np.zeros((np.shape(X)[0], POLYNOMIAL - 1))
        for j in range(2, POLYNOMIAL + 1):
            newX[:, j - 2] = X[:, i] ** j
        X = np.concatenate((X, newX), 1)
    return X
def sigmoid(x):
    return 1 / (1 + np.exp(-x))
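# Sketch only: np.exp(-x) overflows and warns for large negative inputs; one
# common guard is to clip the activation first (the bound of 500 here is an
# arbitrary assumption, well inside float64 range).
def sigmoid_stable(x):
    return 1 / (1 + np.exp(-np.clip(x, -500, 500)))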
def cost_function(X, y, theta):
    # Mean cross-entropy loss of the logistic model over all examples
    num_examples = np.shape(X)[0]
    h = sigmoid(np.dot(X, np.transpose(theta)))
    cost = -(np.dot(np.transpose(y), np.log(h)) + np.dot(np.transpose(1 - y), np.log(1 - h))) / num_examples
    return cost[0][0]
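# LAMBDA is declared above but never used by the original flow; a minimal
# sketch of an L2-regularized cost built on cost_function. The helper name is
# an assumption, and the bias weight theta[0, 0] is left unpenalized by convention.
def regularized_cost_function(X, y, theta):
    num_examples = np.shape(X)[0]
    penalty = (LAMBDA / (2 * num_examples)) * np.sum(theta[:, 1:] ** 2)
    return cost_function(X, y, theta) + penalty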
# Batch gradient descent, driven by the constants defined above
def batch_gradient(X, y, theta):
    num_examples = np.shape(X)[0]
    for i in range(NUM_ITERS):
        h = sigmoid(np.dot(X, np.transpose(theta)))
        theta = theta - (ALPHA / num_examples) * np.transpose(np.dot(np.transpose(X), (h - y)))
        if i % 10000 == 0:
            print('{}/{}'.format(i, NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
            test(X, y, theta)
    return theta
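# Sketch only: the same batch update with the L2 penalty from
# regularized_cost_function folded into the gradient (an assumed variant,
# not the original update; the bias weight is excluded from the penalty).
def regularized_batch_gradient(X, y, theta):
    num_examples = np.shape(X)[0]
    for i in range(NUM_ITERS):
        h = sigmoid(np.dot(X, np.transpose(theta)))
        grad = np.transpose(np.dot(np.transpose(X), (h - y))) / num_examples
        reg = (LAMBDA / num_examples) * theta
        reg[0, 0] = 0  # do not penalize the bias term
        theta = theta - ALPHA * (grad + reg)
    return theta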
def mini_batch_gradient(X, y, theta):
    num_examples = np.shape(X)[0]
    # Trailing examples that do not fill a whole batch are ignored
    num_batches = int(np.floor(num_examples / BATCH_SIZE))
    for i in range(NUM_ITERS):
        for j in range(num_batches):
            x = X[(j * BATCH_SIZE):((j + 1) * BATCH_SIZE), :]
            sub_y = y[(j * BATCH_SIZE):((j + 1) * BATCH_SIZE), :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            # Average the gradient over the batch, not the whole training set
            theta = theta - (ALPHA / BATCH_SIZE) * np.transpose(np.dot(np.transpose(x), (h - sub_y)))
        if i % 100 == 0:
            print('{}/{}'.format(i, NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta
def stochastic_gradient(X, y, theta):
    num_examples = np.shape(X)[0]
    for i in range(NUM_ITERS):
        for j in range(num_examples):
            x = X[j, :]
            sub_y = y[j, :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            # Per-example update: a single example's gradient needs no 1/m averaging
            theta = theta - ALPHA * (x * (h - sub_y))
        if i % 100 == 0:
            print('{}/{}'.format(i, NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta
def predict(x, theta):
    pred = sigmoid(np.dot(x, np.transpose(theta)))
    if pred >= 0.5:
        return 1
    else:
        return 0
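# Sketch only: a vectorized variant that classifies every row of X at once
# (an assumed helper, not used by the original flow).
def predict_all(X, theta):
    return (sigmoid(np.dot(X, np.transpose(theta))) >= 0.5).astype(int)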
def train(X, y):
    # Plain batch gradient descent, starting from an all-zero parameter row vector
    return batch_gradient(X, y, np.zeros((1, np.shape(X)[1])))
def test(X, y, theta):
    good = 0
    falseNegative = 0
    falsePositive = 0
    num_examples = np.shape(X)[0]
    for i in range(num_examples):
        pred = predict(X[i, :], theta)
        if pred == y[i][0]:
            good = good + 1
        elif pred == 0 and y[i][0] == 1:
            falseNegative = falseNegative + 1
        elif pred == 1 and y[i][0] == 0:
            falsePositive = falsePositive + 1
    print('Accuracy: {}\nFalse negative rate: {}\nFalse positive rate: {}\n'.format(
        good / num_examples, falseNegative / num_examples, falsePositive / num_examples))
    return good / num_examples
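# Sketch only: precision and recall derived from the same kind of counts,
# assuming the positive class is y == 1 (the helper name is an assumption,
# not part of the original script).
def precision_recall(X, y, theta):
    preds = np.array([predict(X[i, :], theta) for i in range(np.shape(X)[0])])
    labels = y[:, 0]
    true_positive = np.sum((preds == 1) & (labels == 1))
    false_positive = np.sum((preds == 1) & (labels == 0))
    false_negative = np.sum((preds == 0) & (labels == 1))
    precision = true_positive / max(true_positive + false_positive, 1)
    recall = true_positive / max(true_positive + false_negative, 1)
    return precision, recall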
def main():
    # Getting data
    x, y = get_data()
    # Normalizing data
    x = normalize_data(x)
    # Adding polynomials (done after normalization, so the added powers are not rescaled)
    x = add_polynomials(x)
    # Adding a bias column of ones in front of the features
    bias = np.ones((np.shape(x)[0], np.shape(x)[1] + 1))
    bias[:, 1:] = x
    x = bias
    # Creating the sets; the test set is whatever remains after the first two slices
    numTraining = int(np.shape(x)[0] * TRAIN_RATIO)
    numCrossVal = int(np.shape(x)[0] * CROSS_VALIDATION_RATIO)
    trainingSetX = x[0:numTraining, :]
    crossValSetX = x[numTraining:(numTraining + numCrossVal), :]
    testSetX = x[(numTraining + numCrossVal):, :]
    trainingSetY = y[0:numTraining, :]
    crossValSetY = y[numTraining:(numTraining + numCrossVal), :]
    testSetY = y[(numTraining + numCrossVal):, :]
    # Training
    thetas = train(trainingSetX, trainingSetY)
    # Testing on all three sets
    pourcentSuccessTrain = test(trainingSetX, trainingSetY, thetas)
    pourcentSuccessCrossValidation = test(crossValSetX, crossValSetY, thetas)
    pourcentSuccessTest = test(testSetX, testSetY, thetas)
    print('Success training set: {:.2f} %'.format(pourcentSuccessTrain * 100))
    print('Success crossvalidation set: {:.2f} %'.format(pourcentSuccessCrossValidation * 100))
    print('Success test set: {:.2f} %'.format(pourcentSuccessTest * 100))
if __name__ == '__main__':
    main()