SHARE
TWEET

Untitled

a guest Jan 24th, 2017 10 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import numpy as np
  2.  
  3. # Name of the comma-separated data file loaded by get_data()
  4. FILENAME = 'ex2data1.txt'
  5. # Fractions of the examples that main() assigns to the training and cross-validation sets
  6. TRAIN_RATIO = 0.7
  7. CROSS_VALIDATION_RATIO = 0.1
  8. # The test set is whatever remains after those two slices
  9. # Learning rate: factor applied to every gradient step
  10. ALPHA = 0.001
  11. # Number of iterations run by each gradient-descent routine
  12. NUM_ITERS = 40000
  13. # Number of examples per batch (read only by mini_batch_gradient)
  14. BATCH_SIZE = 40
  15. # Polynomial degree used by add_polynomials() when expanding the features
  16. POLYNOMIAL = 1
  17. # Regularization strength; no function visible in this file reads it
  18. LAMBDA = 0.5
  19.  
  def get_data(filename=None):
      """Load the feature matrix and the label column from a comma-separated file.

      Args:
          filename: Path of the file to read. When omitted, the module-level
              ``FILENAME`` constant is used.

      Returns:
          A tuple ``(x, y)``. ``x`` holds every column except the last one and
          ``y`` holds the last column, kept two-dimensional (a single column).
      """
      if filename is None:
          filename = FILENAME
      # ndmin=2 keeps a one-row file two-dimensional; without it loadtxt
      # returns a 1-D array and the column slicing below raises IndexError.
      data = np.loadtxt(filename, delimiter=',', ndmin=2)
      x = data[:, :-1]
      # Indexing with a list keeps the label column 2-D.
      y = data[:, [-1]]
      return (x, y)
  25.  
  def normalize_data(X):
      """Standardize every column of ``X`` in place and return it.

      Each column has its own mean subtracted and is divided by its own
      standard deviation.

      Args:
          X: Two-dimensional float array, one column per feature. It is
              modified in place.

      Returns:
          The same array object ``X``, now standardized.
      """
      mean = np.mean(X, axis=0)
      std = np.std(X, axis=0)
      # A constant column has zero spread; dividing by 1 maps it to zeros
      # instead of producing NaN/inf.
      std = np.where(std == 0, 1.0, std)
      # Slice assignment keeps the in-place contract callers may rely on.
      X[:, :] = (X - mean) / std
      return X
  32.  
  def add_polynomials(X, degree=None):
      """Append the powers 2..degree of every original column of ``X``.

      The new columns are grouped per feature: all powers of column 0 first,
      then all powers of column 1, and so on.

      Args:
          X: Two-dimensional array, one column per feature.
          degree: Highest power to generate. Defaults to the module-level
              ``POLYNOMIAL`` constant. Values of 1 or less add no columns.

      Returns:
          A new float array made of the original columns followed by the
          generated polynomial columns.
      """
      if degree is None:
          degree = POLYNOMIAL
      X = np.asarray(X, dtype=float)
      columns = [X]
      for i in range(np.shape(X)[1]):
          feature = X[:, [i]]  # list index keeps the column 2-D
          for power in range(2, degree + 1):
              columns.append(feature ** power)
      return np.concatenate(columns, axis=1)
  40.  
  def sigmoid(x):
      """Return the logistic function ``1 / (1 + e**-x)`` of ``x``, element-wise."""
      denominator = 1 + np.exp(-x)
      return 1 / denominator
  43.  
  def cost_function(X, y, theta):
      """Return the mean logistic-regression (cross-entropy) cost.

      Computes ``-(1/m) * sum(y*log(h) + (1-y)*log(1-h))`` with
      ``h = sigmoid(X . theta^T)``. The expression is evaluated in the
      algebraically equivalent form ``log(1 + e**z) - y*z`` so that saturated
      predictions never reach ``log(0)``.

      Args:
          X: Array of shape (m, n), one row per example.
          y: Labels (0 or 1), one per example; reshaped to match the scores.
          theta: Parameter row vector of shape (1, n).

      Returns:
          The cost as a Python float.
      """
      num_examples = np.shape(X)[0]
      z = np.dot(X, np.transpose(theta))
      labels = np.reshape(y, np.shape(z))
      # logaddexp(0, z) == log(1 + e**z), computed without overflow.
      per_example = np.logaddexp(0, z) - labels * z
      return float(np.sum(per_example) / num_examples)
  48.  
  49. # Our gradient function, uses the defined const
  def batch_gradient(X, y, theta):
      """Run NUM_ITERS full-batch gradient-descent steps and return theta.

      Every step uses all rows of ``X``. On every 10000th iteration
      (including iteration 0) the progress, the current cost and the
      accuracy report of ``test`` are printed.
      """
      m = np.shape(X)[0]
      for step in range(NUM_ITERS):
          predictions = sigmoid(np.dot(X, np.transpose(theta)))
          gradient = np.transpose(np.dot(np.transpose(X), predictions - y))
          theta = theta - ALPHA / m * gradient
          if step % 10000 != 0:
              continue
          print('/'.join((str(step), str(NUM_ITERS))))
          current_cost = cost_function(X, y, theta)
          print('Cost function value: {:.10f}'.format(current_cost))
          test(X, y, theta)
      return theta
  60.  
  def mini_batch_gradient(X, y, theta):
      """Run NUM_ITERS passes of mini-batch gradient descent and return theta.

      Each pass walks over the examples in consecutive slices of BATCH_SIZE
      rows, including a shorter final slice when the number of examples is
      not a multiple of BATCH_SIZE. Every 100th pass (including pass 0)
      prints the progress and the cost over the whole set.

      Args:
          X: Array of shape (m, n), one row per example.
          y: Label column of shape (m, 1).
          theta: Initial parameter row vector of shape (1, n).

      Returns:
          The updated parameter row vector.
      """
      num_examples = np.shape(X)[0]
      for i in range(NUM_ITERS):
          # Stepping by BATCH_SIZE also covers the trailing partial batch.
          for start in range(0, num_examples, BATCH_SIZE):
              stop = start + BATCH_SIZE
              batch_x = X[start:stop, :]
              batch_y = y[start:stop, :]
              h = sigmoid(np.dot(batch_x, np.transpose(theta)))
              gradient = np.transpose(np.dot(np.transpose(batch_x), h - batch_y))
              # The step is scaled by the full set size, as in batch_gradient.
              theta = theta - ALPHA / num_examples * gradient
          if i % 100 == 0:
              print(str(i) + '/' + str(NUM_ITERS))
              print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
      return theta
  73.    
  def stochastic_gradient(X, y, theta):
      """Run NUM_ITERS passes of per-example gradient descent and return theta.

      Within a pass theta is updated once per row of ``X``, in row order.
      Every 100th pass (including pass 0) prints the progress and the cost
      over the whole set.
      """
      m = np.shape(X)[0]
      for epoch in range(NUM_ITERS):
          for row in range(m):
              example = X[row, :]
              label = y[row, :]
              prediction = sigmoid(np.dot(example, np.transpose(theta)))
              error = prediction - label
              update = np.transpose(np.transpose(example) * error)
              theta = theta - ALPHA / m * update
          if epoch % 100 != 0:
              continue
          print('/'.join((str(epoch), str(NUM_ITERS))))
          current_cost = cost_function(X, y, theta)
          print('Cost function value: {:.10f}'.format(current_cost))
      return theta
  86.  
  def predict(x, theta):
      """Return 1 when the sigmoid of ``x . theta^T`` is at least 0.5, else 0."""
      probability = sigmoid(np.dot(x, np.transpose(theta)))
      return 1 if probability >= 0.5 else 0
  93.  
  def train(X, y):
      """Fit theta with batch_gradient, starting from an all-zero row vector."""
      num_features = np.shape(X)[1]
      initial_theta = np.zeros((1, num_features))
      return batch_gradient(X, y, initial_theta)
  96.  
  def test(X, y, theta):
      """Print and return the share of rows of ``X`` that are predicted correctly.

      Each row is classified with ``predict`` and compared with the label in
      the first column of ``y``. The printed report also gives the shares of
      false negatives and false positives.
      """
      total = np.shape(X)[0]
      correct = 0
      missed_positives = 0
      false_alarms = 0
      for row in range(total):
          expected = y[row][0]
          guess = predict(X[row, :], theta)
          if guess == expected:
              correct += 1
          elif guess == 0 and expected == 1:
              missed_positives += 1
          elif guess == 1 and expected == 0:
              false_alarms += 1
      report = 'Good: {}\n False negative: {}\n False positive: {}\n'
      print(report.format(correct / total, missed_positives / total, false_alarms / total))
      return correct / total
  111.  
  def main():
      """Load the data, train the logistic model and print its accuracy.

      The features are normalized, expanded with polynomial terms and given a
      leading column of ones. The examples are then split, in file order,
      into a training set (TRAIN_RATIO), a cross-validation set
      (CROSS_VALIDATION_RATIO) and a test set (the remainder). The success
      rate on each set is printed as a percentage.
      """
      x, y = get_data()
      x = normalize_data(x)
      x = add_polynomials(x)
      # Prepend the bias column of ones.
      num_examples = np.shape(x)[0]
      x = np.hstack((np.ones((num_examples, 1)), x))

      # Slice boundaries of the three consecutive sets.
      train_end = int(num_examples * TRAIN_RATIO)
      cross_val_end = train_end + int(num_examples * CROSS_VALIDATION_RATIO)
      training_x, training_y = x[:train_end, :], y[:train_end, :]
      cross_val_x = x[train_end:cross_val_end, :]
      cross_val_y = y[train_end:cross_val_end, :]
      test_x, test_y = x[cross_val_end:, :], y[cross_val_end:, :]

      thetas = train(training_x, training_y)

      success_train = test(training_x, training_y, thetas)
      success_cross_val = test(cross_val_x, cross_val_y, thetas)
      success_test = test(test_x, test_y, thetas)
      print('Success training set: {:.2f} %'.format(success_train * 100))
      print('Success crossvalidation set: {:.2f} %'.format(success_cross_val * 100))
      print('Success test set: {:.2f} %'.format(success_test * 100))


  # Run only when executed as a script, so importing the module has no side effects.
  if __name__ == '__main__':
      main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top