import numpy as np

# Constants
FILENAME = 'ex2data1.txt'
# Example splitting (the test set gets whatever remains)
TRAIN_RATIO = 0.7
CROSS_VALIDATION_RATIO = 0.1
# Learning rate
ALPHA = 0.001
# Number of iterations
NUM_ITERS = 40000
# The size of a batch (where it applies)
BATCH_SIZE = 40
# Highest polynomial degree to generate per feature (1 = no extra features)
POLYNOMIAL = 1
# Lambda for regularization (declared here but unused below)
LAMBDA = 0.5

def get_data():
    # Load comma-separated rows: every column but the last is a feature,
    # the last column is the 0/1 label, kept as a column vector
    data = np.loadtxt(FILENAME, delimiter=',')
    x = data[:, :-1]
    y = data[:, [-1]]
    return (x, y)

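# Note on the input: the filename suggests the exam-scores dataset from
# exercise 2 of Andrew Ng's machine learning course (two numeric features
# per row plus a final 0/1 label); any CSV with that shape should work.
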
def normalize_data(X):
    # Standardize each feature column to zero mean and unit variance
    for i in range(0, np.shape(X)[1]):
        std = np.std(X[:, i])
        mean = np.mean(X[:, i])
        X[:, i] = (X[:, i] - mean) / std
    return X

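# Caveat: main() normalizes the full dataset before splitting it, so test-set
# statistics leak into training. A leakage-free variant (a sketch, not part
# of the original script) would fit the statistics on the training rows only:
def make_normalizer(X_train):
    mean = np.mean(X_train, axis=0)
    std = np.std(X_train, axis=0)
    return lambda X: (X - mean) / std
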
def add_polynomials(X):
    # For each original feature, append its powers 2..POLYNOMIAL as new columns
    num_original = np.shape(X)[1]
    for i in range(0, num_original):
        newX = np.zeros((np.shape(X)[0], POLYNOMIAL - 1))
        for j in range(0, POLYNOMIAL - 1):
            newX[:, j] = X[:, i] ** (j + 2)
        X = np.concatenate((X, newX), 1)
    return X

def sigmoid(x):
    return 1 / (1 + np.exp(-x))

def cost_function(X, y, theta):
    # Cross-entropy cost: J = -(1/m) * (y' log(h) + (1 - y)' log(1 - h))
    num_examples = np.shape(X)[0]
    h = sigmoid(np.dot(X, np.transpose(theta)))
    return (-(1 / num_examples) * (np.dot(np.transpose(y), np.log(h))
                                   + np.dot(np.transpose(1 - y), np.log(1 - h))))[0, 0]

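# LAMBDA is declared at the top but never applied anywhere in this script.
# A minimal L2-regularized variant (a sketch added here, not in the original)
# penalizes every weight except the bias theta[0, 0]:
def cost_function_regularized(X, y, theta):
    num_examples = np.shape(X)[0]
    penalty = (LAMBDA / (2 * num_examples)) * np.sum(theta[:, 1:] ** 2)
    return cost_function(X, y, theta) + penalty
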
# Batch gradient descent over the full training set, using the constants above
def batch_gradient(X, y, theta):
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        h = sigmoid(np.dot(X, np.transpose(theta)))
        theta = theta - ALPHA / num_examples * np.transpose(np.dot(np.transpose(X), (h - y)))
        if i % 10000 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
            test(X, y, theta)
    return theta

def mini_batch_gradient(X, y, theta):
    # One epoch applies one update per batch of BATCH_SIZE consecutive examples
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        for j in range(0, int(np.floor(num_examples / BATCH_SIZE))):
            x = X[(j * BATCH_SIZE):((j + 1) * BATCH_SIZE), :]
            sub_y = y[(j * BATCH_SIZE):((j + 1) * BATCH_SIZE), :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            theta = theta - ALPHA / num_examples * np.transpose(np.dot(np.transpose(x), (h - sub_y)))
        if i % 100 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta

def stochastic_gradient(X, y, theta):
    # One epoch applies one update per individual training example
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        for j in range(0, num_examples):
            x = X[j, :]
            sub_y = y[j, :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            theta = theta - ALPHA / num_examples * np.transpose(np.transpose(x) * (h - sub_y))
        if i % 100 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta

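# Note: mini_batch_gradient() and stochastic_gradient() divide by num_examples
# rather than by the batch size, so their steps are smaller than the textbook
# formulation and ALPHA would need retuning for a fair comparison. They also
# visit examples in a fixed order; a per-epoch shuffle (a sketch, not in the
# original) is the usual refinement:
def shuffle_examples(X, y):
    perm = np.random.permutation(np.shape(X)[0])
    return X[perm, :], y[perm, :]
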
def predict(x, theta):
    # Threshold the sigmoid output at 0.5
    pred = sigmoid(np.dot(x, np.transpose(theta)))
    if pred >= 0.5:
        return 1
    else:
        return 0

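# A vectorized alternative (a sketch, not in the original): classify every row
# at once, which would let test() below avoid its Python-level loop.
def predict_all(X, theta):
    return (sigmoid(np.dot(X, np.transpose(theta))) >= 0.5).astype(int)
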
def train(X, y):
    # Batch gradient descent from a zero-initialized theta; swap in
    # mini_batch_gradient or stochastic_gradient to compare the variants
    return batch_gradient(X, y, np.zeros((1, np.shape(X)[1])))

def test(X, y, theta):
    # Accuracy plus false negative/positive rates over the given set
    good = 0
    falseNegative = 0
    falsePositive = 0
    for i in range(0, np.shape(X)[0]):
        pred = predict(X[i, :], theta)
        if pred == y[i][0]:
            good = good + 1
        elif pred == 0 and y[i][0] == 1:
            falseNegative = falseNegative + 1
        elif pred == 1 and y[i][0] == 0:
            falsePositive = falsePositive + 1
    print('Good: {}\nFalse negative: {}\nFalse positive: {}\n'.format(
        good / np.shape(X)[0], falseNegative / np.shape(X)[0], falsePositive / np.shape(X)[0]))
    return good / np.shape(X)[0]

def main():
    # Getting data
    x, y = get_data()
    # Normalizing data
    x = normalize_data(x)
    # Adding polynomials
    x = add_polynomials(x)
    # Adding the bias column of ones
    bias = np.ones((np.shape(x)[0], np.shape(x)[1] + 1))
    bias[:, 1:] = x
    x = bias
    # Creating the training / cross-validation / test sets
    numTraining = int(np.shape(x)[0] * TRAIN_RATIO)
    numCrossVal = int(np.shape(x)[0] * CROSS_VALIDATION_RATIO)
    trainingSetX = x[0:numTraining, :]
    crossValSetX = x[numTraining:(numTraining + numCrossVal), :]
    testSetX = x[(numTraining + numCrossVal):, :]
    trainingSetY = y[0:numTraining, :]
    crossValSetY = y[numTraining:(numTraining + numCrossVal), :]
    testSetY = y[(numTraining + numCrossVal):, :]
    # Training
    thetas = train(trainingSetX, trainingSetY)
    # Testing
    pourcentSuccessTrain = test(trainingSetX, trainingSetY, thetas)
    pourcentSuccessCrossValidation = test(crossValSetX, crossValSetY, thetas)
    pourcentSuccessTest = test(testSetX, testSetY, thetas)
    print('Success training set: {:.2f} %'.format(pourcentSuccessTrain * 100))
    print('Success crossvalidation set: {:.2f} %'.format(pourcentSuccessCrossValidation * 100))
    print('Success test set: {:.2f} %'.format(pourcentSuccessTest * 100))

if __name__ == '__main__':
    main()