• API
• FAQ
• Tools
• Archive
SHARE
TWEET

Untitled

a guest Jan 24th, 2017 10 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
import numpy as np

# Data file. Assumed comma-separated with the label in the last column
# (ex2data1.txt from Andrew Ng's ML course has that layout) -- TODO confirm.
FILENAME = 'ex2data1.txt'
# Example splitting ratios; the test set gets whatever remains.
TRAIN_RATIO = 0.7
CROSS_VALIDATION_RATIO = 0.1
# Learning rate for gradient descent.
ALPHA = 0.001
# Number of gradient-descent iterations.
NUM_ITERS = 40000
# The size of a mini-batch (if applicable).
BATCH_SIZE = 40
# Highest polynomial degree for the features (1 = no extra features).
POLYNOMIAL = 1
# Lambda for regularization.
# NOTE(review): LAMBDA is defined but never used anywhere in this file.
LAMBDA = 0.5
def get_data():
    """Load the dataset from FILENAME and split it into features and labels.

    Returns:
        (x, y): x is an (m, n) array of features, y an (m, 1) array holding
        the last column of the file (the labels).
    """
    # NOTE(review): the original paste lost the loading line; the file is
    # assumed to be comma-separated (ex2data1.txt style) -- TODO confirm.
    data = np.loadtxt(FILENAME, delimiter=',')
    x = data[:, :-1]
    # Index with a list so y stays two-dimensional (m, 1) for the matrix
    # operations used elsewhere in this file.
    y = data[:, [np.shape(data)[1] - 1]]
    return (x, y)
25.
def normalize_data(X):
    """Z-score normalize each column of X in place and return X.

    Uses the population standard deviation (np.std default, ddof=0).
    A constant column would divide by zero; callers must avoid that.
    """
    for i in range(0, np.shape(X)[1]):
        std = np.std(X[:, i])
        mean = np.mean(X[:, i])
        # BUG FIX: the original normalized every column with column 1's
        # values (X[:, 1]); each column must be normalized with itself.
        X[:, i] = (X[:, i] - mean) / std
    return X
32.
def add_polynomial(X, degree=None):
    """Append polynomial features x**2 .. x**degree for every column of X.

    NOTE(review): the original paste lost this function's `def` line; the
    name is reconstructed from context -- confirm against the full source.

    Args:
        X: (m, n) feature matrix.
        degree: highest power to generate; defaults to the module-level
            POLYNOMIAL constant. degree <= 1 leaves X unchanged.

    Returns:
        X with max(degree - 1, 0) * n extra columns appended.
    """
    if degree is None:
        degree = POLYNOMIAL
    extra_cols = max(degree - 1, 0)
    # Iterate over the ORIGINAL columns only; X grows inside the loop but
    # indices 0..n-1 still address the original columns.
    n_original = np.shape(X)[1]
    for i in range(0, n_original):
        # BUG FIX: the original allocated degree-1 columns but only filled
        # indices 1..degree-2, leaving a zero column and skipping x**degree.
        newX = np.zeros((np.shape(X)[0], extra_cols))
        for j in range(0, extra_cols):
            newX[:, j] = X[:, i] ** (j + 2)
        X = np.concatenate((X, newX), 1)
    return X
40.
def sigmoid(x):
    """Logistic function: maps any real input (scalar or array) into (0, 1)."""
    neg_exp = np.exp(-x)
    return 1 / (1 + neg_exp)
43.
def cost_function(X, y, theta):
    """Unregularized logistic-regression cost (average cross-entropy).

    Args:
        X: (m, n) design matrix (bias column included).
        y: (m, 1) labels in {0, 1}.
        theta: (1, n) parameter row vector.

    Returns:
        Scalar J(theta) = -(1/m) * sum(y*log(h) + (1-y)*log(1-h)).
    """
    num_examples = np.shape(X)[0]
    h = sigmoid(np.dot(X, np.transpose(theta)))
    # BUG FIX: the original scaled the cost by ALPHA (the learning rate,
    # which does not belong in J) and had the sign structure wrong:
    # y'*log(h) - (1-y)'*log(1-h) instead of -(y'*log(h) + (1-y)'*log(1-h)).
    cost = np.dot(np.transpose(y), np.log(h)) + np.dot(np.transpose(1 - y), np.log(1 - h))
    return (-cost / num_examples)[0][0]
48.
# Our gradient function, uses the defined const
def gradient(X, y, theta):
    """Full-batch gradient descent for logistic regression.

    NOTE(review): the original paste lost this function's `def` line; the
    name `gradient` is reconstructed from the comment above -- confirm
    against the full source.

    Args:
        X: (m, n) design matrix (bias column included).
        y: (m, 1) labels in {0, 1}.
        theta: (1, n) initial parameters.

    Returns:
        The learned (1, n) theta after NUM_ITERS iterations.
    """
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        h = sigmoid(np.dot(X, np.transpose(theta)))
        theta = theta - ALPHA / num_examples * np.transpose((np.dot(np.transpose(X), (h - y))))
        # Progress report every 10000 iterations.
        if i % 10000 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
            test(X, y, theta)
    return theta
60.
def batch_gradient(X, y, theta):
    """Mini-batch gradient descent for logistic regression.

    NOTE(review): the original paste lost this function's `def` line; the
    name `batch_gradient` is known from the call in train().

    Args:
        X: (m, n) design matrix (bias column included).
        y: (m, 1) labels in {0, 1}.
        theta: (1, n) initial parameters.

    Returns:
        The learned (1, n) theta after NUM_ITERS passes over the batches.
        Trailing examples beyond the last full batch are never used.
    """
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        for j in range(0, int(np.floor(num_examples / BATCH_SIZE))):
            # BUG FIX: the batch end was hard-coded to start + 40; it must
            # follow BATCH_SIZE or changing the constant breaks the slicing.
            start = j * BATCH_SIZE
            end = start + BATCH_SIZE
            x = X[start:end, :]
            sub_y = y[start:end, :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            theta = theta - ALPHA / num_examples * np.transpose((np.dot(np.transpose(x), (h - sub_y))))
        # Progress report every 100 iterations.
        if i % 100 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta
73.
def stochastic_gradient(X, y, theta):
    """Stochastic (per-example) gradient descent for logistic regression.

    NOTE(review): the original paste lost this function's `def` line; the
    name is reconstructed from the per-example update pattern -- confirm
    against the full source.

    Args:
        X: (m, n) design matrix (bias column included).
        y: (m, 1) labels in {0, 1}.
        theta: (1, n) initial parameters.

    Returns:
        The learned (1, n) theta after NUM_ITERS passes over the examples.
    """
    num_examples = np.shape(X)[0]
    for i in range(0, NUM_ITERS):
        for j in range(0, num_examples):
            x = X[j, :]
            sub_y = y[j, :]
            h = sigmoid(np.dot(x, np.transpose(theta)))
            theta = theta - ALPHA / num_examples * np.transpose(np.transpose(x) * (h - sub_y))
        # Progress report every 100 iterations.
        if i % 100 == 0:
            print(str(i) + '/' + str(NUM_ITERS))
            print('Cost function value: {:.10f}'.format(cost_function(X, y, theta)))
    return theta
86.
def predict(x, theta):
    """Classify a single example x: 1 when sigmoid(x . theta') >= 0.5, else 0."""
    probability = sigmoid(np.dot(x, np.transpose(theta)))
    return 1 if probability >= 0.5 else 0
93.
def train(X, y):
    """Fit logistic-regression parameters on (X, y) via mini-batch descent."""
    initial_theta = np.zeros((1, np.shape(X)[1]))
    return batch_gradient(X, y, initial_theta)
96.
def test(X, y, theta):
    """Evaluate theta on (X, y); print rates and return the accuracy.

    Prints the accuracy plus the false-negative and false-positive rates,
    all as fractions of the number of examples.
    """
    num_examples = np.shape(X)[0]
    good = 0
    falseNegative = 0
    falsePositive = 0
    for i in range(0, num_examples):
        pred = predict(X[i, :], theta)
        actual = y[i][0]
        if pred == actual:
            good += 1
        elif pred == 0 and actual == 1:
            falseNegative += 1
        elif pred == 1 and actual == 0:
            falsePositive += 1
    print('Good: {}\n False negative: {}\n False positive: {}\n'.format(good / num_examples, falseNegative / num_examples, falsePositive / num_examples))
    return good / num_examples
111.
def main():
    """Load, normalize and split the data, then train and evaluate the model."""
    # Getting data
    x, y = get_data()
    # Normalizing data
    x = normalize_data(x)
    # NOTE(review): the original paste is missing three lines here (its
    # lines 117-119); they most likely applied add_polynomial to x --
    # confirm against the full source.
    # Prepend a column of ones (the bias/intercept feature).
    bias = np.ones((np.shape(x)[0], np.shape(x)[1] + 1))
    bias[:, 1:] = x
    x = bias
    # Creating the sets. The split is positional, so the data is assumed to
    # be pre-shuffled -- TODO confirm. The test set gets the remainder
    # (the original also computed an unused numTest; removed).
    numTraining = int(np.shape(x)[0] * TRAIN_RATIO)
    numCrossVal = int(np.shape(x)[0] * CROSS_VALIDATION_RATIO)
    trainingSetX = x[0:numTraining, :]
    crossValSetX = x[numTraining:(numTraining + numCrossVal), :]
    testSetX = x[(numTraining + numCrossVal):, :]
    trainingSetY = y[0:numTraining, :]
    crossValSetY = y[numTraining:(numTraining + numCrossVal), :]
    testSetY = y[(numTraining + numCrossVal):, :]
    # Training
    thetas = train(trainingSetX, trainingSetY)
    # Test
    pourcentSuccessTrain = test(trainingSetX, trainingSetY, thetas)
    pourcentSuccessCrossValidation = test(crossValSetX, crossValSetY, thetas)
    pourcentSuccessTest = test(testSetX, testSetY, thetas)
    print('Success training set: {:.2f} %'.format(pourcentSuccessTrain * 100))
    print('Success crossvalidation set: {:.2f} %'.format(pourcentSuccessCrossValidation * 100))
    print('Success test set: {:.2f} %'.format(pourcentSuccessTest * 100))
142.
if __name__ == '__main__':
    # Guard the entry point so importing this module does not run training.
    main()
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy.

Top