import math

import numpy as np

# Layer sizes: 784 inputs (28x28 pixels), 45 hidden units, 10 output units
network = {
    1: 28 * 28,
    2: 45,
    3: 10
}


def sigmoid(value):
    return 1.0 / (1.0 + math.exp(-value))
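
# An optional aside, not from the original paste: math.exp(-value) overflows
# for large negative inputs (roughly value < -709). A minimal numerically
# stable sketch of the same function ('stable_sigmoid' is a hypothetical name):
def stable_sigmoid(value):
    if value >= 0:
        return 1.0 / (1.0 + math.exp(-value))
    # For negative inputs, rewrite so the exponent is never positive
    e = math.exp(value)
    return e / (1.0 + e)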

def train_network(_data_set, _theta1, _theta2, _should_output_deltas):
    # Vectorize the sigmoid so it applies element-wise to arrays
    sigmoid_vec = np.vectorize(sigmoid)
    # Initialize the deltas (gradient accumulators)
    delta1 = np.zeros(np.shape(_theta1))
    delta2 = np.zeros(np.shape(_theta2))
    # Initialize lists to cache the outputs at every iteration
    h_x = []
    y = []
    mean_train = 0.0
    for k in range(len(_data_set)):
        mean_train += np.mean(_data_set[k][0])
    mean_train /= len(_data_set)
    for i in range(len(_data_set)):
        # 'batches' is referenced but never defined in this paste; it is
        # assumed to be a set of progress-checkpoint indices defined elsewhere
        if i in batches and _should_output_deltas:
            print('%d iterations complete...' % (i + 1))
        # Subtract the mean from each element in the matrix and divide by 255
        a1 = np.divide((_data_set[i][0] - mean_train), 255)
        # Insert the bias unit
        a1 = np.insert(a1, 0, 1).reshape(network[1] + 1, 1)
        # Compute the hidden-layer activations
        z2 = sigmoid_vec(np.matmul(_theta1, a1))
        # Add the bias unit
        a2 = np.insert(z2, 0, 1).reshape(network[2] + 1, 1)
        # Compute the sigmoid of the output layer
        z3 = np.matmul(_theta2, a2)
        a3 = sigmoid_vec(z3)
        # Softmax -> commented out due to confusion in usage
        # a3 = softmax(a3)
        # print('After softmax, a3 = %s' % str(a3))
        # Reshape the label to a column vector
        label = np.reshape(_data_set[i][1], (10, 1))
        # Cache the outputs for the cost computation
        y.append(label)
        h_x.append(a3)
        # Output-layer error
        d3 = np.subtract(a3, label)
        # Hidden-layer error (the sigmoid derivative is a2 * (1 - a2))
        _d2 = np.multiply(np.matmul(np.transpose(_theta2), d3), np.multiply(a2, 1 - a2))
        # Remove the bias-unit error _d2[0]
        _d2 = np.delete(_d2, 0, axis=0)
        # Accumulate into the delta matrices
        delta2 += np.matmul(d3, np.transpose(a2))
        delta1 += np.matmul(_d2, np.transpose(a1))
    if _should_output_deltas:
        # Average the accumulated gradients over the training set
        delta1 *= 1.0 / len(_data_set)
        delta2 *= 1.0 / len(_data_set)
        return delta1, delta2
    else:
        return compute_cost(np.asarray(h_x), np.asarray(y), len(_data_set))
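
# A hedged usage sketch: the same function serves two roles depending on the
# flag. (theta1, theta2 and training_data are assumed names, not defined here.)
#   d1, d2 = train_network(training_data, theta1, theta2, True)   # averaged gradients
#   cost = train_network(training_data, theta1, theta2, False)    # scalar cost only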

def softmax(matrix):
    """
    A rather redundant and inefficient way to calculate
    the softmax outputs of a column vector.
    """
    shape = np.shape(matrix)
    t = matrix.flatten().tolist()
    e_t = [math.exp(_z) for _z in t]
    sum_e_t = sum(e_t)
    soft_e_t = [i / sum_e_t for i in e_t]
    return np.asarray(soft_e_t).reshape(shape)
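
# A more idiomatic NumPy alternative (a sketch, not the author's version):
# shifting by the max before exponentiating avoids overflow, and the result
# is unchanged because softmax is shift-invariant.
def softmax_stable(matrix):
    shifted = matrix - np.max(matrix)  # largest exponent becomes 0
    e_t = np.exp(shifted)              # element-wise exponentials
    return e_t / np.sum(e_t)           # normalize so the entries sum to 1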

def compute_cost(h_x, y, n):
    cost = 0.0
    for i in range(n):
        # Cross-entropy summed over all 10 output units of example i
        cost += np.sum(-(y[i] * np.log(h_x[i])) - ((1 - y[i]) * np.log(1 - h_x[i])))
    # Average over the n examples
    return cost / n
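
# An equivalent vectorized sketch, assuming h_x and y are the (n, 10, 1)
# arrays built in train_network; the Python loop collapses into array ops.
def compute_cost_vectorized(h_x, y, n):
    # Mean cross-entropy over all n examples and all 10 output units
    return float(np.sum(-y * np.log(h_x) - (1 - y) * np.log(1 - h_x)) / n)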

def gradient_descent(training_data, _theta1, _theta2, _alpha, _max_iter):
    # NOTE: the enclosing 'def' line was missing from the paste; the signature
    # is assumed from the variables referenced below
    cost_iter = {}
    # Print debugging
    print('Performing gradient descent for %d iterations with %f learning rate...' % (_max_iter, _alpha))
    for i in range(_max_iter):
        # Back-propagate to get the averaged gradients, then take a step
        _d1, _d2 = train_network(training_data, _theta1, _theta2, True)
        _theta1 -= np.multiply(_alpha, _d1)
        _theta2 -= np.multiply(_alpha, _d2)
        # Current cost
        cost = train_network(training_data, _theta1, _theta2, False)
        # Cache the cost
        cost_iter[i] = cost
        # If the reduction in cost is less than 10^-4, stop
        if i >= 1 and (cost_iter[i - 1] - cost_iter[i] <= 0.0001):
            print("It seems that the cost isn't converging quickly. Breaking loop at iteration %d..." % (i + 1))
            del cost_iter[i]
            break
        print('Iteration: %d\tCost: %f' % (i, cost))
    return _theta1, _theta2, cost_iter
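
# A hypothetical call, kept commented out because the data loading is not in
# this paste. The theta shapes follow the 'network' dict above; the random
# initialization range is an assumption, not taken from the original.
#   theta1 = np.random.uniform(-0.12, 0.12, (network[2], network[1] + 1))
#   theta2 = np.random.uniform(-0.12, 0.12, (network[3], network[2] + 1))
#   theta1, theta2, costs = gradient_descent(training_data, theta1, theta2, 0.1, 100)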

def test_network(optimal_theta1, optimal_theta2, _test_set):
    """
    Very similar to the 'train' method, but
    back-propagation is not done here.
    Instead, a counter keeps track of how many times
    the neural network predicted the right answer, and the
    accuracy is returned.
    """
    print('Now in the testing phase...')
    if _test_set is None:
        raise ValueError('Test data or test labels are null')
    sigmoid_vec = np.vectorize(sigmoid)
    hit_counter = 0
    mean_test = 0.0
    for __k in range(len(_test_set)):
        mean_test += np.mean(_test_set[__k][0])
    mean_test /= len(_test_set)
    # Only iterating over 150 examples for now
    for k in range(150):
        a1 = np.divide((_test_set[k][0] - mean_test), 255)
        a1 = np.insert(a1, 0, 1).reshape(network[1] + 1, 1)
        z2 = sigmoid_vec(np.matmul(optimal_theta1, a1))
        a2 = np.insert(z2, 0, 1).reshape(network[2] + 1, 1)
        z3 = np.matmul(optimal_theta2, a2)
        a3 = sigmoid_vec(z3)
        # print(a3)
        # Take the softmax?!
        # a3 = softmax(a3)
        label = np.reshape(_test_set[k][1], (10, 1))
        print('(Computed) %s = (Actual) %s' % (str(np.argmax(a3)), str(np.argmax(label))))
        if np.argmax(a3) == np.argmax(label):
            hit_counter += 1
        # cache = np.reshape(cache, (28, 28))
        # plt.imshow(cache, interpolation='nearest')
        # plt.show()
    print('Hits: %d' % hit_counter)
    # Return the fraction correct over the 150 sampled examples
    return hit_counter / 150.0
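
# Wiring it together, a hedged sketch ('test_data' is an assumed name for a
# list shaped like _test_set, i.e. (image, one-hot label) pairs):
#   accuracy = test_network(theta1, theta2, test_data)
#   print('Accuracy over the 150 sampled examples: %.2f%%' % (accuracy * 100))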

# Sample output from test_network:
(Computed) 0 = (Actual) 7
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 8
(Computed) 1 = (Actual) 1
(Computed) 1 = (Actual) 3
(Computed) 1 = (Actual) 4
(Computed) 1 = (Actual) 9
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 3
(Computed) 1 = (Actual) 9
(Computed) 1 = (Actual) 3
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 9
(Computed) 0 = (Actual) 4
(Computed) 0 = (Actual) 9
(Computed) 0 = (Actual) 1
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 0 = (Actual) 2
(Computed) 1 = (Actual) 8
(Computed) 0 = (Actual) 5
(Computed) 0 = (Actual) 3
(Computed) 0 = (Actual) 1
(Computed) 1 = (Actual) 2
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 7
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 5
(Computed) 0 = (Actual) 9
(Computed) 1 = (Actual) 1
(Computed) 1 = (Actual) 6