import math

import numpy as np

# Layer sizes: 784 inputs (28x28 pixels), 45 hidden units, 10 output units
network = {
    1: 28 * 28,
    2: 45,
    3: 10
}


def sigmoid(value):
    return 1.0 / (1.0 + math.exp(-value))
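
# An optional aside, not from the original paste: math.exp(-value) overflows
# for large negative inputs (roughly value < -709). A minimal numerically
# stable sketch of the same function ('stable_sigmoid' is a hypothetical name):
def stable_sigmoid(value):
    if value >= 0:
        return 1.0 / (1.0 + math.exp(-value))
    # For negative inputs, rewrite so the exponent is never positive
    e = math.exp(value)
    return e / (1.0 + e)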

def train_network(_data_set, _theta1, _theta2, _should_output_deltas):
    # Vectorize the sigmoid so it applies element-wise to arrays
    sigmoid_vec = np.vectorize(sigmoid)
    # Initialize the deltas (gradient accumulators)
    delta1 = np.zeros(np.shape(_theta1))
    delta2 = np.zeros(np.shape(_theta2))
    # Initialize lists to cache the outputs at every iteration
    h_x = []
    y = []
    mean_train = 0.0
    for k in range(len(_data_set)):
        mean_train += np.mean(_data_set[k][0])
    mean_train /= len(_data_set)
    for i in range(len(_data_set)):
        # 'batches' is referenced but never defined in this paste; it is
        # assumed to be a set of progress-checkpoint indices defined elsewhere
        if i in batches and _should_output_deltas:
            print('%d iterations complete...' % (i + 1))
        # Subtract the mean from each element in the matrix and divide by 255
        a1 = np.divide((_data_set[i][0] - mean_train), 255)
        # Insert the bias unit
        a1 = np.insert(a1, 0, 1).reshape(network[1] + 1, 1)
        # Compute the hidden-layer activations
        z2 = sigmoid_vec(np.matmul(_theta1, a1))
        # Add the bias unit
        a2 = np.insert(z2, 0, 1).reshape(network[2] + 1, 1)
        # Compute the sigmoid of the output layer
        z3 = np.matmul(_theta2, a2)
        a3 = sigmoid_vec(z3)
        # Softmax -> commented out due to confusion in usage
        # a3 = softmax(a3)
        # print('After softmax, a3 = %s' % str(a3))
        # Reshape the label to a column vector
        label = np.reshape(_data_set[i][1], (10, 1))
        # Cache the outputs for the cost computation
        y.append(label)
        h_x.append(a3)
        # Output-layer error
        d3 = np.subtract(a3, label)
        # Hidden-layer error (the sigmoid derivative is a2 * (1 - a2))
        _d2 = np.multiply(np.matmul(np.transpose(_theta2), d3), np.multiply(a2, 1 - a2))
        # Remove the bias-unit error _d2[0]
        _d2 = np.delete(_d2, 0, axis=0)
        # Accumulate into the delta matrices
        delta2 += np.matmul(d3, np.transpose(a2))
        delta1 += np.matmul(_d2, np.transpose(a1))
    if _should_output_deltas:
        # Average the accumulated gradients over the training set
        delta1 *= 1.0 / len(_data_set)
        delta2 *= 1.0 / len(_data_set)
        return delta1, delta2
    else:
        return compute_cost(np.asarray(h_x), np.asarray(y), len(_data_set))
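
# A hedged usage sketch: the same function serves two roles depending on the
# flag. (theta1, theta2 and training_data are assumed names, not defined here.)
#   d1, d2 = train_network(training_data, theta1, theta2, True)   # averaged gradients
#   cost = train_network(training_data, theta1, theta2, False)    # scalar cost only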

def softmax(matrix):
    """
    A rather redundant and inefficient way to calculate
    the softmax outputs of a column vector.
    """
    shape = np.shape(matrix)
    t = matrix.flatten().tolist()
    e_t = [math.exp(_z) for _z in t]
    sum_e_t = sum(e_t)
    soft_e_t = [i / sum_e_t for i in e_t]
    return np.asarray(soft_e_t).reshape(shape)
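
# A more idiomatic NumPy alternative (a sketch, not the author's version):
# shifting by the max before exponentiating avoids overflow, and the result
# is unchanged because softmax is shift-invariant.
def softmax_stable(matrix):
    shifted = matrix - np.max(matrix)  # largest exponent becomes 0
    e_t = np.exp(shifted)              # element-wise exponentials
    return e_t / np.sum(e_t)           # normalize so the entries sum to 1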

def compute_cost(h_x, y, n):
    cost = 0.0
    for i in range(n):
        # Cross-entropy summed over all 10 output units of example i
        cost += np.sum(-(y[i] * np.log(h_x[i])) - ((1 - y[i]) * np.log(1 - h_x[i])))
    # Average over the n examples
    return cost / n
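
# An equivalent vectorized sketch, assuming h_x and y are the (n, 10, 1)
# arrays built in train_network; the Python loop collapses into array ops.
def compute_cost_vectorized(h_x, y, n):
    # Mean cross-entropy over all n examples and all 10 output units
    return float(np.sum(-y * np.log(h_x) - (1 - y) * np.log(1 - h_x)) / n)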

def gradient_descent(training_data, _theta1, _theta2, _alpha, _max_iter):
    # NOTE: the enclosing 'def' line was missing from the paste; the signature
    # is assumed from the variables referenced below
    cost_iter = {}
    # Print debugging
    print('Performing gradient descent for %d iterations with %f learning rate...' % (_max_iter, _alpha))
    for i in range(_max_iter):
        # Back-propagate to get the averaged gradients, then take a step
        _d1, _d2 = train_network(training_data, _theta1, _theta2, True)
        _theta1 -= np.multiply(_alpha, _d1)
        _theta2 -= np.multiply(_alpha, _d2)
        # Current cost
        cost = train_network(training_data, _theta1, _theta2, False)
        # Cache the cost
        cost_iter[i] = cost
        # If the reduction in cost is less than 10^-4, stop
        if i >= 1 and (cost_iter[i - 1] - cost_iter[i] <= 0.0001):
            print("It seems that the cost isn't converging quickly. Breaking loop at iteration %d..." % (i + 1))
            del cost_iter[i]
            break
        print('Iteration: %d\tCost: %f' % (i, cost))
    return _theta1, _theta2, cost_iter
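
# A hypothetical call, kept commented out because the data loading is not in
# this paste. The theta shapes follow the 'network' dict above; the random
# initialization range is an assumption, not taken from the original.
#   theta1 = np.random.uniform(-0.12, 0.12, (network[2], network[1] + 1))
#   theta2 = np.random.uniform(-0.12, 0.12, (network[3], network[2] + 1))
#   theta1, theta2, costs = gradient_descent(training_data, theta1, theta2, 0.1, 100)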

def test_network(optimal_theta1, optimal_theta2, _test_set):
    """
    Very similar to the 'train' method, but
    back-propagation is not done here.
    Instead, a counter keeps track of how many times
    the neural network predicted the right answer, and the
    accuracy is returned.
    """
    print('Now in the testing phase...')
    if _test_set is None:
        raise ValueError('Test data or test labels are null')
    sigmoid_vec = np.vectorize(sigmoid)
    hit_counter = 0
    mean_test = 0.0
    for __k in range(len(_test_set)):
        mean_test += np.mean(_test_set[__k][0])
    mean_test /= len(_test_set)
    # Only iterating over 150 examples for now
    for k in range(150):
        a1 = np.divide((_test_set[k][0] - mean_test), 255)
        a1 = np.insert(a1, 0, 1).reshape(network[1] + 1, 1)
        z2 = sigmoid_vec(np.matmul(optimal_theta1, a1))
        a2 = np.insert(z2, 0, 1).reshape(network[2] + 1, 1)
        z3 = np.matmul(optimal_theta2, a2)
        a3 = sigmoid_vec(z3)
        # print(a3)
        # Take the softmax?!
        # a3 = softmax(a3)
        label = np.reshape(_test_set[k][1], (10, 1))
        print('(Computed) %s = (Actual) %s' % (str(np.argmax(a3)), str(np.argmax(label))))
        if np.argmax(a3) == np.argmax(label):
            hit_counter += 1
        # cache = np.reshape(cache, (28, 28))
        # plt.imshow(cache, interpolation='nearest')
        # plt.show()
    print('Hits: %d' % hit_counter)
    # Return the fraction correct over the 150 sampled examples
    return hit_counter / 150.0
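
# Wiring it together, a hedged sketch ('test_data' is an assumed name for a
# list shaped like _test_set, i.e. (image, one-hot label) pairs):
#   accuracy = test_network(theta1, theta2, test_data)
#   print('Accuracy over the 150 sampled examples: %.2f%%' % (accuracy * 100))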

# Sample output from test_network:
(Computed) 0 = (Actual) 7
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 8
(Computed) 1 = (Actual) 1
(Computed) 1 = (Actual) 3
(Computed) 1 = (Actual) 4
(Computed) 1 = (Actual) 9
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 1 = (Actual) 3
(Computed) 1 = (Actual) 9
(Computed) 1 = (Actual) 3
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 9
(Computed) 0 = (Actual) 4
(Computed) 0 = (Actual) 9
(Computed) 0 = (Actual) 1
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 3
(Computed) 0 = (Actual) 2
(Computed) 1 = (Actual) 8
(Computed) 0 = (Actual) 5
(Computed) 0 = (Actual) 3
(Computed) 0 = (Actual) 1
(Computed) 1 = (Actual) 2
(Computed) 0 = (Actual) 7
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 7
(Computed) 1 = (Actual) 7
(Computed) 0 = (Actual) 5
(Computed) 1 = (Actual) 1
(Computed) 0 = (Actual) 5
(Computed) 0 = (Actual) 9
(Computed) 1 = (Actual) 1
(Computed) 1 = (Actual) 6