import random

import numpy as np

batch_size = 256
# train_dataset = train_dataset[0:data_size]
# train_labels = train_labels[0:data_size]
# num_examples = len(train_dataset)  # training set size
nn_input_dim = 784   # input layer dimensionality (28x28 images, flattened)
nn_output_dim = 10   # output layer dimensionality (10 classes)

# Gradient descent hyperparameters. The original paste uses epsilon and reg_lambda
# without defining them; the values below are assumptions, not the original settings.
epsilon = 0.001      # learning rate for gradient descent
reg_lambda = 0.01    # L2 regularization strength
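# train_dataset / train_labels are never defined in this paste; the commented-out
# slicing above suggests they were prepared in an earlier notebook cell.
# A minimal sketch of one way to build them in the expected format -- rows of 784
# float features plus one-hot labels of length 10 -- assuming scikit-learn is
# available. This is an assumption about the data pipeline, not the original loader.
from sklearn.datasets import fetch_openml

_mnist = fetch_openml('mnist_784', version=1, as_frame=False)
train_dataset = _mnist.data.astype('float64') / 255.0   # shape (70000, 784)
_classes = _mnist.target.astype('int64')                # digit labels 0-9
train_labels = np.eye(nn_output_dim)[_classes]          # one-hot, shape (70000, 10)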
# Helper function to evaluate the loss on a random mini-batch
def calculate_loss(model):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    batch_train_dataset, batch_train_labels = sample_training_data(batch_size)
    # Forward propagation to calculate our predictions
    z1 = batch_train_dataset.dot(W1) + b1
    a1 = z1 * (z1 > 0)  # ReLU activation
    # a1 = np.tanh(z1)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    # Calculating the cross-entropy loss; labels are one-hot, so argmax
    # recovers the class index of each example
    correct_logprobs = -np.log(probs[range(batch_size), np.argmax(batch_train_labels, axis=1)])
    data_loss = np.sum(correct_logprobs)
    # Add regularization term to the loss (optional)
    data_loss += reg_lambda/2 * (np.sum(np.square(W1)) + np.sum(np.square(W2)))
    return 1./batch_size * data_loss
def sample_training_data(batch_size):
    # Draw a random mini-batch (without replacement) from the training set
    random_indexes = random.sample(range(len(train_dataset)), batch_size)
    batch_train_dataset = train_dataset[random_indexes]
    batch_train_labels = train_labels[random_indexes]
    return batch_train_dataset, batch_train_labels
def build_model(nn_hdim, batch_size, num_passes=10000, print_loss=False):
    """
    This function learns parameters for the neural network and returns the model.
    - nn_hdim: Number of nodes in the hidden layer
    - num_passes: Number of mini-batch gradient descent iterations
    - print_loss: If True, print the loss every 1000 iterations
    """
    # Initialize the parameters to random values. We need to learn these.
    np.random.seed(0)
    W1 = np.random.randn(nn_input_dim, nn_hdim) / np.sqrt(nn_input_dim)
    b1 = np.zeros((1, nn_hdim))
    W2 = np.random.randn(nn_hdim, nn_output_dim) / np.sqrt(nn_hdim)
    b2 = np.zeros((1, nn_output_dim))

    # This is what we return at the end
    model = {}

    # Gradient descent. For each batch...
    for i in range(0, num_passes):
        batch_train_dataset, batch_train_labels = sample_training_data(batch_size)

        #
        # Forward propagation
        #
        z1 = batch_train_dataset.dot(W1) + b1
        a1 = z1 * (z1 > 0)  # ReLU activation
        # a1 = np.tanh(z1)
        z2 = a1.dot(W2) + b2
        exp_scores = np.exp(z2)
        probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)

        #
        # Backpropagation
        #
        delta3 = (probs - batch_train_labels)
        dW2 = (a1.T).dot(delta3)
        db2 = np.sum(delta3, axis=0, keepdims=True)  # sum over the examples in the batch
        delta2 = delta3.dot(W2.T) * (z1 > 0)         # propagate through ReLU using the pre-update W2
        dW1 = (batch_train_dataset.T).dot(delta2)
        db1 = np.sum(delta2, axis=0, keepdims=True)

        # Add regularization terms to the weight gradients
        dW2 += reg_lambda * W2
        dW1 += reg_lambda * W1

        # Gradient descent parameter update
        W1 += -epsilon * dW1
        b1 += -epsilon * db1
        W2 += -epsilon * dW2
        b2 += -epsilon * db2

        # Assign new parameters to the model
        model = {'W1': W1, 'b1': b1, 'W2': W2, 'b2': b2}

        # Optionally print the loss.
        # This is relatively expensive, so we don't do it on every iteration.
        if print_loss and i % 1000 == 0:
            print("Loss after iteration %i: %f" % (i, calculate_loss(model)))

    return model
# In[23]:
model = build_model(10, batch_size, num_passes=20000, print_loss=True)
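# A minimal sketch of how the returned model might be used for prediction.
# predict() is a hypothetical helper not present in the original paste; it repeats
# the same forward pass used in training (ReLU hidden layer + softmax output).
# Since the paste defines no held-out split, the quick check below measures
# accuracy on a random batch drawn from the training set.
def predict(model, X):
    W1, b1, W2, b2 = model['W1'], model['b1'], model['W2'], model['b2']
    z1 = X.dot(W1) + b1
    a1 = z1 * (z1 > 0)
    z2 = a1.dot(W2) + b2
    exp_scores = np.exp(z2)
    probs = exp_scores / np.sum(exp_scores, axis=1, keepdims=True)
    return np.argmax(probs, axis=1)

batch_X, batch_y = sample_training_data(batch_size)
accuracy = np.mean(predict(model, batch_X) == np.argmax(batch_y, axis=1))
print("Batch accuracy: %f" % accuracy)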