import numpy as np
import matplotlib.pyplot as plt
from utils import *
import copy
import math
%matplotlib inline

# load dataset
X_train, y_train = load_data("data/ex2data1.txt")

print("First five elements in X_train are:\n", X_train[:5])
print("Type of X_train:", type(X_train))

print("First five elements in y_train are:\n", y_train[:5])
print("Type of y_train:", type(y_train))

print('The shape of X_train is: ' + str(X_train.shape))
print('The shape of y_train is: ' + str(y_train.shape))
print('We have m = %d training examples' % (len(y_train)))

# Plot examples
plot_data(X_train, y_train[:], pos_label="Admitted", neg_label="Not admitted")

# Set the y-axis label
plt.ylabel('Exam 2 score')
# Set the x-axis label
plt.xlabel('Exam 1 score')
plt.legend(loc="upper right")
plt.show()
# UNQ_C1
# GRADED FUNCTION: sigmoid
def sigmoid(z):
    """
    Compute the sigmoid of z

    Args:
        z (ndarray): A scalar, numpy array of any size.

    Returns:
        g (ndarray): sigmoid(z), with the same shape as z
    """
    ### START CODE HERE ###
    # np.exp works elementwise, so this handles scalars and arrays alike
    g = 1 / (1 + np.exp(-z))
    ### END CODE HERE ###
    return g
- print ("sigmoid(0) = " + str(sigmoid(0)))
- print ("sigmoid([ -1, 0, 1, 2]) = " + str(sigmoid(np.array([-1, 0, 1, 2]))))
- # UNIT TESTS
- from public_tests import *
- sigmoid_test(sigmoid)
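# Optional sanity check (not part of the graded code): plotting sigmoid over a
# range of inputs should show the familiar S-curve passing through (0, 0.5).
z_tmp = np.linspace(-10, 10, 200)
plt.plot(z_tmp, sigmoid(z_tmp))
plt.xlabel('z')
plt.ylabel('sigmoid(z)')
plt.title('Sigmoid function')
plt.show()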
# UNQ_C2
# GRADED FUNCTION: compute_cost
def compute_cost(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples

    Args:
        X : (ndarray Shape (m,n))  data, m examples by n features
        y : (array_like Shape (m,)) target value
        w : (array_like Shape (n,)) values of parameters of the model
        b : (scalar)                value of bias parameter of the model
        lambda_: unused placeholder

    Returns:
        total_cost: (scalar) cost
    """
    m, n = X.shape

    ### START CODE HERE ###
    total_cost = 0
    for i in range(m):
        # linear combination z = w . x^(i) + b for this example
        z_wb = 0
        for j in range(n):
            z_wb += w[j] * X[i][j]
        z_wb += b
        f_wb = sigmoid(z_wb)
        # cross-entropy loss for example i
        loss = (-y[i] * np.log(f_wb)) - (1 - y[i]) * np.log(1 - f_wb)
        total_cost += loss
    total_cost /= m
    ### END CODE HERE ###

    return total_cost
m, n = X_train.shape

# Compute and display cost with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.
cost = compute_cost(X_train, y_train, initial_w, initial_b)
print('Cost at initial w (zeros): {:.3f}'.format(cost))

# Compute and display cost with non-zero w
test_w = np.array([0.2, 0.2])
test_b = -24.
cost = compute_cost(X_train, y_train, test_w, test_b)
print('Cost at test w,b: {:.3f}'.format(cost))

# UNIT TESTS
compute_cost_test(compute_cost)
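# A vectorized equivalent of compute_cost, shown only as an illustration of the
# same computation with matrix operations (this is not the graded function and
# the name is my own):
def compute_cost_vectorized(X, y, w, b):
    m = X.shape[0]
    f_wb = sigmoid(X @ w + b)                              # shape (m,)
    loss = -y * np.log(f_wb) - (1 - y) * np.log(1 - f_wb)  # per-example loss
    return np.sum(loss) / m

# Should match the loop-based version on the same inputs
print('Vectorized cost at test w,b: {:.3f}'.format(
    compute_cost_vectorized(X_train, y_train, test_w, test_b)))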
# UNQ_C3
# GRADED FUNCTION: compute_gradient
def compute_gradient(X, y, w, b, lambda_=None):
    """
    Computes the gradient for logistic regression

    Args:
        X : (ndarray Shape (m,n))   data, m examples by n features
        y : (array_like Shape (m,)) target value
        w : (array_like Shape (n,)) values of parameters of the model
        b : (scalar)                value of bias parameter of the model
        lambda_: unused placeholder.

    Returns
        dj_db: (scalar)              The gradient of the cost w.r.t. the parameter b.
        dj_dw: (array_like Shape (n,)) The gradient of the cost w.r.t. the parameters w.
    """
    m, n = X.shape
    dj_dw = np.zeros(w.shape)
    dj_db = 0.

    ### START CODE HERE ###
    for i in range(m):
        # prediction error (f_wb - y) for example i drives both gradients
        z_wb = 0
        for j in range(n):
            z_wb += w[j] * X[i][j]
        z_wb += b
        f_wb = sigmoid(z_wb)

        dj_db += f_wb - y[i]
        for j in range(n):
            dj_dw[j] += (f_wb - y[i]) * X[i][j]

    dj_dw = dj_dw / m
    dj_db = dj_db / m
    ### END CODE HERE ###

    return dj_db, dj_dw
# Compute and display gradient with w initialized to zeroes
initial_w = np.zeros(n)
initial_b = 0.

dj_db, dj_dw = compute_gradient(X_train, y_train, initial_w, initial_b)
print(f'dj_db at initial w (zeros):{dj_db}')
print(f'dj_dw at initial w (zeros):{dj_dw.tolist()}')

# Compute and display cost and gradient with non-zero w
test_w = np.array([0.2, -0.5])
test_b = -24
dj_db, dj_dw = compute_gradient(X_train, y_train, test_w, test_b)
print('dj_db at test_w:', dj_db)
print('dj_dw at test_w:', dj_dw.tolist())

# UNIT TESTS
compute_gradient_test(compute_gradient)
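# As with the cost, the gradient can be computed with matrix operations. This is
# an illustrative, non-graded sketch (the function name is my own) that should
# agree with the loop-based compute_gradient:
def compute_gradient_vectorized(X, y, w, b):
    m = X.shape[0]
    err = sigmoid(X @ w + b) - y   # prediction errors, shape (m,)
    dj_dw = X.T @ err / m          # shape (n,)
    dj_db = np.sum(err) / m
    return dj_db, dj_dw

dj_db_v, dj_dw_v = compute_gradient_vectorized(X_train, y_train, test_w, test_b)
print('vectorized dj_db at test_w:', dj_db_v)
print('vectorized dj_dw at test_w:', dj_dw_v.tolist())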
def gradient_descent(X, y, w_in, b_in, cost_function, gradient_function, alpha, num_iters, lambda_):
    """
    Performs batch gradient descent to learn the parameters. Updates w and b by
    taking num_iters gradient steps with learning rate alpha

    Args:
      X :    (array_like Shape (m, n))
      y :    (array_like Shape (m,))
      w_in : (array_like Shape (n,)) Initial values of parameters of the model
      b_in : (scalar)                Initial value of parameter of the model
      cost_function:     function to compute cost
      gradient_function: function to compute gradient
      alpha : (float)     Learning rate
      num_iters : (int)   number of iterations to run gradient descent
      lambda_ : (scalar, float) regularization constant

    Returns:
      w : (array_like Shape (n,)) Updated values of parameters of the model after
          running gradient descent
      b : (scalar)                Updated value of parameter of the model after
          running gradient descent
    """
    # number of training examples
    m = len(X)

    # Arrays to store cost J and w at each iteration, primarily for graphing later
    J_history = []
    w_history = []

    for i in range(num_iters):
        # Calculate the gradient and update the parameters
        dj_db, dj_dw = gradient_function(X, y, w_in, b_in, lambda_)

        # Update parameters using w, b, alpha and gradient
        w_in = w_in - alpha * dj_dw
        b_in = b_in - alpha * dj_db

        # Save cost J at each iteration
        if i < 100000:      # prevent resource exhaustion
            cost = cost_function(X, y, w_in, b_in, lambda_)
            J_history.append(cost)

        # Print cost 10 times over the run, or at every iteration if num_iters < 10
        if i % math.ceil(num_iters / 10) == 0 or i == (num_iters - 1):
            w_history.append(w_in)
            print(f"Iteration {i:4}: Cost {float(J_history[-1]):8.2f} ")

    return w_in, b_in, J_history, w_history  # return w and J,w history for graphing
np.random.seed(1)
initial_w = 0.01 * (np.random.rand(2).reshape(-1, 1) - 0.5)
initial_b = -8

# Some gradient descent settings
iterations = 10000
alpha = 0.001

w, b, J_history, _ = gradient_descent(X_train, y_train, initial_w, initial_b,
                                      compute_cost, compute_gradient, alpha, iterations, 0)

plot_decision_boundary(w, b, X_train, y_train)
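# Optional check (not part of the original lab output): the returned J_history
# can be plotted to confirm that the cost decreases steadily over the iterations.
plt.plot(np.arange(len(J_history)), np.array(J_history).flatten())
plt.xlabel('Iteration')
plt.ylabel('Cost J')
plt.title('Cost versus iteration')
plt.show()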
# UNQ_C4
# GRADED FUNCTION: predict
def predict(X, w, b):
    """
    Predict whether the label is 0 or 1 using learned logistic
    regression parameters w

    Args:
    X : (ndarray Shape (m, n))
    w : (array_like Shape (n,)) Parameters of the model
    b : (scalar, float)         Parameter of the model

    Returns:
    p: (ndarray (m,1))
        The predictions for X using a threshold at 0.5
    """
    # number of training examples
    m, n = X.shape
    p = np.zeros(m)

    ### START CODE HERE ###
    # Loop over each example
    for i in range(m):
        z_wb = 0
        # Loop over each feature
        for j in range(n):
            # Add the corresponding term to z_wb
            z_wb += w[j] * X[i][j]

        # Add bias term
        z_wb += b

        # Calculate the prediction for this example
        f_wb = sigmoid(z_wb)

        # Apply the threshold
        p[i] = f_wb >= 0.5
    ### END CODE HERE ###
    return p
# Test your predict code
np.random.seed(1)
tmp_w = np.random.randn(2)
tmp_b = 0.3
tmp_X = np.random.randn(4, 2) - 0.5

tmp_p = predict(tmp_X, tmp_w, tmp_b)
print(f'Output of predict: shape {tmp_p.shape}, value {tmp_p}')

# UNIT TESTS
predict_test(predict)
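# Illustration only (not the graded function, name is my own): the same
# predictions can be produced in one line with vectorized operations.
def predict_vectorized(X, w, b):
    # threshold the sigmoid of the linear model at 0.5
    return (sigmoid(X @ w + b) >= 0.5).astype(float)

print('Vectorized predict:', predict_vectorized(tmp_X, tmp_w, tmp_b))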
# Compute accuracy on our training set
p = predict(X_train, w, b)
print('Train Accuracy: %f' % (np.mean(p == y_train) * 100))
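# The remainder of the lab works on the second dataset (microchip test results).
# The reload of that dataset is not present in this paste; the file name below
# follows the naming convention of the first dataset and is an assumption.
X_train, y_train = load_data("data/ex2data2.txt")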
# print X_train
print("X_train:", X_train[:5])
print("Type of X_train:", type(X_train))

# print y_train
print("y_train:", y_train[:5])
print("Type of y_train:", type(y_train))

print('The shape of X_train is: ' + str(X_train.shape))
print('The shape of y_train is: ' + str(y_train.shape))
print('We have m = %d training examples' % (len(y_train)))
# Plot examples
plot_data(X_train, y_train[:], pos_label="Accepted", neg_label="Rejected")

# Set the y-axis label
plt.ylabel('Microchip Test 2')
# Set the x-axis label
plt.xlabel('Microchip Test 1')
plt.legend(loc="upper right")
plt.show()

print("Original shape of data:", X_train.shape)

mapped_X = map_feature(X_train[:, 0], X_train[:, 1])
print("Shape after feature mapping:", mapped_X.shape)

print("X_train[0]:", X_train[0])
print("mapped X_train[0]:", mapped_X[0])
# UNQ_C5
def compute_cost_reg(X, y, w, b, lambda_=1):
    """
    Computes the cost over all examples

    Args:
        X : (array_like Shape (m,n)) data, m examples by n features
        y : (array_like Shape (m,))  target value
        w : (array_like Shape (n,))  values of parameters of the model
        b : (scalar)                 value of bias parameter of the model
        lambda_ : (scalar, float)    controls amount of regularization
    Returns:
        total_cost: (scalar) cost
    """
    m, n = X.shape

    # Calls the compute_cost function that you implemented above
    cost_without_reg = compute_cost(X, y, w, b)

    # You need to calculate this value
    reg_cost = 0.

    ### START CODE HERE ###
    # sum of squared weights; the lambda_/(2*m) scaling is applied below
    for j in range(n):
        reg_cost += w[j] ** 2
    ### END CODE HERE ###

    # Add the regularization cost to get the total cost
    total_cost = cost_without_reg + (lambda_ / (2 * m)) * reg_cost

    return total_cost
X_mapped = map_feature(X_train[:, 0], X_train[:, 1])
np.random.seed(1)
initial_w = np.random.rand(X_mapped.shape[1]) - 0.5
initial_b = 0.5
lambda_ = 0.5
cost = compute_cost_reg(X_mapped, y_train, initial_w, initial_b, lambda_)

print("Regularized cost :", cost)

# UNIT TEST
compute_cost_reg_test(compute_cost_reg)
# UNQ_C6
def compute_gradient_reg(X, y, w, b, lambda_=1):
    """
    Computes the gradient for regularized logistic regression

    Args:
        X : (ndarray Shape (m,n))  data, m examples by n features
        y : (ndarray Shape (m,))   target value
        w : (ndarray Shape (n,))   values of parameters of the model
        b : (scalar)               value of parameter of the model
        lambda_ : (scalar, float)  regularization constant
    Returns
        dj_db: (scalar)             The gradient of the cost w.r.t. the parameter b.
        dj_dw: (ndarray Shape (n,)) The gradient of the cost w.r.t. the parameters w.
    """
    m, n = X.shape

    dj_db, dj_dw = compute_gradient(X, y, w, b)

    ### START CODE HERE ###
    # add the regularization term to the weight gradients (the bias is not regularized)
    for j in range(n):
        dj_dw[j] += (lambda_ / m) * w[j]
    ### END CODE HERE ###

    return dj_db, dj_dw
X_mapped = map_feature(X_train[:, 0], X_train[:, 1])
np.random.seed(1)
initial_w = np.random.rand(X_mapped.shape[1]) - 0.5
initial_b = 0.5
lambda_ = 0.5
dj_db, dj_dw = compute_gradient_reg(X_mapped, y_train, initial_w, initial_b, lambda_)

print(f"dj_db: {dj_db}")
print(f"First few elements of regularized dj_dw:\n {dj_dw[:4].tolist()}")

# UNIT TESTS
compute_gradient_reg_test(compute_gradient_reg)
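# Illustration only (names are my own, not graded functions): the regularized
# cost and gradient can also be written with vectorized operations, which is a
# useful cross-check against the loop-based versions above.
def compute_cost_reg_vectorized(X, y, w, b, lambda_=1):
    m = X.shape[0]
    f_wb = sigmoid(X @ w + b)
    cost = np.mean(-y * np.log(f_wb) - (1 - y) * np.log(1 - f_wb))
    return cost + (lambda_ / (2 * m)) * np.sum(w ** 2)

def compute_gradient_reg_vectorized(X, y, w, b, lambda_=1):
    m = X.shape[0]
    err = sigmoid(X @ w + b) - y
    dj_dw = X.T @ err / m + (lambda_ / m) * w   # bias term is not regularized
    dj_db = np.mean(err)
    return dj_db, dj_dw

print("Vectorized regularized cost:",
      compute_cost_reg_vectorized(X_mapped, y_train, initial_w, initial_b, lambda_))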
# Initialize fitting parameters
np.random.seed(1)
initial_w = np.random.rand(X_mapped.shape[1]) - 0.5
initial_b = 1.

# Set regularization parameter lambda_ (you can try varying this)
lambda_ = 0.01

# Some gradient descent settings
iterations = 10000
alpha = 0.01

w, b, J_history, _ = gradient_descent(X_mapped, y_train, initial_w, initial_b,
                                      compute_cost_reg, compute_gradient_reg,
                                      alpha, iterations, lambda_)

plot_decision_boundary(w, b, X_mapped, y_train)

# Compute accuracy on the training set
p = predict(X_mapped, w, b)
print('Train Accuracy: %f' % (np.mean(p == y_train) * 100))