# Untitled

def compute_grads(X, Y, P, Ws, bs, lambd):
    """Compute mini-batch gradients for a two-layer ReLU / softmax network.

    The forward pass is recomputed here as
    ``softmax(W2 @ relu(W1 @ X + b1) + b2)`` and the error ``P_batch - Y`` is
    propagated back through both layers. Every gradient is averaged over the
    batch, i.e. over the columns of ``X``.

    Args:
        X: Input batch with one sample per column, shape ``(d, n)``.
        Y: Targets with the same shape as the network output.
            Presumably one-hot columns -- confirm against the caller.
        P: Unused. The output probabilities are recomputed from ``X`` so that
            the hidden activations are available for the backward pass.
        Ws: Sequence whose first two items are the weight matrices
            ``W1`` and ``W2``.
        bs: Sequence whose first two items are the biases ``b1`` and ``b2``;
            each must broadcast against its layer's pre-activation
            (e.g. column vectors).
        lambd: Unused. NOTE(review): looks like a weight-penalty coefficient,
            but no regularisation term is added to the gradients here --
            confirm the cost's convention before adding one.

    Returns:
        A pair ``(dl_dWs, dl_dbs)`` where ``dl_dWs == [dl_dW1, dl_dW2]`` and
        ``dl_dbs == [dl_db1, dl_db2]``. Weight gradients have the shapes of
        ``W1`` / ``W2``; bias gradients are column vectors.

    Raises:
        ValueError: If ``X`` contains no samples (zero columns).
    """
    W1, W2 = Ws[0], Ws[1]
    b1, b2 = bs[0], bs[1]

    # Samples are stored column-wise, so the batch size is the number of
    # columns of X -- not the number of rows (the feature dimension).
    n_samples = X.shape[1]
    if n_samples == 0:
        raise ValueError("compute_grads requires at least one sample in X")
    scale = 1 / n_samples

    # Forward pass: ReLU hidden layer followed by softmax output.
    # `softmax` comes from the enclosing module; assumed to normalise each
    # column independently -- TODO confirm.
    H_batch = np.maximum(W1 @ X + b1, 0.0)
    P_batch = softmax(W2 @ H_batch + b2)

    # Backward pass, output layer: error on the pre-softmax scores.
    G_batch = P_batch - Y
    dl_dw2 = scale * (G_batch @ H_batch.T)
    dl_db2 = scale * G_batch.sum(axis=1, keepdims=True)

    # Propagate through W2, then through the ReLU: only units that were
    # active in the forward pass let gradient through.
    G_batch = (W2.T @ G_batch) * (H_batch > 0)
    dl_dw1 = scale * (G_batch @ X.T)
    dl_db1 = scale * G_batch.sum(axis=1, keepdims=True)

    return [dl_dw1, dl_dw2], [dl_db1, dl_db2]