import numpy as np
from sklearn.preprocessing import PolynomialFeatures

import torch
from torch.autograd import Variable

import matplotlib.pyplot as plt  # needed for the PLOTS section below; missing from the original paste

def index_batch(X, batch_indices, dtype):
    '''
    Returns the rows of X selected by batch_indices, as a tensor of type dtype.
    '''
    if len(X.shape) == 1:  # i.e. dimension (M,), just a vector
        batch_xs = torch.FloatTensor(X[batch_indices]).type(dtype)
    else:
        batch_xs = torch.FloatTensor(X[batch_indices, :]).type(dtype)
    return batch_xs
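
# Sanity-check sketch for index_batch (illustrative values, not from the run below):
# the 1-D branch returns a vector batch, the 2-D branch slices rows of a matrix.
#_X_vec = np.array([0.0, 1.0, 2.0, 3.0])        # shape (4,)
#_X_mat = np.arange(8.0).reshape(4, 2)          # shape (4, 2)
#_idx = np.array([0, 2])
#index_batch(_X_vec, _idx, torch.FloatTensor)   # FloatTensor of size 2
#index_batch(_X_mat, _idx, torch.FloatTensor)   # FloatTensor of size 2x2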

def get_batch2(X, Y, M, dtype):
    '''
    Gets a random mini-batch of size M (without replacement) for a pytorch model.
    '''
    # TODO fix and make it nicer; there is a pytorch forum question about this
    X, Y = X.data.numpy(), Y.data.numpy()
    N = len(Y)
    valid_indices = np.arange(N)
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = index_batch(X, batch_indices, dtype)
    batch_ys = index_batch(Y, batch_indices, dtype)
    return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)
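
# Note: get_batch2 round-trips Variable -> numpy -> FloatTensor on every call.
# A torch-native sketch (an assumption, not the original method; X, Y are
# 0.3-era Variables as in this paste):
#def get_batch_torch(X, Y, M):
#    idx = Variable(torch.randperm(X.size(0))[:M])
#    return X.index_select(0, idx), Y.index_select(0, idx)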

def get_sequential_lifted_mdl(nb_monomials, D_out, bias=False):
    return torch.nn.Sequential(torch.nn.Linear(nb_monomials, D_out, bias=bias))
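
# The 'lifted' model is a single bias-free linear layer, i.e. f(x) = <W, phi(x)>
# with phi(x) = (1, x, ..., x^D) supplied by the feature matrices built below.
# This is ordinary polynomial regression, so SGD should in principle drive W
# toward the pseudo-inverse coefficients c_pinv. A shape sketch (hypothetical
# names, not part of the original run):
#_mdl = get_sequential_lifted_mdl(nb_monomials=10, D_out=1)
#_y = _mdl(Variable(torch.randn(3, 10)))  # -> Variable of size 3x1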

def train_SGD(mdl, M, eta, nb_iter, logging_freq, dtype, X_train, Y_train, X_test, Y_test, c_pinv):
    ##
    N_train, _ = tuple(X_train.size())
    #print(N_train)
    for i in range(nb_iter):
        # snapshot the (single) weight matrix so the update can be inspected below
        for W in mdl.parameters():
            W_before_update = np.copy(W.data.numpy())
        # Forward pass: compute predicted Y using operations on Variables
        batch_xs, batch_ys = get_batch2(X_train, Y_train, M, dtype)  # [M, D], [M, 1]
        ## FORWARD PASS
        y_pred = mdl.forward(batch_xs)
        ## LOSS + Regularization
        batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
        ## BACKWARD PASS
        batch_loss.backward()  # Use autograd to compute the backward pass. Now W will have gradients
        ## SGD update
        for W in mdl.parameters():
            delta = eta*W.grad.data
            #W.data.copy_(W.data - delta)
            W.data -= delta
        ## train stats
        if i % (nb_iter//50) == 0 or i == 0:
            #if True:
            #if i % logging_freq == 0 or i == 0:
            current_train_loss = (1/N_train)*(mdl.forward(X_train) - Y_train).pow(2).sum().data.numpy()
            print('\n-------------')
            print(f'i = {i}, current_train_loss = {current_train_loss}')
            print(f'N_train = {N_train}')
            print(f'W_before_update={W_before_update}')
            print(f'W.data = {W.data.numpy()}')
            print(f'W.grad.data = {W.grad.data.numpy()}')
            diff = W_before_update - W.data.numpy()
            print(f'(w^(t-1) - w^(t))/eta = {diff/eta}')  # equals the gradient used in the update
            diff_norm = np.linalg.norm(diff, 2)
            print(f'|| w^(t) - w^(t-1) || = {diff_norm}')
            print(f'c_pinv = {c_pinv.T}')
            train_error_c_pinv = (1/N_train)*(np.linalg.norm(Y_train.data.numpy() - np.dot(X_train.data.numpy(), c_pinv))**2)
            print(f'train_error_c_pinv = {train_error_c_pinv}')
        ## Manually zero the gradients after updating weights
        mdl.zero_grad()
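
# Note: .backward() accumulates into W.grad, so the mdl.zero_grad() at the end of
# each iteration is essential; without it the gradients would sum across
# iterations and the SGD updates would be wrong.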
##
logging_freq = 100
dtype = torch.FloatTensor
## SGD params
M = 5
eta = 0.03
nb_iter = 100*1000
##
lb, ub = 0, 1
freq_sin = 4
f_target = lambda x: np.sin(2*np.pi*freq_sin*x).reshape(x.shape[0], 1)
N_train = 10
X_train = np.linspace(lb, ub, N_train).reshape(N_train, 1)
Y_train = f_target(X_train)
N_test = 200
X_test = np.linspace(lb, ub, N_test).reshape(N_test, 1)
Y_test = f_target(X_test)
## degree of mdl
Degree_mdl = 9
## pseudo-inverse solution (np.polyfit returns highest power first; [::-1] flips to the (1, x, ..., x^9) order used below)
c_pinv = np.polyfit(X_train.reshape((N_train,)), Y_train, Degree_mdl)[::-1]
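# A hypothetical cross-check (not part of the original run): the same coefficients
# via explicit least squares on the Vandermonde matrix.
#_V = np.vander(X_train.reshape(N_train), Degree_mdl+1, increasing=True)  # (10, 10)
#_c, _res, _rank, _sv = np.linalg.lstsq(_V, Y_train)
#np.allclose(_c, c_pinv)  # True up to conditioning error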
## linear mdl to train with SGD
nb_terms = c_pinv.shape[0]
mdl_sgd = get_sequential_lifted_mdl(nb_monomials=nb_terms, D_out=1, bias=False)
#mdl_sgd[0].weight.data.normal_(mean=0, std=0.0)
#mdl_sgd[0].weight.data.fill_(0)
print(f'mdl_sgd[0].weight.data={mdl_sgd[0].weight.data}')
## Make polynomial Kernel
poly_feat = PolynomialFeatures(degree=Degree_mdl)
Kern_train, Kern_test = poly_feat.fit_transform(X_train.reshape(N_train, 1)), poly_feat.fit_transform(X_test.reshape(N_test, 1))
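# PolynomialFeatures(degree=9) maps each scalar x_i to the row [1, x_i, ..., x_i^9],
# so Kern_train has shape (N_train, 10) and its columns line up with the flipped
# polyfit coefficients in c_pinv.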
Kern_train_pt, Y_train_pt = Variable(torch.FloatTensor(Kern_train).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_train).type(dtype), requires_grad=False)
Kern_test_pt, Y_test_pt = Variable(torch.FloatTensor(Kern_test).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_test).type(dtype), requires_grad=False)
train_SGD(mdl_sgd, M, eta, nb_iter, logging_freq, dtype, Kern_train_pt, Y_train_pt, Kern_test_pt, Y_test_pt, c_pinv)
#### PLOTS
x_horizontal = np.linspace(lb, ub, 1000).reshape(1000, 1)  # plotting grid; its definition was missing from the original paste
X_plot = poly_feat.fit_transform(x_horizontal)
X_plot_pytorch = Variable(torch.FloatTensor(X_plot), requires_grad=False)
##
fig1 = plt.figure()
## plot objs
#p_sgd_tf, = plt.plot(x_horizontal, Y_tf)  # Y_tf (predictions from a separate TensorFlow run) is not defined in this paste, so this curve is disabled
p_sgd_pt, = plt.plot(x_horizontal, [float(f_val) for f_val in mdl_sgd.forward(X_plot_pytorch).data.numpy()])
p_pinv, = plt.plot(x_horizontal, np.dot(X_plot, c_pinv))
p_data, = plt.plot(X_train, Y_train, 'ro')
## legend
nb_terms = c_pinv.shape[0]
legend_mdl = f'SGD solution standard parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}'
plt.legend(
    [p_sgd_pt, p_pinv, p_data],
    ['Pytorch '+legend_mdl, f'linear algebra soln, number of monomials={nb_terms}', f'data points = {N_train}'])
##
plt.xlabel('x'), plt.ylabel('f(x)')
plt.show()

c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]

mdl_sgd[0].weight.data=
 0.2769 0.2238 -0.1786 -0.2836 0.0282 -0.2650 0.1517 0.0609 -0.1799 0.2518
[torch.FloatTensor of size 1x10]


-------------
i = 0, current_train_loss = [ 0.51122922]
N_train = 10
W_before_update=[[ 0.276916 0.22384584 -0.17859279 -0.28359878 0.02818507 -0.26502955
 0.15169969 0.06087267 -0.17991513 0.25179213]]
W.data = [[ 0.27278039 0.2223435 -0.17868967 -0.28320512 0.02860935 -0.26476261
 0.15175563 0.06072243 -0.18024531 0.251313 ]]
W.grad.data = [[ 0.13785343 0.05007789 0.00322947 -0.01312203 -0.01414278 -0.00889825
 -0.00186479 0.00500792 0.01100619 0.01597152]]
(w^(t-1) - w^(t))/eta = [[ 0.13785362 0.05007784 0.00322958 -0.01312196 -0.01414275 -0.00889798
 -0.00186463 0.00500791 0.011006 0.01597106]]
|| w^(t) - w^(t-1) || = 0.004487781319767237
c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]
train_error_c_pinv = 0.00041026620352414134

-------------
i = 2000, current_train_loss = [ 0.45121056]
N_train = 10
W_before_update=[[ 0.05377455 0.14968246 -0.0918882 -0.18873887 0.0875883 -0.24442779
 0.14100061 0.02913089 -0.22231367 0.20818822]]
W.data = [[ 0.02684817 0.13449876 -0.10165974 -0.19549945 0.08267717 -0.24813652
 0.13810736 0.02681178 -0.22421438 0.20660225]]
W.grad.data = [[ 0.89754611 0.50612354 0.32571793 0.2253527 0.16370434 0.12362462
 0.0964416 0.07730356 0.06335653 0.05286586]]
(w^(t-1) - w^(t))/eta = [[ 0.89754611 0.50612342 0.32571805 0.22535275 0.16370441 0.1236245
 0.09644181 0.07730357 0.06335676 0.05286584]]
|| w^(t) - w^(t-1) || = 0.03397814929485321
c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]
train_error_c_pinv = 0.00041026620352414134
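
Note on the logs above: after 2000 iterations the SGD train loss has only moved from ~0.51 to ~0.45, while the pseudo-inverse fit reaches ~4.1e-4 on the same data. One plausible culprit is the conditioning of the degree-9 monomial features on [0,1]; a diagnostic sketch (not part of the original run):

import numpy as np
V = np.vander(np.linspace(0, 1, 10), 10, increasing=True)
print(np.linalg.cond(V))  # enormous (on the order of 1e7-1e8), so plain SGD on these features converges very slowly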

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from numpy.polynomial.hermite import hermvander

import torch
from torch.autograd import Variable

import matplotlib.pyplot as plt  # plt was not imported explicitly in the original paste

from maps import NamedDict

from plotting_utils import *

def index_batch(X, batch_indices, dtype):
    '''
    Returns the rows of X selected by batch_indices, as a tensor of type dtype.
    '''
    if len(X.shape) == 1:  # i.e. dimension (M,), just a vector
        batch_xs = torch.FloatTensor(X[batch_indices]).type(dtype)
    else:
        batch_xs = torch.FloatTensor(X[batch_indices, :]).type(dtype)
    return batch_xs

def get_batch2(X, Y, M, dtype):
    '''
    Gets a random mini-batch of size M (without replacement) for a pytorch model.
    '''
    # TODO fix and make it nicer; there is a pytorch forum question about this
    X, Y = X.data.numpy(), Y.data.numpy()
    N = len(Y)
    valid_indices = np.arange(N)
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = index_batch(X, batch_indices, dtype)
    batch_ys = index_batch(Y, batch_indices, dtype)
    return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)

def get_sequential_lifted_mdl(nb_monomials, D_out, bias=False):
    return torch.nn.Sequential(torch.nn.Linear(nb_monomials, D_out, bias=bias))

def train_SGD(mdl, M, eta, nb_iter, logging_freq, dtype, X_train, Y_train):
    ##
    N_train, _ = tuple(X_train.size())
    #print(N_train)
    for i in range(nb_iter):
        # Forward pass: compute predicted Y using operations on Variables
        batch_xs, batch_ys = get_batch2(X_train, Y_train, M, dtype)  # [M, D], [M, 1]
        ## FORWARD PASS
        y_pred = mdl.forward(batch_xs)
        ## LOSS + Regularization
        batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
        ## BACKWARD PASS
        batch_loss.backward()  # Use autograd to compute the backward pass. Now W will have gradients
        ## SGD update
        for W in mdl.parameters():
            delta = eta*W.grad.data
            W.data.copy_(W.data - delta)
        ## train stats
        if i % (nb_iter//10) == 0 or i == 0:
            current_train_loss = (1/N_train)*(mdl.forward(X_train) - Y_train).pow(2).sum().data.numpy()
            print('\n-------------')
            print(f'i = {i}, current_train_loss = {current_train_loss}\n')
            print(f'eta*W.grad.data = {eta*W.grad.data}')  # W here is the last parameter from the update loop above
            print(f'W.grad.data = {W.grad.data}')
        ## Manually zero the gradients after updating weights
        mdl.zero_grad()
##
logging_freq = 100
dtype = torch.FloatTensor
## SGD params
M = 3
eta = 0.002
nb_iter = 20*1000
##
lb, ub = 0, 1
f_target = lambda x: np.sin(2*np.pi*x)
N_train = 5
X_train = np.linspace(lb, ub, N_train)
Y_train = f_target(X_train).reshape(N_train, 1)  # keep Y 2-D so y_pred - batch_ys is elementwise, not broadcast to (M, M)
## degree of mdl
Degree_mdl = 4
## pseudo-inverse solution
c_pinv = np.polyfit(X_train, Y_train, Degree_mdl)[::-1]
## linear mdl to train with SGD
nb_terms = c_pinv.shape[0]
mdl_sgd = get_sequential_lifted_mdl(nb_monomials=nb_terms, D_out=1, bias=False)
mdl_sgd[0].weight.data.normal_(mean=0, std=0.001)
## Make polynomial Kernel
#poly_feat = PolynomialFeatures(degree=Degree_mdl)
#Kern_train = poly_feat.fit_transform(X_train.reshape(N_train,1))
Kern_train = hermvander(X_train, Degree_mdl)
Kern_train = Kern_train.reshape(N_train, Kern_train.shape[1])
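# hermvander(x, deg) returns shape x.shape + (deg+1,), so for 1-D X_train this is
# already (N_train, Degree_mdl+1) and the reshape above is a no-op kept for safety.
# Note also that c_pinv above was fit in the monomial basis (np.polyfit) while
# Kern_train is a Hermite-Vandermonde matrix, so the two solutions live in
# different bases.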
Kern_train_pt, Y_train_pt = Variable(torch.FloatTensor(Kern_train).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_train).type(dtype), requires_grad=False)
train_SGD(mdl_sgd, M, eta, nb_iter, logging_freq, dtype, Kern_train_pt, Y_train_pt)

#### PLOTTING
x_horizontal = np.linspace(lb, ub, 1000).reshape(1000, 1)
#X_plot = poly_feat.fit_transform(x_horizontal)
X_plot = hermvander(x_horizontal, Degree_mdl)
X_plot = X_plot.reshape(1000, X_plot.shape[2])  # hermvander on (1000,1) input gives (1000,1,5); flatten the middle axis
X_plot_pytorch = Variable(torch.FloatTensor(X_plot), requires_grad=False)
##
fig1 = plt.figure()
## plot objs
p_sgd, = plt.plot(x_horizontal, [float(f_val) for f_val in mdl_sgd.forward(X_plot_pytorch).data.numpy()])
p_pinv, = plt.plot(x_horizontal, np.dot(X_plot, c_pinv))  # NOTE: c_pinv is in the monomial basis but X_plot is Hermite, so this curve mixes bases
p_data, = plt.plot(X_train, Y_train, 'ro')
## legend
nb_terms = c_pinv.shape[0]
legend_mdl = f'SGD solution standard parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}'
plt.legend(
    [p_sgd, p_pinv, p_data],
    [legend_mdl, f'linear algebra soln, number of monomials={nb_terms}', f'data points = {N_train}'])
##
plt.xlabel('x'), plt.ylabel('f(x)')
plt.show()