import numpy as np
from sklearn.preprocessing import PolynomialFeatures

import torch
from torch.autograd import Variable

import matplotlib.pyplot as plt  # needed for the PLOTS section below; missing from the original paste

def index_batch(X, batch_indices, dtype):
    '''
    Returns the rows of X selected by batch_indices, as a tensor of type dtype.
    '''
    if len(X.shape) == 1:  # i.e. dimension (M,), just a vector
        batch_xs = torch.FloatTensor(X[batch_indices]).type(dtype)
    else:
        batch_xs = torch.FloatTensor(X[batch_indices, :]).type(dtype)
    return batch_xs
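
# Sanity-check sketch for index_batch (illustrative values, not from the run below):
# the 1-D branch returns a vector batch, the 2-D branch slices rows of a matrix.
#_X_vec = np.array([0.0, 1.0, 2.0, 3.0])        # shape (4,)
#_X_mat = np.arange(8.0).reshape(4, 2)          # shape (4, 2)
#_idx = np.array([0, 2])
#index_batch(_X_vec, _idx, torch.FloatTensor)   # FloatTensor of size 2
#index_batch(_X_mat, _idx, torch.FloatTensor)   # FloatTensor of size 2x2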

def get_batch2(X, Y, M, dtype):
    '''
    Gets a random mini-batch of size M (without replacement) for a pytorch model.
    '''
    # TODO fix and make it nicer; there is a pytorch forum question about this
    X, Y = X.data.numpy(), Y.data.numpy()
    N = len(Y)
    valid_indices = np.arange(N)
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = index_batch(X, batch_indices, dtype)
    batch_ys = index_batch(Y, batch_indices, dtype)
    return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)
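
# Note: get_batch2 round-trips Variable -> numpy -> FloatTensor on every call.
# A torch-native sketch (an assumption, not the original method; X, Y are
# 0.3-era Variables as in this paste):
#def get_batch_torch(X, Y, M):
#    idx = Variable(torch.randperm(X.size(0))[:M])
#    return X.index_select(0, idx), Y.index_select(0, idx)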

def get_sequential_lifted_mdl(nb_monomials, D_out, bias=False):
    return torch.nn.Sequential(torch.nn.Linear(nb_monomials, D_out, bias=bias))
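
# The 'lifted' model is a single bias-free linear layer, i.e. f(x) = <W, phi(x)>
# with phi(x) = (1, x, ..., x^D) supplied by the feature matrices built below.
# This is ordinary polynomial regression, so SGD should in principle drive W
# toward the pseudo-inverse coefficients c_pinv. A shape sketch (hypothetical
# names, not part of the original run):
#_mdl = get_sequential_lifted_mdl(nb_monomials=10, D_out=1)
#_y = _mdl(Variable(torch.randn(3, 10)))  # -> Variable of size 3x1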

def train_SGD(mdl, M, eta, nb_iter, logging_freq, dtype, X_train, Y_train, X_test, Y_test, c_pinv):
    ##
    N_train, _ = tuple(X_train.size())
    #print(N_train)
    for i in range(nb_iter):
        # snapshot the (single) weight matrix so the update can be inspected below
        for W in mdl.parameters():
            W_before_update = np.copy(W.data.numpy())
        # Forward pass: compute predicted Y using operations on Variables
        batch_xs, batch_ys = get_batch2(X_train, Y_train, M, dtype)  # [M, D], [M, 1]
        ## FORWARD PASS
        y_pred = mdl.forward(batch_xs)
        ## LOSS + Regularization
        batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
        ## BACKWARD PASS
        batch_loss.backward()  # Use autograd to compute the backward pass. Now W will have gradients
        ## SGD update
        for W in mdl.parameters():
            delta = eta*W.grad.data
            #W.data.copy_(W.data - delta)
            W.data -= delta
        ## train stats
        if i % (nb_iter//50) == 0 or i == 0:
            #if True:
            #if i % logging_freq == 0 or i == 0:
            current_train_loss = (1/N_train)*(mdl.forward(X_train) - Y_train).pow(2).sum().data.numpy()
            print('\n-------------')
            print(f'i = {i}, current_train_loss = {current_train_loss}')
            print(f'N_train = {N_train}')
            print(f'W_before_update={W_before_update}')
            print(f'W.data = {W.data.numpy()}')
            print(f'W.grad.data = {W.grad.data.numpy()}')
            diff = W_before_update - W.data.numpy()
            print(f'(w^(t-1) - w^(t))/eta = {diff/eta}')  # equals the gradient used in the update
            diff_norm = np.linalg.norm(diff, 2)
            print(f'|| w^(t) - w^(t-1) || = {diff_norm}')
            print(f'c_pinv = {c_pinv.T}')
            train_error_c_pinv = (1/N_train)*(np.linalg.norm(Y_train.data.numpy() - np.dot(X_train.data.numpy(), c_pinv))**2)
            print(f'train_error_c_pinv = {train_error_c_pinv}')
        ## Manually zero the gradients after updating weights
        mdl.zero_grad()
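
# Note: .backward() accumulates into W.grad, so the mdl.zero_grad() at the end of
# each iteration is essential; without it the gradients would sum across
# iterations and the SGD updates would be wrong.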
##
logging_freq = 100
dtype = torch.FloatTensor
## SGD params
M = 5
eta = 0.03
nb_iter = 100*1000
##
lb, ub = 0, 1
freq_sin = 4
f_target = lambda x: np.sin(2*np.pi*freq_sin*x).reshape(x.shape[0], 1)
N_train = 10
X_train = np.linspace(lb, ub, N_train).reshape(N_train, 1)
Y_train = f_target(X_train)
N_test = 200
X_test = np.linspace(lb, ub, N_test).reshape(N_test, 1)
Y_test = f_target(X_test)
## degree of mdl
Degree_mdl = 9
## pseudo-inverse solution (np.polyfit returns highest power first; [::-1] flips to the (1, x, ..., x^9) order used below)
c_pinv = np.polyfit(X_train.reshape((N_train,)), Y_train, Degree_mdl)[::-1]
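# A hypothetical cross-check (not part of the original run): the same coefficients
# via explicit least squares on the Vandermonde matrix.
#_V = np.vander(X_train.reshape(N_train), Degree_mdl+1, increasing=True)  # (10, 10)
#_c, _res, _rank, _sv = np.linalg.lstsq(_V, Y_train)
#np.allclose(_c, c_pinv)  # True up to conditioning error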
## linear mdl to train with SGD
nb_terms = c_pinv.shape[0]
mdl_sgd = get_sequential_lifted_mdl(nb_monomials=nb_terms, D_out=1, bias=False)
#mdl_sgd[0].weight.data.normal_(mean=0, std=0.0)
#mdl_sgd[0].weight.data.fill_(0)
print(f'mdl_sgd[0].weight.data={mdl_sgd[0].weight.data}')
## Make polynomial Kernel
poly_feat = PolynomialFeatures(degree=Degree_mdl)
Kern_train, Kern_test = poly_feat.fit_transform(X_train.reshape(N_train, 1)), poly_feat.fit_transform(X_test.reshape(N_test, 1))
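# PolynomialFeatures(degree=9) maps each scalar x_i to the row [1, x_i, ..., x_i^9],
# so Kern_train has shape (N_train, 10) and its columns line up with the flipped
# polyfit coefficients in c_pinv.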
Kern_train_pt, Y_train_pt = Variable(torch.FloatTensor(Kern_train).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_train).type(dtype), requires_grad=False)
Kern_test_pt, Y_test_pt = Variable(torch.FloatTensor(Kern_test).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_test).type(dtype), requires_grad=False)
train_SGD(mdl_sgd, M, eta, nb_iter, logging_freq, dtype, Kern_train_pt, Y_train_pt, Kern_test_pt, Y_test_pt, c_pinv)
#### PLOTS
x_horizontal = np.linspace(lb, ub, 1000).reshape(1000, 1)  # plotting grid; its definition was missing from the original paste
X_plot = poly_feat.fit_transform(x_horizontal)
X_plot_pytorch = Variable(torch.FloatTensor(X_plot), requires_grad=False)
##
fig1 = plt.figure()
## plot objs
#p_sgd_tf, = plt.plot(x_horizontal, Y_tf)  # Y_tf (predictions from a separate TensorFlow run) is not defined in this paste, so this curve is disabled
p_sgd_pt, = plt.plot(x_horizontal, [float(f_val) for f_val in mdl_sgd.forward(X_plot_pytorch).data.numpy()])
p_pinv, = plt.plot(x_horizontal, np.dot(X_plot, c_pinv))
p_data, = plt.plot(X_train, Y_train, 'ro')
## legend
nb_terms = c_pinv.shape[0]
legend_mdl = f'SGD solution standard parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}'
plt.legend(
    [p_sgd_pt, p_pinv, p_data],
    ['Pytorch '+legend_mdl, f'linear algebra soln, number of monomials={nb_terms}', f'data points = {N_train}'])
##
plt.xlabel('x'), plt.ylabel('f(x)')
plt.show()

c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]

mdl_sgd[0].weight.data=
 0.2769 0.2238 -0.1786 -0.2836 0.0282 -0.2650 0.1517 0.0609 -0.1799 0.2518
[torch.FloatTensor of size 1x10]


-------------
i = 0, current_train_loss = [ 0.51122922]
N_train = 10
W_before_update=[[ 0.276916 0.22384584 -0.17859279 -0.28359878 0.02818507 -0.26502955
 0.15169969 0.06087267 -0.17991513 0.25179213]]
W.data = [[ 0.27278039 0.2223435 -0.17868967 -0.28320512 0.02860935 -0.26476261
 0.15175563 0.06072243 -0.18024531 0.251313 ]]
W.grad.data = [[ 0.13785343 0.05007789 0.00322947 -0.01312203 -0.01414278 -0.00889825
 -0.00186479 0.00500792 0.01100619 0.01597152]]
(w^(t-1) - w^(t))/eta = [[ 0.13785362 0.05007784 0.00322958 -0.01312196 -0.01414275 -0.00889798
 -0.00186463 0.00500791 0.011006 0.01597106]]
|| w^(t) - w^(t-1) || = 0.004487781319767237
c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]
train_error_c_pinv = 0.00041026620352414134

-------------
i = 2000, current_train_loss = [ 0.45121056]
N_train = 10
W_before_update=[[ 0.05377455 0.14968246 -0.0918882 -0.18873887 0.0875883 -0.24442779
 0.14100061 0.02913089 -0.22231367 0.20818822]]
W.data = [[ 0.02684817 0.13449876 -0.10165974 -0.19549945 0.08267717 -0.24813652
 0.13810736 0.02681178 -0.22421438 0.20660225]]
W.grad.data = [[ 0.89754611 0.50612354 0.32571793 0.2253527 0.16370434 0.12362462
 0.0964416 0.07730356 0.06335653 0.05286586]]
(w^(t-1) - w^(t))/eta = [[ 0.89754611 0.50612342 0.32571805 0.22535275 0.16370441 0.1236245
 0.09644181 0.07730357 0.06335676 0.05286584]]
|| w^(t) - w^(t-1) || = 0.03397814929485321
c_pinv = [[ -7.36275143e-11 9.94955061e+02 -2.27235773e+04 2.02776690e+05
 -9.45987901e+05 2.56477290e+06 -4.18670905e+06 4.05381875e+06
 -2.14321212e+06 4.76269361e+05]]
train_error_c_pinv = 0.00041026620352414134
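
Note on the logs above: after 2000 iterations the SGD train loss has only moved from ~0.51 to ~0.45, while the pseudo-inverse fit reaches ~4.1e-4 on the same data. One plausible culprit is the conditioning of the degree-9 monomial features on [0,1]; a diagnostic sketch (not part of the original run):

import numpy as np
V = np.vander(np.linspace(0, 1, 10), 10, increasing=True)
print(np.linalg.cond(V))  # enormous (on the order of 1e7-1e8), so plain SGD on these features converges very slowly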

import numpy as np
from sklearn.preprocessing import PolynomialFeatures
from numpy.polynomial.hermite import hermvander

import torch
from torch.autograd import Variable

import matplotlib.pyplot as plt  # plt was not imported explicitly in the original paste

from maps import NamedDict

from plotting_utils import *

def index_batch(X, batch_indices, dtype):
    '''
    Returns the rows of X selected by batch_indices, as a tensor of type dtype.
    '''
    if len(X.shape) == 1:  # i.e. dimension (M,), just a vector
        batch_xs = torch.FloatTensor(X[batch_indices]).type(dtype)
    else:
        batch_xs = torch.FloatTensor(X[batch_indices, :]).type(dtype)
    return batch_xs

def get_batch2(X, Y, M, dtype):
    '''
    Gets a random mini-batch of size M (without replacement) for a pytorch model.
    '''
    # TODO fix and make it nicer; there is a pytorch forum question about this
    X, Y = X.data.numpy(), Y.data.numpy()
    N = len(Y)
    valid_indices = np.arange(N)
    batch_indices = np.random.choice(valid_indices, size=M, replace=False)
    batch_xs = index_batch(X, batch_indices, dtype)
    batch_ys = index_batch(Y, batch_indices, dtype)
    return Variable(batch_xs, requires_grad=False), Variable(batch_ys, requires_grad=False)

def get_sequential_lifted_mdl(nb_monomials, D_out, bias=False):
    return torch.nn.Sequential(torch.nn.Linear(nb_monomials, D_out, bias=bias))

def train_SGD(mdl, M, eta, nb_iter, logging_freq, dtype, X_train, Y_train):
    ##
    N_train, _ = tuple(X_train.size())
    #print(N_train)
    for i in range(nb_iter):
        # Forward pass: compute predicted Y using operations on Variables
        batch_xs, batch_ys = get_batch2(X_train, Y_train, M, dtype)  # [M, D], [M, 1]
        ## FORWARD PASS
        y_pred = mdl.forward(batch_xs)
        ## LOSS + Regularization
        batch_loss = (1/M)*(y_pred - batch_ys).pow(2).sum()
        ## BACKWARD PASS
        batch_loss.backward()  # Use autograd to compute the backward pass. Now W will have gradients
        ## SGD update
        for W in mdl.parameters():
            delta = eta*W.grad.data
            W.data.copy_(W.data - delta)
        ## train stats
        if i % (nb_iter//10) == 0 or i == 0:
            current_train_loss = (1/N_train)*(mdl.forward(X_train) - Y_train).pow(2).sum().data.numpy()
            print('\n-------------')
            print(f'i = {i}, current_train_loss = {current_train_loss}\n')
            print(f'eta*W.grad.data = {eta*W.grad.data}')  # W here is the last parameter from the update loop above
            print(f'W.grad.data = {W.grad.data}')
        ## Manually zero the gradients after updating weights
        mdl.zero_grad()
##
logging_freq = 100
dtype = torch.FloatTensor
## SGD params
M = 3
eta = 0.002
nb_iter = 20*1000
##
lb, ub = 0, 1
f_target = lambda x: np.sin(2*np.pi*x)
N_train = 5
X_train = np.linspace(lb, ub, N_train)
Y_train = f_target(X_train).reshape(N_train, 1)  # keep Y 2-D so y_pred - batch_ys is elementwise, not broadcast to (M, M)
## degree of mdl
Degree_mdl = 4
## pseudo-inverse solution
c_pinv = np.polyfit(X_train, Y_train, Degree_mdl)[::-1]
## linear mdl to train with SGD
nb_terms = c_pinv.shape[0]
mdl_sgd = get_sequential_lifted_mdl(nb_monomials=nb_terms, D_out=1, bias=False)
mdl_sgd[0].weight.data.normal_(mean=0, std=0.001)
## Make polynomial Kernel
#poly_feat = PolynomialFeatures(degree=Degree_mdl)
#Kern_train = poly_feat.fit_transform(X_train.reshape(N_train,1))
Kern_train = hermvander(X_train, Degree_mdl)
Kern_train = Kern_train.reshape(N_train, Kern_train.shape[1])
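# hermvander(x, deg) returns shape x.shape + (deg+1,), so for 1-D X_train this is
# already (N_train, Degree_mdl+1) and the reshape above is a no-op kept for safety.
# Note also that c_pinv above was fit in the monomial basis (np.polyfit) while
# Kern_train is a Hermite-Vandermonde matrix, so the two solutions live in
# different bases.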
Kern_train_pt, Y_train_pt = Variable(torch.FloatTensor(Kern_train).type(dtype), requires_grad=False), Variable(torch.FloatTensor(Y_train).type(dtype), requires_grad=False)
train_SGD(mdl_sgd, M, eta, nb_iter, logging_freq, dtype, Kern_train_pt, Y_train_pt)

#### PLOTTING
x_horizontal = np.linspace(lb, ub, 1000).reshape(1000, 1)
#X_plot = poly_feat.fit_transform(x_horizontal)
X_plot = hermvander(x_horizontal, Degree_mdl)
X_plot = X_plot.reshape(1000, X_plot.shape[2])  # hermvander on (1000,1) input gives (1000,1,5); flatten the middle axis
X_plot_pytorch = Variable(torch.FloatTensor(X_plot), requires_grad=False)
##
fig1 = plt.figure()
## plot objs
p_sgd, = plt.plot(x_horizontal, [float(f_val) for f_val in mdl_sgd.forward(X_plot_pytorch).data.numpy()])
p_pinv, = plt.plot(x_horizontal, np.dot(X_plot, c_pinv))  # NOTE: c_pinv is in the monomial basis but X_plot is Hermite, so this curve mixes bases
p_data, = plt.plot(X_train, Y_train, 'ro')
## legend
nb_terms = c_pinv.shape[0]
legend_mdl = f'SGD solution standard parametrization, number of monomials={nb_terms}, batch-size={M}, iterations={nb_iter}, step size={eta}'
plt.legend(
    [p_sgd, p_pinv, p_data],
    [legend_mdl, f'linear algebra soln, number of monomials={nb_terms}', f'data points = {N_train}'])
##
plt.xlabel('x'), plt.ylabel('f(x)')
plt.show()