Advertisement
lskeeper

Untitled

Oct 20th, 2017
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.79 KB | None | 0 0
  1. #!/usr/bin/env python2
  2. from caffe2.python import brew, model_helper, workspace, core, utils
  3. from caffe2.proto import caffe2_pb2
  4. from caffe2.caffe2.fb.predictor import predictor_exporter
  5. import torch
  6. import sys
  7. import numpy as np
  8.  
  9.  
  10. def load_nvdm_inference_pt_model(pytorch_model_path):
  11. trained_params = torch.load(pytorch_model_path)
  12.  
  13. weight_dict = {}
  14.  
  15. # params for inference network of NVDM
  16. # first linear layer
  17. fc1_weight = trained_params['fc1.0.weight'].cpu().numpy()
  18. print(fc1_weight.size)
  19. fc1_bias = trained_params['fc1.0.bias'].cpu().numpy()
  20. weight_dict['fc1_w'] = fc1_weight
  21. weight_dict['fc1_b'] = fc1_bias
  22.  
  23. # first BN layer
  24. bn1_weight = trained_params['fc1.1.weight'].cpu().numpy()
  25. bn1_bias = trained_params['fc1.1.bias'].cpu().numpy()
  26. bn1_rmean = trained_params['fc1.1.running_mean'].cpu().numpy()
  27. bn1_rvar = trained_params['fc1.1.running_var'].cpu().numpy()
  28. weight_dict['bn1_w'] = bn1_weight
  29. weight_dict['bn1_b'] = bn1_bias
  30. weight_dict['bn1_rmean'] = bn1_rmean
  31. weight_dict['bn1_rvar'] = bn1_rvar
  32.  
  33. # second linear layer
  34. fc2_weight = trained_params['fc2.0.weight'].cpu().numpy()
  35. fc2_bias = trained_params['fc2.0.bias'].cpu().numpy()
  36. weight_dict['fc2_w'] = fc2_weight
  37. weight_dict['fc2_b'] = fc2_bias
  38.  
  39. # second BN layer
  40. bn2_weight = trained_params['fc2.1.weight'].cpu().numpy()
  41. bn2_bias = trained_params['fc2.1.bias'].cpu().numpy()
  42. bn2_rmean = trained_params['fc2.1.running_mean'].cpu().numpy()
  43. bn2_rvar = trained_params['fc2.1.running_var'].cpu().numpy()
  44. weight_dict['bn2_w'] = bn2_weight
  45. weight_dict['bn2_b'] = bn2_bias
  46. weight_dict['bn2_rmean'] = bn2_rmean
  47. weight_dict['bn2_rvar'] = bn2_rvar
  48.  
  49. # linear layer for mu(z|x)
  50. fc31_weight = trained_params['fc31.0.weight'].cpu().numpy()
  51. fc31_bias = trained_params['fc31.0.bias'].cpu().numpy()
  52. weight_dict['fc31_w'] = fc31_weight
  53. weight_dict['fc31_b'] = fc31_bias
  54.  
  55. # BN layer
  56. bn31_weight = trained_params['fc31.1.weight'].cpu().numpy()
  57. bn31_bias = trained_params['fc31.1.bias'].cpu().numpy()
  58. bn31_rmean = trained_params['fc31.1.running_mean'].cpu().numpy()
  59. bn31_rvar = trained_params['fc31.1.running_var'].cpu().numpy()
  60. weight_dict['bn31_w'] = bn31_weight
  61. weight_dict['bn31_b'] = bn31_bias
  62. weight_dict['bn31_rmean'] = bn31_rmean
  63. weight_dict['bn31_rvar'] = bn31_rvar
  64.  
  65. # linear layer for var(z|x)
  66. fc32_weight = trained_params['fc32.0.weight'].cpu().numpy()
  67. fc32_bias = trained_params['fc32.0.bias'].cpu().numpy()
  68. weight_dict['fc32_w'] = fc32_weight
  69. weight_dict['fc32_b'] = fc32_bias
  70.  
  71. # BN layer
  72. bn32_weight = trained_params['fc32.1.weight'].cpu().numpy()
  73. bn32_bias = trained_params['fc32.1.bias'].cpu().numpy()
  74. bn32_rmean = trained_params['fc32.1.running_mean'].cpu().numpy()
  75. bn32_rvar = trained_params['fc32.1.running_var'].cpu().numpy()
  76. weight_dict['bn32_w'] = bn32_weight
  77. weight_dict['bn32_b'] = bn32_bias
  78. weight_dict['bn32_rmean'] = bn32_rmean
  79. weight_dict['bn32_rvar'] = bn32_rvar
  80.  
  81. return weight_dict
  82.  
  83.  
  84. def make_bn_param(model, weight_dict, param_name):
  85. assert param_name in weight_dict
  86. print(weight_dict[param_name].size)
  87. param = model.param_init_net.GivenTensorFill([],
  88. param_name,
  89. shape=[weight_dict[param_name].size],
  90. values=weight_dict[param_name].T)
  91. return param
  92.  
  93.  
  94. def make_nvdm_inference_cf2_model(model,
  95. weight_dict,
  96. vocab_size, hidden_dim, latent_dim):
  97. data = model.net.AddExternalInput('data')
  98. # First FC layer in inference net
  99. fc1 = brew.fc(model, data, 'fc1', dim_in=vocab_size, dim_out=hidden_dim,
  100. weight_init=('GivenTensorFill', dict(values=weight_dict['fc1_w'])),
  101. bias_init=('GivenTensorFill', dict(values=weight_dict['fc1_b'].T)))
  102. fc1 = brew.relu(model, fc1, fc1)
  103.  
  104. # First BN layer
  105. # reshape first
  106. fc1_shape = weight_dict['fc1_w'].shape
  107. print fc1_shape[0]
  108. print fc1_shape[1]
  109. fc1, _ = model.Reshape(fc1, ['fc1', '_'], shape=[1, hidden_dim, 1, 1])
  110. # model.Reshape(fc1, shape=[])? # TODO
  111. bn1_w = make_bn_param(model, weight_dict, 'bn1_w')
  112. bn1_b = make_bn_param(model, weight_dict, 'bn1_b')
  113. bn1_rmean = make_bn_param(model, weight_dict, 'bn1_rmean')
  114. bn1_rvar = make_bn_param(model, weight_dict, 'bn1_rvar')
  115.  
  116. model.net.SpatialBN([fc1,
  117. bn1_w,
  118. bn1_b,
  119. bn1_rmean,
  120. bn1_rvar],
  121. [fc1],
  122. is_test=True,
  123. epsilon=1e-05 # be the same with PyTorch
  124. )
  125. # reshape back #TODO
  126. fc1, _ = model.Reshape(fc1, ['fc1', '_'], shape=[1, hidden_dim])
  127. print('good')
  128. # Second FC layer in inference net
  129. fc2 = brew.fc(model, fc1, 'fc2', dim_in=hidden_dim, dim_out=hidden_dim,
  130. weight_init=('GivenTensorFill', dict(values=weight_dict['fc2_w'])),
  131. bias_init=('GivenTensorFill', dict(values=weight_dict['fc2_b'].T)))
  132. fc2 = brew.relu(model, fc2, fc2)
  133.  
  134. # reshape first
  135. fc2, _ = model.Reshape(fc2, ['fc2', '_'], shape=[1, hidden_dim, 1, 1])
  136. print('good')
  137. # model.Reshape(fc2, shape=[])? # TODO
  138. bn2_w = make_bn_param(model, weight_dict, 'bn2_w')
  139. bn2_b = make_bn_param(model, weight_dict, 'bn2_b')
  140. bn2_rmean = make_bn_param(model, weight_dict, 'bn2_rmean')
  141. bn2_rvar = make_bn_param(model, weight_dict, 'bn2_rvar')
  142.  
  143. model.net.SpatialBN([fc2,
  144. bn2_w,
  145. bn2_b,
  146. bn2_rmean,
  147. bn2_rvar],
  148. [fc2],
  149. is_test=True,
  150. epsilon=1e-05 # be the same with PyTorch
  151. )
  152. # reshape back #TODO
  153. fc2, _ = model.Reshape(fc2, ['fc2', '_'], shape=[1, hidden_dim])
  154. print('good')
  155.  
  156. # Linear layer for the mean
  157. fc31 = brew.fc(model, fc2, 'fc31', dim_in=hidden_dim, dim_out=latent_dim,
  158. weight_init=('GivenTensorFill', dict(values=weight_dict['fc31_w'])),
  159. bias_init=('GivenTensorFill', dict(values=weight_dict['fc31_b'].T)))
  160.  
  161. # reshape first
  162. fc31, _ = model.Reshape(fc31, ['fc31', '_'], shape=[1, latent_dim, 1, 1])
  163. print('good')
  164. # model.Reshape(fc31, shape=[])? # TODO
  165. bn31_w = make_bn_param(model, weight_dict, 'bn31_w')
  166. bn31_b = make_bn_param(model, weight_dict, 'bn31_b')
  167. bn31_rmean = make_bn_param(model, weight_dict, 'bn31_rmean')
  168. bn31_rvar = make_bn_param(model, weight_dict, 'bn31_rvar')
  169.  
  170. model.net.SpatialBN([fc31,
  171. bn31_w,
  172. bn31_b,
  173. bn31_rmean,
  174. bn31_rvar],
  175. [fc31],
  176. is_test=True,
  177. epsilon=1e-05 # be the same with PyTorch
  178. )
  179. # reshape back #TODO
  180. fc31, _ = model.Reshape(fc31, ['fc31', '_'], shape=[1, latent_dim])
  181. # Linear layer for the log-variance
  182. fc32 = brew.fc(model, fc2, 'fc32', dim_in=hidden_dim, dim_out=latent_dim,
  183. weight_init=('GivenTensorFill', dict(values=weight_dict['fc32_w'])),
  184. bias_init=('GivenTensorFill', dict(values=weight_dict['fc32_b'].T)))
  185.  
  186. # reshape first
  187. fc32, _ = model.Reshape(fc32, ['fc32', '_'], shape=[1, latent_dim, 1, 1])
  188. print('good')
  189. # model.Reshape(fc32, shape=[])? # TODO
  190. bn32_w = make_bn_param(model, weight_dict, 'bn32_w')
  191. bn32_b = make_bn_param(model, weight_dict, 'bn32_b')
  192. bn32_rmean = make_bn_param(model, weight_dict, 'bn32_rmean')
  193. bn32_rvar = make_bn_param(model, weight_dict, 'bn32_rvar')
  194.  
  195. model.net.SpatialBN([fc32,
  196. bn32_w,
  197. bn32_b,
  198. bn32_rmean,
  199. bn32_rvar],
  200. [fc32],
  201. is_test=True,
  202. epsilon=1e-05 # be the same with PyTorch
  203. )
  204. # reshape back #TODO
  205. fc32, _ = model.Reshape(fc32, ['fc32', '_'], shape=[1, latent_dim])
  206.  
  207. model.net.AddExternalOutput(fc31)
  208. model.net.AddExternalOutput(fc32)
  209. return fc31, fc32
  210.  
  211.  
  212. def save_caffe2_model(model, model_path):
  213. # two formats are written
  214.  
  215. # 1. write to pb formats
  216. print 'Saving to pb model files'
  217.  
  218. # write predict net
  219. with open('%s_predict.pb' % model_path, 'wb') as f:
  220. f.write(model.net._net.SerializeToString())
  221.  
  222. # write init net
  223. init_net = caffe2_pb2.NetDef()
  224. for param in model.params:
  225. blob = workspace.FetchBlob(param)
  226. shape = blob.shape
  227. op = core.CreateOperator('GivenTensorFill',
  228. [],
  229. [param],
  230. arg=[
  231. utils.MakeArgument('shape', shape),
  232. utils.MakeArgument('values', blob)
  233. ])
  234.  
  235. init_net.op.extend([op])
  236.  
  237. # write dummy data blob to the dump
  238. data_blob = workspace.FetchBlob('data')
  239. init_net.op.extend([
  240. core.CreateOperator('GivenTensorFill',
  241. [],
  242. ['data'],
  243. arg=[
  244. utils.MakeArgument('shape', data_blob.shape),
  245. utils.MakeArgument('values', data_blob)
  246. ])])
  247.  
  248. with open('%s_init.pb' % model_path, 'wb') as f:
  249. f.write(init_net.SerializeToString())
  250.  
  251. # 2. write to logfile_db format
  252. print 'Saving to logfile_db model file'
  253. predictor_export_meta = predictor_exporter.PredictorExportMeta(
  254. predict_net=model.net.Proto(),
  255. parameters=model.GetParams(),
  256. inputs=['data'],
  257. outputs=['fc31', 'fc32']
  258. )
  259.  
  260. predictor_exporter.save_to_db(
  261. db_type='log_file_db',
  262. db_destination='%s.logdb' % model_path,
  263. predictor_export_meta=predictor_export_meta,
  264. )
  265.  
  266.  
  267. if __name__ == '__main__':
  268. if len(sys.argv) != 3:
  269. print 'Usage: python %s <pytorch_model_path> <caffe2_model_prefix>' \
  270. % sys.argv[0]
  271. sys.exit(1)
  272.  
  273. pytorch_model_path = sys.argv[1]
  274. caffe2_model_prefix = sys.argv[2]
  275.  
  276. # load weights from PyTorch NVDM model
  277. weight_dict = load_nvdm_inference_pt_model(pytorch_model_path)
  278.  
  279. # get dims from NVDM
  280. vocab_size = weight_dict['fc1_w'].shape[1]
  281. hidden_dim = weight_dict['fc1_w'].shape[0]
  282. latent_dim = weight_dict['fc31_w'].shape[0]
  283. print 'vocab size: ', vocab_size
  284. print 'hidden size: ', hidden_dim
  285. print 'latent dim: ', latent_dim
  286.  
  287. # dummy batch size needed for Caffe2 model
  288. batch_size = 1
  289. data = np.array(np.random.randn(batch_size, vocab_size)).astype('float32')
  290. data = np.ones((batch_size, vocab_size), dtype=np.float32)
  291. weight_dict['data'] = data
  292.  
  293. workspace.FeedBlob('data', data) # device_option=device_opts)
  294.  
  295. # define caffe2 model and load weights
  296. cf2_model = model_helper.ModelHelper(name='NVDM-cf2')
  297. inference_mean, inference_variance = make_nvdm_inference_cf2_model(
  298. cf2_model,
  299. weight_dict,
  300. vocab_size,
  301. hidden_dim,
  302. latent_dim,
  303. )
  304.  
  305. workspace.RunNetOnce(cf2_model.param_init_net)
  306. workspace.RunNetOnce(cf2_model.net)
  307. print(workspace.FetchBlob(inference_mean))
  308.  
  309. # save caffe2 model
  310. save_caffe2_model(cf2_model, caffe2_model_prefix)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement