Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python2
- from caffe2.python import brew, model_helper, workspace, core, utils
- from caffe2.proto import caffe2_pb2
- from caffe2.caffe2.fb.predictor import predictor_exporter
- import torch
- import sys
- import numpy as np
def load_nvdm_inference_pt_model(pytorch_model_path):
    """Load trained NVDM inference-network weights from a PyTorch checkpoint.

    The checkpoint is expected to hold a state dict whose inference-net
    stages are Sequential(Linear, BatchNorm1d, ...), i.e. keys of the form
    '<name>.0.weight' (linear) and '<name>.1.running_mean' (batch norm).

    Args:
        pytorch_model_path: path to a file written by ``torch.save``.

    Returns:
        dict mapping short Caffe2-style names ('fc1_w', 'fc1_b', 'bn1_w',
        'bn1_b', 'bn1_rmean', 'bn1_rvar', ...) to numpy arrays.
    """
    trained_params = torch.load(pytorch_model_path)

    def _np(key):
        # Move to CPU first so .numpy() works regardless of training device.
        return trained_params[key].cpu().numpy()

    weight_dict = {}
    # (linear layer name in the checkpoint, short BN prefix in weight_dict):
    # fc1/fc2 are the hidden layers, fc31 is mu(z|x), fc32 is the variance head.
    for name, bn in [('fc1', 'bn1'), ('fc2', 'bn2'),
                     ('fc31', 'bn31'), ('fc32', 'bn32')]:
        # Linear sublayer (index 0 in the Sequential).
        weight_dict['%s_w' % name] = _np('%s.0.weight' % name)
        weight_dict['%s_b' % name] = _np('%s.0.bias' % name)
        # BatchNorm sublayer (index 1): affine params + running statistics.
        weight_dict['%s_w' % bn] = _np('%s.1.weight' % name)
        weight_dict['%s_b' % bn] = _np('%s.1.bias' % name)
        weight_dict['%s_rmean' % bn] = _np('%s.1.running_mean' % name)
        weight_dict['%s_rvar' % bn] = _np('%s.1.running_var' % name)
    return weight_dict
def make_bn_param(model, weight_dict, param_name):
    """Create a GivenTensorFill init op for one BatchNorm parameter blob.

    Args:
        model: Caffe2 ModelHelper whose param_init_net receives the op.
        weight_dict: mapping of name -> 1-D numpy array of trained values.
        param_name: key in weight_dict; also used as the output blob name.

    Returns:
        The blob reference produced by GivenTensorFill.
    """
    assert param_name in weight_dict, 'missing BN param: %s' % param_name
    values = weight_dict[param_name]
    # BN params are 1-D, so .size (the total element count) is the full
    # shape.  (The original code applied .T to values, which is a no-op on
    # a 1-D array, and printed the size for debugging; both removed.)
    return model.param_init_net.GivenTensorFill(
        [],
        param_name,
        shape=[values.size],
        values=values,
    )
def _append_spatial_bn(model, weight_dict, blob, blob_name, dim, bn_prefix):
    """Apply a trained BatchNorm layer (inference mode) to a 2-D blob.

    SpatialBN operates on NCHW tensors, so the [batch, dim] blob is first
    reshaped to [batch, dim, 1, 1], normalized in place, then reshaped back.
    NOTE(review): the reshape hard-codes batch size 1, matching the dummy
    batch fed in __main__ -- confirm before using larger batches.
    """
    blob, _ = model.Reshape(blob, [blob_name, '_'], shape=[1, dim, 1, 1])
    # Input order required by SpatialBN: X, scale, bias, mean, var.
    bn_params = [make_bn_param(model, weight_dict, '%s_%s' % (bn_prefix, sfx))
                 for sfx in ('w', 'b', 'rmean', 'rvar')]
    # epsilon matches PyTorch's BatchNorm default (1e-5).
    model.net.SpatialBN([blob] + bn_params, [blob],
                        is_test=True, epsilon=1e-05)
    blob, _ = model.Reshape(blob, [blob_name, '_'], shape=[1, dim])
    return blob


def make_nvdm_inference_cf2_model(model, weight_dict,
                                  vocab_size, hidden_dim, latent_dim):
    """Build the Caffe2 graph of the NVDM inference network.

    Graph shape (as wired below; ReLU is applied before BatchNorm here):
        data -> fc1 -> ReLU -> BN -> fc2 -> ReLU -> BN
                -> fc31 -> BN  (mean head)
                -> fc32 -> BN  (variance head)

    Args:
        model: Caffe2 ModelHelper to populate.
        weight_dict: trained params from load_nvdm_inference_pt_model().
        vocab_size: input dimensionality.
        hidden_dim: width of the two hidden FC layers.
        latent_dim: dimensionality of the latent code z.

    Returns:
        (fc31, fc32) blobs, both registered as external outputs.
    """
    data = model.net.AddExternalInput('data')

    # First FC + ReLU + BN.
    fc1 = brew.fc(model, data, 'fc1', dim_in=vocab_size, dim_out=hidden_dim,
                  weight_init=('GivenTensorFill',
                               dict(values=weight_dict['fc1_w'])),
                  bias_init=('GivenTensorFill',
                             dict(values=weight_dict['fc1_b'].T)))
    fc1 = brew.relu(model, fc1, fc1)
    fc1 = _append_spatial_bn(model, weight_dict, fc1, 'fc1',
                             hidden_dim, 'bn1')

    # Second FC + ReLU + BN.
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=hidden_dim, dim_out=hidden_dim,
                  weight_init=('GivenTensorFill',
                               dict(values=weight_dict['fc2_w'])),
                  bias_init=('GivenTensorFill',
                             dict(values=weight_dict['fc2_b'].T)))
    fc2 = brew.relu(model, fc2, fc2)
    fc2 = _append_spatial_bn(model, weight_dict, fc2, 'fc2',
                             hidden_dim, 'bn2')

    # Mean head mu(z|x): FC + BN, no ReLU.
    fc31 = brew.fc(model, fc2, 'fc31', dim_in=hidden_dim, dim_out=latent_dim,
                   weight_init=('GivenTensorFill',
                                dict(values=weight_dict['fc31_w'])),
                   bias_init=('GivenTensorFill',
                              dict(values=weight_dict['fc31_b'].T)))
    fc31 = _append_spatial_bn(model, weight_dict, fc31, 'fc31',
                              latent_dim, 'bn31')

    # Variance head: FC + BN, no ReLU, also fed from fc2.
    fc32 = brew.fc(model, fc2, 'fc32', dim_in=hidden_dim, dim_out=latent_dim,
                   weight_init=('GivenTensorFill',
                                dict(values=weight_dict['fc32_w'])),
                   bias_init=('GivenTensorFill',
                              dict(values=weight_dict['fc32_b'].T)))
    fc32 = _append_spatial_bn(model, weight_dict, fc32, 'fc32',
                              latent_dim, 'bn32')

    model.net.AddExternalOutput(fc31)
    model.net.AddExternalOutput(fc32)
    return fc31, fc32
def save_caffe2_model(model, model_path):
    """Serialize the Caffe2 model to disk in two formats.

    1. A raw protobuf pair: <model_path>_predict.pb (the predict net) and
       <model_path>_init.pb (an init net that refills every trained blob).
    2. A single log_file_db dump: <model_path>.logdb.

    Args:
        model: Caffe2 ModelHelper whose blobs are already materialized in
            the current workspace (param_init_net must have been run).
        model_path: filename prefix for all outputs.
    """
    def _fill_op(blob_name, blob):
        # Re-emit a blob as a GivenTensorFill op so the init net can
        # rebuild the workspace from scratch.
        return core.CreateOperator(
            'GivenTensorFill', [], [blob_name],
            arg=[utils.MakeArgument('shape', blob.shape),
                 utils.MakeArgument('values', blob)])

    # 1. pb format.
    print('Saving to pb model files')
    # Use the public Proto() accessor (the original reached into the
    # private model.net._net here while using Proto() further down).
    with open('%s_predict.pb' % model_path, 'wb') as f:
        f.write(model.net.Proto().SerializeToString())
    init_net = caffe2_pb2.NetDef()
    for param in model.params:
        init_net.op.extend([_fill_op(param, workspace.FetchBlob(param))])
    # Also dump a dummy 'data' blob so the predict net's external input
    # resolves when the init net alone is loaded.
    init_net.op.extend([_fill_op('data', workspace.FetchBlob('data'))])
    with open('%s_init.pb' % model_path, 'wb') as f:
        f.write(init_net.SerializeToString())

    # 2. log_file_db format.
    print('Saving to logfile_db model file')
    predictor_export_meta = predictor_exporter.PredictorExportMeta(
        predict_net=model.net.Proto(),
        parameters=model.GetParams(),
        inputs=['data'],
        outputs=['fc31', 'fc32'],
    )
    predictor_exporter.save_to_db(
        db_type='log_file_db',
        db_destination='%s.logdb' % model_path,
        predictor_export_meta=predictor_export_meta,
    )
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: python %s <pytorch_model_path> <caffe2_model_prefix>'
              % sys.argv[0])
        sys.exit(1)
    pytorch_model_path = sys.argv[1]
    caffe2_model_prefix = sys.argv[2]

    # Load trained weights from the PyTorch NVDM checkpoint.
    weight_dict = load_nvdm_inference_pt_model(pytorch_model_path)

    # Infer layer sizes from the weight matrices (rows = out, cols = in).
    vocab_size = weight_dict['fc1_w'].shape[1]
    hidden_dim = weight_dict['fc1_w'].shape[0]
    latent_dim = weight_dict['fc31_w'].shape[0]
    print('vocab size: %d' % vocab_size)
    print('hidden size: %d' % hidden_dim)
    print('latent dim: %d' % latent_dim)

    # Feed a dummy all-ones batch so the net can be run once below.
    # (The original also built a np.random.randn batch that was immediately
    # overwritten -- dead code, removed.  The reshape ops in the Caffe2
    # graph assume batch_size == 1.)
    batch_size = 1
    data = np.ones((batch_size, vocab_size), dtype=np.float32)
    weight_dict['data'] = data
    workspace.FeedBlob('data', data)

    # Build the Caffe2 inference net with the loaded weights.
    cf2_model = model_helper.ModelHelper(name='NVDM-cf2')
    inference_mean, inference_variance = make_nvdm_inference_cf2_model(
        cf2_model,
        weight_dict,
        vocab_size,
        hidden_dim,
        latent_dim,
    )

    # Run once as a smoke test before saving.
    workspace.RunNetOnce(cf2_model.param_init_net)
    workspace.RunNetOnce(cf2_model.net)
    print(workspace.FetchBlob(inference_mean))

    # Save to the Caffe2 formats.
    save_caffe2_model(cf2_model, caffe2_model_prefix)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement