Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python2
- from caffe2.python import brew, model_helper, workspace, core, utils
- from caffe2.proto import caffe2_pb2
- from caffe2.caffe2.fb.predictor import predictor_exporter
- import torch
- import sys
- import numpy as np
def load_nvdm_inference_pt_model(pytorch_model_path):
    """Load trained NVDM inference-network weights from a PyTorch checkpoint.

    The checkpoint is expected to hold a state dict whose inference-net
    stages are Sequential(Linear, BatchNorm1d, ...), i.e. keys of the form
    '<name>.0.weight' (linear) and '<name>.1.running_mean' (batch norm).

    Args:
        pytorch_model_path: path to a file written by ``torch.save``.

    Returns:
        dict mapping short Caffe2-style names ('fc1_w', 'fc1_b', 'bn1_w',
        'bn1_b', 'bn1_rmean', 'bn1_rvar', ...) to numpy arrays.
    """
    trained_params = torch.load(pytorch_model_path)

    def _np(key):
        # Move to CPU first so .numpy() works regardless of training device.
        return trained_params[key].cpu().numpy()

    weight_dict = {}
    # (linear layer name in the checkpoint, short BN prefix in weight_dict):
    # fc1/fc2 are the hidden layers, fc31 is mu(z|x), fc32 is the variance head.
    for name, bn in [('fc1', 'bn1'), ('fc2', 'bn2'),
                     ('fc31', 'bn31'), ('fc32', 'bn32')]:
        # Linear sublayer (index 0 in the Sequential).
        weight_dict['%s_w' % name] = _np('%s.0.weight' % name)
        weight_dict['%s_b' % name] = _np('%s.0.bias' % name)
        # BatchNorm sublayer (index 1): affine params + running statistics.
        weight_dict['%s_w' % bn] = _np('%s.1.weight' % name)
        weight_dict['%s_b' % bn] = _np('%s.1.bias' % name)
        weight_dict['%s_rmean' % bn] = _np('%s.1.running_mean' % name)
        weight_dict['%s_rvar' % bn] = _np('%s.1.running_var' % name)
    return weight_dict
def make_bn_param(model, weight_dict, param_name):
    """Create a GivenTensorFill init op for one BatchNorm parameter blob.

    Args:
        model: Caffe2 ModelHelper whose param_init_net receives the op.
        weight_dict: mapping of name -> 1-D numpy array of trained values.
        param_name: key in weight_dict; also used as the output blob name.

    Returns:
        The blob reference produced by GivenTensorFill.
    """
    assert param_name in weight_dict, 'missing BN param: %s' % param_name
    values = weight_dict[param_name]
    # BN params are 1-D, so .size (the total element count) is the full
    # shape.  (The original code applied .T to values, which is a no-op on
    # a 1-D array, and printed the size for debugging; both removed.)
    return model.param_init_net.GivenTensorFill(
        [],
        param_name,
        shape=[values.size],
        values=values,
    )
def _append_spatial_bn(model, weight_dict, blob, blob_name, dim, bn_prefix):
    """Apply a trained BatchNorm layer (inference mode) to a 2-D blob.

    SpatialBN operates on NCHW tensors, so the [batch, dim] blob is first
    reshaped to [batch, dim, 1, 1], normalized in place, then reshaped back.
    NOTE(review): the reshape hard-codes batch size 1, matching the dummy
    batch fed in __main__ -- confirm before using larger batches.
    """
    blob, _ = model.Reshape(blob, [blob_name, '_'], shape=[1, dim, 1, 1])
    # Input order required by SpatialBN: X, scale, bias, mean, var.
    bn_params = [make_bn_param(model, weight_dict, '%s_%s' % (bn_prefix, sfx))
                 for sfx in ('w', 'b', 'rmean', 'rvar')]
    # epsilon matches PyTorch's BatchNorm default (1e-5).
    model.net.SpatialBN([blob] + bn_params, [blob],
                        is_test=True, epsilon=1e-05)
    blob, _ = model.Reshape(blob, [blob_name, '_'], shape=[1, dim])
    return blob


def make_nvdm_inference_cf2_model(model, weight_dict,
                                  vocab_size, hidden_dim, latent_dim):
    """Build the Caffe2 graph of the NVDM inference network.

    Graph shape (as wired below; ReLU is applied before BatchNorm here):
        data -> fc1 -> ReLU -> BN -> fc2 -> ReLU -> BN
                -> fc31 -> BN  (mean head)
                -> fc32 -> BN  (variance head)

    Args:
        model: Caffe2 ModelHelper to populate.
        weight_dict: trained params from load_nvdm_inference_pt_model().
        vocab_size: input dimensionality.
        hidden_dim: width of the two hidden FC layers.
        latent_dim: dimensionality of the latent code z.

    Returns:
        (fc31, fc32) blobs, both registered as external outputs.
    """
    data = model.net.AddExternalInput('data')

    # First FC + ReLU + BN.
    fc1 = brew.fc(model, data, 'fc1', dim_in=vocab_size, dim_out=hidden_dim,
                  weight_init=('GivenTensorFill',
                               dict(values=weight_dict['fc1_w'])),
                  bias_init=('GivenTensorFill',
                             dict(values=weight_dict['fc1_b'].T)))
    fc1 = brew.relu(model, fc1, fc1)
    fc1 = _append_spatial_bn(model, weight_dict, fc1, 'fc1',
                             hidden_dim, 'bn1')

    # Second FC + ReLU + BN.
    fc2 = brew.fc(model, fc1, 'fc2', dim_in=hidden_dim, dim_out=hidden_dim,
                  weight_init=('GivenTensorFill',
                               dict(values=weight_dict['fc2_w'])),
                  bias_init=('GivenTensorFill',
                             dict(values=weight_dict['fc2_b'].T)))
    fc2 = brew.relu(model, fc2, fc2)
    fc2 = _append_spatial_bn(model, weight_dict, fc2, 'fc2',
                             hidden_dim, 'bn2')

    # Mean head mu(z|x): FC + BN, no ReLU.
    fc31 = brew.fc(model, fc2, 'fc31', dim_in=hidden_dim, dim_out=latent_dim,
                   weight_init=('GivenTensorFill',
                                dict(values=weight_dict['fc31_w'])),
                   bias_init=('GivenTensorFill',
                              dict(values=weight_dict['fc31_b'].T)))
    fc31 = _append_spatial_bn(model, weight_dict, fc31, 'fc31',
                              latent_dim, 'bn31')

    # Variance head: FC + BN, no ReLU, also fed from fc2.
    fc32 = brew.fc(model, fc2, 'fc32', dim_in=hidden_dim, dim_out=latent_dim,
                   weight_init=('GivenTensorFill',
                                dict(values=weight_dict['fc32_w'])),
                   bias_init=('GivenTensorFill',
                              dict(values=weight_dict['fc32_b'].T)))
    fc32 = _append_spatial_bn(model, weight_dict, fc32, 'fc32',
                              latent_dim, 'bn32')

    model.net.AddExternalOutput(fc31)
    model.net.AddExternalOutput(fc32)
    return fc31, fc32
def save_caffe2_model(model, model_path):
    """Serialize the Caffe2 model to disk in two formats.

    1. A raw protobuf pair: <model_path>_predict.pb (the predict net) and
       <model_path>_init.pb (an init net that refills every trained blob).
    2. A single log_file_db dump: <model_path>.logdb.

    Args:
        model: Caffe2 ModelHelper whose blobs are already materialized in
            the current workspace (param_init_net must have been run).
        model_path: filename prefix for all outputs.
    """
    def _fill_op(blob_name, blob):
        # Re-emit a blob as a GivenTensorFill op so the init net can
        # rebuild the workspace from scratch.
        return core.CreateOperator(
            'GivenTensorFill', [], [blob_name],
            arg=[utils.MakeArgument('shape', blob.shape),
                 utils.MakeArgument('values', blob)])

    # 1. pb format.
    print('Saving to pb model files')
    # Use the public Proto() accessor (the original reached into the
    # private model.net._net here while using Proto() further down).
    with open('%s_predict.pb' % model_path, 'wb') as f:
        f.write(model.net.Proto().SerializeToString())
    init_net = caffe2_pb2.NetDef()
    for param in model.params:
        init_net.op.extend([_fill_op(param, workspace.FetchBlob(param))])
    # Also dump a dummy 'data' blob so the predict net's external input
    # resolves when the init net alone is loaded.
    init_net.op.extend([_fill_op('data', workspace.FetchBlob('data'))])
    with open('%s_init.pb' % model_path, 'wb') as f:
        f.write(init_net.SerializeToString())

    # 2. log_file_db format.
    print('Saving to logfile_db model file')
    predictor_export_meta = predictor_exporter.PredictorExportMeta(
        predict_net=model.net.Proto(),
        parameters=model.GetParams(),
        inputs=['data'],
        outputs=['fc31', 'fc32'],
    )
    predictor_exporter.save_to_db(
        db_type='log_file_db',
        db_destination='%s.logdb' % model_path,
        predictor_export_meta=predictor_export_meta,
    )
if __name__ == '__main__':
    if len(sys.argv) != 3:
        print('Usage: python %s <pytorch_model_path> <caffe2_model_prefix>'
              % sys.argv[0])
        sys.exit(1)
    pytorch_model_path = sys.argv[1]
    caffe2_model_prefix = sys.argv[2]

    # Load trained weights from the PyTorch NVDM checkpoint.
    weight_dict = load_nvdm_inference_pt_model(pytorch_model_path)

    # Infer layer sizes from the weight matrices (rows = out, cols = in).
    vocab_size = weight_dict['fc1_w'].shape[1]
    hidden_dim = weight_dict['fc1_w'].shape[0]
    latent_dim = weight_dict['fc31_w'].shape[0]
    print('vocab size: %d' % vocab_size)
    print('hidden size: %d' % hidden_dim)
    print('latent dim: %d' % latent_dim)

    # Feed a dummy all-ones batch so the net can be run once below.
    # (The original also built a np.random.randn batch that was immediately
    # overwritten -- dead code, removed.  The reshape ops in the Caffe2
    # graph assume batch_size == 1.)
    batch_size = 1
    data = np.ones((batch_size, vocab_size), dtype=np.float32)
    weight_dict['data'] = data
    workspace.FeedBlob('data', data)

    # Build the Caffe2 inference net with the loaded weights.
    cf2_model = model_helper.ModelHelper(name='NVDM-cf2')
    inference_mean, inference_variance = make_nvdm_inference_cf2_model(
        cf2_model,
        weight_dict,
        vocab_size,
        hidden_dim,
        latent_dim,
    )

    # Run once as a smoke test before saving.
    workspace.RunNetOnce(cf2_model.param_init_net)
    workspace.RunNetOnce(cf2_model.net)
    print(workspace.FetchBlob(inference_mean))

    # Save to the Caffe2 formats.
    save_caffe2_model(cf2_model, caffe2_model_prefix)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement