from __future__ import absolute_import, print_function, division
import sys
import math

import numpy as np

import theano
import theano.tests.unittest_tools as utt
from theano.gpuarray.basic_ops import (infer_context_name, as_gpuarray_variable,
                                       gpu_contiguous, GpuAllocEmpty)
from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.corr import CorrMM_gradInputs
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.opt import Assert
from theano.tensor.utils import hash_from_ndarray
def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None,
                  border_mode='valid', subsample=(1, 1), dilation=(1, 1),
                  conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          dilation=dilation, conv_mode=conv_mode,
                          precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for beta not null) '
                       'does not have expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
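
# Note (added for clarity; this mirrors cuDNN's usual alpha/beta blending
# semantics and the CPU reference computed further below, and is not stated
# in the original paste): GpuDnnConvGradI combines its result with the
# provided buffer as
#     result = alpha * gradinput(kerns, topgrad) + beta * out
# which is why `out` is only required when beta is non-zero.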
def _next_ten_exponent(val):
    """Return the exponent of the next power of ten that follows val.

    val should be a positive integer.
    Examples:
        for 0 to 9, returns 1 (=> 10**1 == 10)
        for 10 to 99, returns 2 (=> 10**2 == 100)
    """
    ten_exponent = 1
    while val // 10 > 0:
        ten_exponent += 1
        val //= 10
    return ten_exponent
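
# Quick self-checks (added for illustration; they follow directly from the
# examples documented above and run cheaply at import time).
assert _next_ten_exponent(0) == 1
assert _next_ten_exponent(9) == 1
assert _next_ten_exponent(10) == 2
assert _next_ten_exponent(99) == 2
assert _next_ten_exponent(100) == 3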
def scale_numpy_arrays_inplace(A, B, alpha):
    scale_factor = 1
    # Scale down A and B simultaneously if alpha is not 1.
    if alpha != 1:
        scale_factor *= alpha
    # Normalize A and B simultaneously so that any values in these tensors
    # are in the interval [0, 1).
    max_a = math.floor(abs(A.max()))
    max_b = math.floor(abs(B.max()))
    if max_a or max_b:
        m_a = _next_ten_exponent(max_a)
        m_b = _next_ten_exponent(max_b)
        max_m = max(m_a, m_b)
        scale_factor *= 10 ** max_m
    if scale_factor != 1:
        A /= scale_factor
        B /= scale_factor
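
# Worked example (added for illustration, not part of the original paste):
# with alpha == 2, A.max() == 50 and B.max() == 2, the ten-exponents are 2
# and 1, so both arrays get divided by 2 * 10 ** 2 == 200.
_A = np.array([5.0, 50.0])
_B = np.array([2.0])
scale_numpy_arrays_inplace(_A, _B, 2)
assert np.allclose(_A, [0.025, 0.25]) and np.allclose(_B, [0.01])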
def array_like_conv_output(inputs_shape, filters_shape, border_mode,
                           subsample, dilation, dtype, allocator=np.random.random):
    out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode,
                                    subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return allocator(out_shp).astype(dtype)
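
# Sanity check (added for illustration): for the parameter set used at the
# bottom of this script -- border_mode (1, 1) meaning one pixel of padding
# per spatial axis -- the expected gradient/input shape is hand-computed as
# (2, 2, 300 + 2 - 40 + 1, 5 + 2 - 4 + 1) == (2, 2, 263, 4).
assert get_conv_output_shape((2, 3, 300, 5), (2, 3, 40, 4),
                             (1, 1), (1, 1), (1, 1)) == (2, 2, 263, 4)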
def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    (inputs_shape, filters_shape, subsample, dilation,
     border_mode, conv_mode, alpha, beta) = parameters
    if beta == 0:
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                         subsample, dilation, dtype, allocator)
    # Scale down the input values to prevent absolute errors in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10
    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)
    # Compile a theano function for the cuDNN implementation.
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta,
                           out=inputs_val, border_mode=border_mode,
                           subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)
    f = theano.function([], grad_i, mode=mode_with_gpu)
    # If conv_mode is 'conv', the reference implementation should use
    # filters flipped along the width, height and time axes.
    if conv_mode == 'conv':
        if filters.ndim == 5:
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        flipped_filters = filters
    # Compile a theano function for the reference implementation.
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation)(ref_cast(flipped_filters),
                                                             ref_cast(topgrad),
                                                             inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")
    # Compare the results of the two implementations.
    res_ref = f_ref()
    res = np.asarray(f())
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        cpu_res = alpha * res_ref + beta * inputs_val
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])
algo = 'deterministic'
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
parameters = (
    (2, 3, 300, 5),  # inputs_shape
    (2, 3, 40, 4),   # filters_shape
    (1, 1),          # subsample
    (1, 1),          # dilation
    (1, 1),          # border_mode (padding per spatial axis)
    'conv',          # conv_mode
    2,               # alpha
    -3,              # beta
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)
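
# Usage sketch (added for illustration): the dtype can be passed as the first
# command-line argument, falling back to float16 when omitted, e.g.
#     python <this_script>.py float32
# Running the 'deterministic' algo twice (as above) should presumably print
# identical hashes for the GPU result on both runs.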