from __future__ import absolute_import, print_function, division
import sys
import math

import numpy as np

import theano
import theano.tests.unittest_tools as utt
from theano.gpuarray.basic_ops import (infer_context_name, as_gpuarray_variable,
                                       gpu_contiguous, GpuAllocEmpty)
from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
from theano.tensor.nnet.corr import CorrMM_gradInputs
from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
from theano.tensor.opt import Assert
from theano.tensor.utils import hash_from_ndarray
def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None,
                  border_mode='valid', subsample=(1, 1), dilation=(1, 1),
                  conv_mode='conv', algo=None, precision=None):
    ctx_name = infer_context_name(kerns, topgrad)
    kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
    topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
    img_shp = theano.tensor.as_tensor_variable(img_shp)
    precision = get_precision(precision, [kerns, topgrad])
    desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample,
                          dilation=dilation, conv_mode=conv_mode,
                          precision=precision)(kerns.shape)
    if beta == 0:
        real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
    else:
        assert out is not None
        out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
        check = Assert('GpuDnnConvGradI: given output (for beta not null) '
                       'does not have expected shape')
        real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
    return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
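
# Note (added for clarity; this mirrors cuDNN's usual alpha/beta blending
# semantics and the CPU reference computed further below, and is not stated
# in the original paste): GpuDnnConvGradI combines its result with the
# provided buffer as
#     result = alpha * gradinput(kerns, topgrad) + beta * out
# which is why `out` is only required when beta is non-zero.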
def _next_ten_exponent(val):
    """Return the exponent of the next power of ten that follows val.

    val should be a positive integer.
    Examples:
        for 0 to 9, returns 1 (=> 10**1 == 10)
        for 10 to 99, returns 2 (=> 10**2 == 100)
    """
    ten_exponent = 1
    while val // 10 > 0:
        ten_exponent += 1
        val //= 10
    return ten_exponent
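
# Quick self-checks (added for illustration; they follow directly from the
# examples documented above and run cheaply at import time).
assert _next_ten_exponent(0) == 1
assert _next_ten_exponent(9) == 1
assert _next_ten_exponent(10) == 2
assert _next_ten_exponent(99) == 2
assert _next_ten_exponent(100) == 3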
def scale_numpy_arrays_inplace(A, B, alpha):
    scale_factor = 1
    # Scale down A and B simultaneously if alpha is not 1.
    if alpha != 1:
        scale_factor *= alpha
    # Normalize A and B simultaneously so that any values in these tensors
    # are in the interval [0, 1).
    max_a = math.floor(abs(A.max()))
    max_b = math.floor(abs(B.max()))
    if max_a or max_b:
        m_a = _next_ten_exponent(max_a)
        m_b = _next_ten_exponent(max_b)
        max_m = max(m_a, m_b)
        scale_factor *= 10 ** max_m
    if scale_factor != 1:
        A /= scale_factor
        B /= scale_factor
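
# Worked example (added for illustration, not part of the original paste):
# with alpha == 2, A.max() == 50 and B.max() == 2, the ten-exponents are 2
# and 1, so both arrays get divided by 2 * 10 ** 2 == 200.
_A = np.array([5.0, 50.0])
_B = np.array([2.0])
scale_numpy_arrays_inplace(_A, _B, 2)
assert np.allclose(_A, [0.025, 0.25]) and np.allclose(_B, [0.01])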
def array_like_conv_output(inputs_shape, filters_shape, border_mode,
                           subsample, dilation, dtype, allocator=np.random.random):
    out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode,
                                    subsample, dilation)
    out_shp = assert_conv_shape(out_shp)
    return allocator(out_shp).astype(dtype)
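
# Sanity check (added for illustration): for the parameter set used at the
# bottom of this script -- border_mode (1, 1) meaning one pixel of padding
# per spatial axis -- the expected gradient/input shape is hand-computed as
# (2, 2, 300 + 2 - 40 + 1, 5 + 2 - 4 + 1) == (2, 2, 263, 4).
assert get_conv_output_shape((2, 3, 300, 5), (2, 3, 40, 4),
                             (1, 1), (1, 1), (1, 1)) == (2, 2, 263, 4)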
def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    (inputs_shape, filters_shape, subsample, dilation,
     border_mode, conv_mode, alpha, beta) = parameters
    if beta == 0:
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode,
                                         subsample, dilation, dtype, allocator)
    # Scale down the input values to prevent absolute errors in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10
    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)
    # Compile a theano function for the cuDNN implementation.
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta,
                           out=inputs_val, border_mode=border_mode,
                           subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)
    f = theano.function([], grad_i, mode=mode_with_gpu)
    # If conv_mode is 'conv', the reference implementation should use
    # filters flipped along the width, height and time axes.
    if conv_mode == 'conv':
        if filters.ndim == 5:
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        flipped_filters = filters
    # Compile a theano function for the reference implementation.
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation)(ref_cast(flipped_filters),
                                                             ref_cast(topgrad),
                                                             inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")
    # Compare the results of the two implementations.
    res_ref = f_ref()
    res = np.asarray(f())
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        cpu_res = alpha * res_ref + beta * inputs_val
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])
algo = 'deterministic'
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
parameters = (
    (2, 3, 300, 5),  # inputs_shape
    (2, 3, 40, 4),   # filters_shape
    (1, 1),          # subsample
    (1, 1),          # dilation
    (1, 1),          # border_mode (padding per spatial axis)
    'conv',          # conv_mode
    2,               # alpha
    -3,              # beta
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)
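
# Usage sketch (added for illustration): the dtype can be passed as the first
# command-line argument, falling back to float16 when omitted, e.g.
#     python <this_script>.py float32
# Running the 'deterministic' algo twice (as above) should presumably print
# identical hashes for the GPU result on both runs.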