Advertisement
Guest User

Untitled

a guest
Sep 20th, 2017
59
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.40 KB | None | 0 0
  1. from __future__ import absolute_import, print_function, division
  2.  
  3. import sys, math
  4. import numpy as np
  5. import theano
  6. import theano.tests.unittest_tools as utt
  7. from theano.gpuarray.basic_ops import infer_context_name, as_gpuarray_variable, gpu_contiguous, GpuAllocEmpty
  8. from theano.gpuarray.dnn import GpuDnnConvDesc, GpuDnnConvGradI, get_precision
  9. from theano.gpuarray.tests.config import mode_with_gpu, ref_cast
  10. from theano.tensor.nnet.corr import CorrMM_gradInputs
  11. from theano.tensor.nnet.abstract_conv import get_conv_output_shape, assert_conv_shape
  12. from theano.tensor.opt import Assert
  13. from theano.tensor.utils import hash_from_ndarray
  14.  
  15. def dnn_gradinput(kerns, topgrad, img_shp, alpha=1, beta=0, out=None, border_mode='valid', subsample=(1, 1),
  16. dilation=(1, 1), conv_mode='conv', algo=None, precision=None):
  17. ctx_name = infer_context_name(kerns, topgrad)
  18.  
  19. kerns = gpu_contiguous(as_gpuarray_variable(kerns, ctx_name))
  20. topgrad = gpu_contiguous(as_gpuarray_variable(topgrad, ctx_name))
  21. img_shp = theano.tensor.as_tensor_variable(img_shp)
  22.  
  23. precision = get_precision(precision, [kerns, topgrad])
  24. desc = GpuDnnConvDesc(border_mode=border_mode, subsample=subsample, dilation=dilation,
  25. conv_mode=conv_mode, precision=precision)(kerns.shape)
  26. if beta == 0:
  27. real_out = GpuAllocEmpty(dtype=kerns.dtype, context_name=ctx_name)(*img_shp)
  28. else:
  29. assert out is not None
  30. out = gpu_contiguous(as_gpuarray_variable(out, ctx_name))
  31. check = Assert('GpuDnnConvGradI: qiven output (for beta not null) does not have expected shape')
  32. real_out = check(out, theano.tensor.all(theano.tensor.eq(out.shape, img_shp)))
  33. return GpuDnnConvGradI(algo=algo)(kerns, topgrad, real_out, desc, alpha, beta)
  34.  
  35. def _next_ten_exponent(val):
  36. # Return exponent for the next ten power that follows val.
  37. # val should be a positive integer.
  38. # Examples:
  39. # for 0 to 9, returns 1 (=> 10**1 == 10)
  40. # for 10 to 99, returns 2 (=> 10**2 == 100)
  41. ten_exponent = 1
  42. while val // 10 > 0:
  43. ten_exponent += 1
  44. val //= 10
  45. return ten_exponent
  46.  
  47. def scale_numpy_arrays_inplace(A, B, alpha):
  48. scale_factor = 1
  49. # Scale down simultaneously A and B if alpha is not 1.
  50. if alpha != 1:
  51. scale_factor *= alpha
  52. # Normalize A and B simultaneously so that any values in these tensors are in interval [0, 1)
  53. max_a = math.floor(abs(A.max()))
  54. max_b = math.floor(abs(B.max()))
  55. if max_a or max_b:
  56. m_a = _next_ten_exponent(max_a)
  57. m_b = _next_ten_exponent(max_b)
  58. max_m = max(m_a, m_b)
  59. scale_factor *= 10 ** max_m
  60. if scale_factor != 1:
  61. A /= scale_factor
  62. B /= scale_factor
  63.  
  64. def array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator=np.random.random):
  65. out_shp = get_conv_output_shape(inputs_shape, filters_shape, border_mode, subsample, dilation)
  66. out_shp = assert_conv_shape(out_shp)
  67. return allocator(out_shp).astype(dtype)
  68.  
def run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.random.random):
    """Run one gradient-of-input convolution on GPU (cuDNN) and compare it
    against the CPU reference implementation (CorrMM_gradInputs).

    parameters is the tuple (inputs_shape, filters_shape, subsample,
    dilation, border_mode, conv_mode, alpha, beta). Hashes of every array
    are printed to help track non-determinism between runs.
    """
    inputs_shape, filters_shape, subsample, dilation, border_mode, conv_mode, alpha, beta = parameters

    if beta == 0:
        # beta == 0 means no accumulation, so no input buffer is needed.
        inputs_val = None
    else:
        inputs_val = allocator(inputs_shape).astype(dtype)
        inputs_val /= 10
    filters_val = allocator(filters_shape).astype(dtype)
    topgrad_val = array_like_conv_output(inputs_shape, filters_shape, border_mode, subsample, dilation, dtype, allocator)

    # Scale down the input values to prevent absolute errors in utt.assert_allclose.
    filters_val /= 10
    topgrad_val /= 10

    filters = theano.shared(filters_val)
    topgrad = theano.shared(topgrad_val)

    # Compile a theano function for the cuDNN implementation
    grad_i = dnn_gradinput(filters, topgrad, inputs_shape, alpha=alpha, beta=beta, out=inputs_val,
                           border_mode=border_mode, subsample=subsample, dilation=dilation,
                           conv_mode=conv_mode, algo=algo, precision=precision)

    f = theano.function([], grad_i, mode=mode_with_gpu)

    # If conv_mode is 'conv' the reference implementation should use
    # filters flipped according to the width, height and time axis
    if conv_mode == 'conv':
        if filters.ndim == 5:
            # 3D convolution: flip depth, height and width.
            flipped_filters = filters[:, :, ::-1, ::-1, ::-1]
        else:
            # 2D convolution: flip height and width.
            flipped_filters = filters[:, :, ::-1, ::-1]
    else:
        # cross-correlation mode: reference uses filters as-is.
        flipped_filters = filters

    # Compile a theano function for the reference implementation
    grad_i_ref = CorrMM_gradInputs(border_mode=border_mode,
                                   subsample=subsample,
                                   filter_dilation=dilation
                                   )(ref_cast(flipped_filters),
                                     ref_cast(topgrad),
                                     inputs_shape[2:])
    f_ref = theano.function([], grad_i_ref, mode="FAST_RUN")

    # Compare the results of the two implementations
    res_ref = f_ref()
    res = np.asarray(f())

    # float16 needs much looser tolerances; None falls back to utt defaults.
    atol = 5e-2 if dtype == 'float16' else None
    rtol = atol
    if beta == 0:
        cpu_res = alpha * res_ref
    else:
        # Reference applies the same alpha/beta accumulation as cuDNN.
        cpu_res = alpha * res_ref + beta * inputs_val
    # Print array hashes to detect run-to-run non-determinism.
    print('Hash inputs_val :', None if inputs_val is None else hash_from_ndarray(inputs_val))
    print('Hash filters_val :', hash_from_ndarray(filters_val))
    print('Hash topgrad_val :', hash_from_ndarray(topgrad_val))
    print('Hash CPU res before scaling:', hash_from_ndarray(cpu_res))
    print('Hash res before scaling:', hash_from_ndarray(res))
    # Normalize both results together before the tolerance check.
    scale_numpy_arrays_inplace(cpu_res, res, alpha)
    print('Hash CPU res after scaling:', hash_from_ndarray(cpu_res))
    print('Hash res after scaling:', hash_from_ndarray(res))
    utt.assert_allclose(cpu_res, res, rtol=rtol, atol=atol)
    # Show a few boundary values from each result for manual inspection.
    print('CPU')
    print(cpu_res.flatten()[:5], cpu_res.flatten()[-5:])
    print('res')
    print(res.flatten()[:5], res.flatten()[-5:])
  136.  
# --- Script entry point: run the GPU-vs-CPU comparison twice with a fixed
# seed and deterministic algo, to check run-to-run reproducibility. ---
algo = 'deterministic'
# dtype may be overridden from the command line; defaults to float16.
dtype = sys.argv[1] if len(sys.argv) > 1 else 'float16'
precision = dtype
# (inputs_shape, filters_shape, subsample, dilation, border_mode,
#  conv_mode, alpha, beta) — beta = -3 exercises the accumulation path.
parameters = (
    (2, 3, 300, 5),
    (2, 3, 40, 4),
    (1, 1),
    (1, 1),
    (1, 1),
    'conv',
    2,
    -3
)
print(algo, dtype, precision, parameters)
utt.seed_rng(1234)
# Run twice with identical inputs (np.ones): hashes printed by
# run_conv_gradinput should match between the two runs if deterministic.
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
run_conv_gradinput(algo, dtype, precision, parameters, allocator=np.ones)
# run_conv_gradinput(algo, dtype, precision, parameters)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement