/home/panyala/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py:95: UserWarning:
    Found GPU0 Tesla V100-PCIE-16GB which requires CUDA_VERSION >= 9000 for
     optimal performance and fast startup time, but your PyTorch was compiled
     with CUDA_VERSION 8000. Please install the correct PyTorch binary
     using instructions from http://pytorch.org

  warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION))
/home/panyala/anaconda3/lib/python3.6/site-packages/torch/cuda/__init__.py:95: UserWarning:
    Found GPU1 Tesla V100-PCIE-16GB which requires CUDA_VERSION >= 9000 for
     optimal performance and fast startup time, but your PyTorch was compiled
     with CUDA_VERSION 8000. Please install the correct PyTorch binary
     using instructions from http://pytorch.org

  warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION))
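The two warnings above come from PyTorch's CUDA initialization: the Tesla V100 is a compute capability 7.0 (Volta) device, but this PyTorch binary was built against CUDA 8, which predates Volta. A quick way to confirm the mismatch from Python, using standard torch introspection calls (the exact warning text varies by release):

    import torch

    # CUDA toolkit version this PyTorch binary was compiled against, e.g. "8.0.61"
    print(torch.version.cuda)

    # Name and compute capability of each visible device; a V100 reports (7, 0),
    # which no CUDA 8 toolchain can target
    for d in range(torch.cuda.device_count()):
        print(torch.cuda.get_device_name(d), torch.cuda.get_device_capability(d))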
[WARNING]: No mapping options passed, 'naive' type mapping options will be used and will likely have bad performance. See help(your_layer.__call__) for setting mapping options.
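This warning is Tensor Comprehensions reporting that the layer was called without MappingOptions, so it falls back to an untuned naive schedule (visible as makeNaiveMappingOptions() in the dump below). A minimal sketch of passing non-naive options through the TC Python frontend of that era; tc.define, tc.Options, and the preset name "mlp" follow the 2018 TC docs and may differ in other versions:

    import torch
    import tensor_comprehensions as tc

    lang = """
    def matmul(float(M, K) A, float(K, N) B) -> (C) {
        C(m, n) +=! A(m, k) * B(k, n)
    }
    """
    matmul = tc.define(lang, name="matmul")
    A, B = torch.randn(3, 4).cuda(), torch.randn(4, 5).cuda()

    # Supplying a built-in preset (or autotuned options) silences the
    # naive-mapping warning and usually performs far better
    C = matmul(A, B, options=tc.Options("mlp"))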
WARNING: Logging before InitGoogleLogging() is written to STDERR
W1113 10:52:01.228185 40812 rtc.cc:103] Compilation failure for nvrtc(NVRTC_ERROR_INVALID_OPTION):
nvrtc: error: invalid value for --gpu-architecture (-arch)
 source:
template<typename T> inline __device__ T floord(T n, T d) {
  return n < 0 ? - (-n + d - 1)/d : n / d;
}
#define if_then_else(cond,a,b) (cond) ? (a) : (b);

// Halide type handling
typedef int int32;
typedef long int64;
typedef float float32;
typedef double float64;

#define inff __int_as_float(0x7f800000)
#define inf __longlong_as_double(0x7ff0000000000000LL)

extern "C" {
__global__ void matmul_4_3_5(int32 K, int32 M, int32 N, float32* pC, float32* pA, float32* pB) {
  int b0 = blockIdx.x; int b1 = blockIdx.y; int b2 = blockIdx.z;
  int t0 = threadIdx.x; int t1 = threadIdx.y; int t2 = threadIdx.z;
  float32 (*C)[5] = reinterpret_cast<float32 (*)[5]>(pC);
  float32 (*A)[4] = reinterpret_cast<float32 (*)[4]>(pA);
  float32 (*B)[5] = reinterpret_cast<float32 (*)[5]>(pB);
  C[t1][t0] = 0.000000f;
  for (int c5 = 0; c5 <= 3; c5 += 1) {
    C[t1][t0] = (C[t1][t0] + (A[t1][c5]*B[c5][t0]));
  }
}
}

/*
Mapping Options:
tc::MappingOptions::makeNaiveMappingOptions()
    .outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
    .outerScheduleAllowSkewing(false)
    .outerSchedulePositiveOrthant(true)
    .intraTileScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
    .intraTileScheduleAllowSkewing(false)
    .intraTileSchedulePositiveOrthant(true)
    .tile(32, 32, 32)
    .mapToThreads(32, 8)
    .mapToBlocks(256, 256)
    .unroll(1)
    .tileImperfectlyNested(false)
    .useSharedMemory(false)
    .usePrivateMemory(false)
    .unrollCopyShared(false)
    .matchLibraryCalls(false);
TC version: 8e112e9dccda62c30ef29208a827e783b9a7f156
*/
E1113 10:52:01.228243 40812 rtc.cc:106] Compilation failure for nvrtc(NVRTC_ERROR_INVALID_OPTION):
nvrtc: error: invalid value for --gpu-architecture (-arch)
 source:
template<typename T> inline __device__ T floord(T n, T d) {
  return n < 0 ? - (-n + d - 1)/d : n / d;
}
#define if_then_else(cond,a,b) (cond) ? (a) : (b);

// Halide type handling
typedef int int32;
typedef long int64;
typedef float float32;
typedef double float64;

#define inff __int_as_float(0x7f800000)
#define inf __longlong_as_double(0x7ff0000000000000LL)

extern "C" {
__global__ void matmul_4_3_5(int32 K, int32 M, int32 N, float32* pC, float32* pA, float32* pB) {
  int b0 = blockIdx.x; int b1 = blockIdx.y; int b2 = blockIdx.z;
  int t0 = threadIdx.x; int t1 = threadIdx.y; int t2 = threadIdx.z;
  float32 (*C)[5] = reinterpret_cast<float32 (*)[5]>(pC);
  float32 (*A)[4] = reinterpret_cast<float32 (*)[4]>(pA);
  float32 (*B)[5] = reinterpret_cast<float32 (*)[5]>(pB);
  C[t1][t0] = 0.000000f;
  for (int c5 = 0; c5 <= 3; c5 += 1) {
    C[t1][t0] = (C[t1][t0] + (A[t1][c5]*B[c5][t0]));
  }
}
}

/*
Mapping Options:
tc::MappingOptions::makeNaiveMappingOptions()
    .outerScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
    .outerScheduleAllowSkewing(false)
    .outerSchedulePositiveOrthant(true)
    .intraTileScheduleFusionStrategy(tc::FusionStrategy::Preserve3Coincident)
    .intraTileScheduleAllowSkewing(false)
    .intraTileSchedulePositiveOrthant(true)
    .tile(32, 32, 32)
    .mapToThreads(32, 8)
    .mapToBlocks(256, 256)
    .unroll(1)
    .tileImperfectlyNested(false)
    .useSharedMemory(false)
    .usePrivateMemory(false)
    .unrollCopyShared(false)
    .matchLibraryCalls(false);
TC version: 8e112e9dccda62c30ef29208a827e783b9a7f156
*/
[ERROR]: Caught Exception: Could not compile function
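The root cause ties all of these messages together: TC generates CUDA source and hands it to NVRTC with --gpu-architecture set for the attached device, but the V100's compute_70 target does not exist in the CUDA 8 NVRTC this stack was built against, hence NVRTC_ERROR_INVALID_OPTION ("invalid value for --gpu-architecture") and the final "Could not compile function". The fix is the one the first warning already suggests: install PyTorch and TC builds compiled against CUDA >= 9.0 (at the time, something like `conda install pytorch cuda90 -c pytorch`; exact package names varied by release). A small guard that fails fast with a clear message instead of an NVRTC dump, assuming only standard torch introspection:

    import torch

    compiled = tuple(int(x) for x in torch.version.cuda.split("."))
    capability = torch.cuda.get_device_capability(0)
    # Volta (7, 0) and newer require a CUDA 9+ toolchain
    if capability >= (7, 0) and compiled < (9, 0):
        raise RuntimeError(
            "GPU has compute capability %s but PyTorch was built with CUDA %s; "
            "install a CUDA >= 9.0 build" % (capability, torch.version.cuda)
        )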