$ su
Password:
# nvidia-smi -i 0 -c 3
Set compute mode to EXCLUSIVE_PROCESS for GPU 0000:02:00.0.
All done.
# exit
exit
$ cat t349.cu
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>

#define ARR_SIZE 10
#define NUM_DEVICE 1
#define NUM_THR 4

// Per-thread work descriptor: host/device buffers plus device and thread IDs
typedef struct {
    int *arr;
    int *dev_arr;
    int *dev_result;
    int *result;
    int dev_num;
    int thr_num;
} cuda_st;

__global__ void kernel_fc(int *dev_arr, int *dev_result)
{
    int idx = threadIdx.x;
    printf("dev_arr[%d] = %d\n", idx, dev_arr[idx]);
    // Each launch atomically adds 0+1+...+9 = 45 into the same device
    // counter, so NUM_THR successful launches yield 45*NUM_THR = 180
    atomicAdd(dev_result, dev_arr[idx]);
}

void *thread_func(void *struc)
{
    cuda_st *data = (cuda_st*)struc;
    printf("thread %d func start\n", data->thr_num);
    printf("arr %d = ", data->dev_num);
    for(int i=0; i<ARR_SIZE; i++) {
        printf("%d ", data->arr[i]);
    }
    printf("\n");

    // Note: none of these CUDA calls are error-checked, so failures
    // (e.g. under EXCLUSIVE_THREAD mode) pass silently
    cudaSetDevice(data->dev_num);
    cudaMemcpy(data->dev_arr, data->arr, sizeof(int)*ARR_SIZE, cudaMemcpyHostToDevice);
    kernel_fc<<<1,ARR_SIZE>>>(data->dev_arr, data->dev_result);
    cudaMemcpy(data->result, data->dev_result, sizeof(int), cudaMemcpyDeviceToHost);

    printf("thread %d func exit\n", data->thr_num);
    return NULL;
}

int main(void)
{
    // Make the work objects
    cuda_st cuda[NUM_DEVICE][NUM_THR];

    // Make the threads
    pthread_t pthread[NUM_DEVICE*NUM_THR];

    // Host array memory allocation
    int *arr[NUM_DEVICE];
    for(int i=0; i<NUM_DEVICE; i++) {
        arr[i] = (int*)malloc(sizeof(int)*ARR_SIZE);
    }

    // Fill the host arrays with the specified data
    for(int i=0; i<NUM_DEVICE; i++) {
        for(int j=0; j<ARR_SIZE; j++) {
            arr[i][j] = i*ARR_SIZE+j;
        }
    }

    // Confirm the host array data
    for(int i=0; i<NUM_DEVICE; i++) {
        printf("arr[%d] = ", i);
        for(int j=0; j<ARR_SIZE; j++) {
            printf("%d ", arr[i][j]);
        }
        printf("\n");
    }

    // Result memory allocation
    int *result[NUM_DEVICE];
    for(int i=0; i<NUM_DEVICE; i++) {
        result[i] = (int*)malloc(sizeof(int));
        memset(result[i], 0, sizeof(int));
    }

    // Device array memory allocation
    int *dev_arr[NUM_DEVICE];
    for(int i=0; i<NUM_DEVICE; i++) {
        cudaSetDevice(i);
        cudaMalloc(&dev_arr[i], sizeof(int)*ARR_SIZE);
    }

    // Device result memory allocation
    int *dev_result[NUM_DEVICE];
    for(int i=0; i<NUM_DEVICE; i++) {
        cudaSetDevice(i);
        cudaMalloc(&dev_result[i], sizeof(int));
        cudaMemset(dev_result[i], 0, sizeof(int));
    }

    // Connect these pointers with the work objects
    for (int i=0; i<NUM_DEVICE; i++)
        for (int j=0; j<NUM_THR; j++) {
            cuda[i][j].arr = arr[i];
            cuda[i][j].dev_arr = dev_arr[i];
            cuda[i][j].result = result[i];
            cuda[i][j].dev_result = dev_result[i];
            cuda[i][j].dev_num = i;
            cuda[i][j].thr_num = j;
        }

    // Create and execute the pthreads
    for(int i=0; i<NUM_DEVICE; i++)
        for (int j=0; j<NUM_THR; j++) {
            pthread_create(&pthread[(i*NUM_THR)+j], NULL, thread_func, (void*)&cuda[i][j]);
        }

    // Join the pthreads
    for(int i=0; i<NUM_DEVICE*NUM_THR; i++) {
        pthread_join(pthread[i], NULL);
    }

    // All NUM_THR entries for a device alias the same host buffer,
    // so each line prints the final accumulated sum
    for(int i=0; i<NUM_DEVICE; i++)
        for (int j=0; j<NUM_THR; j++) {
            printf("result[%d][%d] = %d\n", i, j, (*cuda[i][j].result));
        }

    return 0;
}

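(A side note on the listing above: none of the CUDA calls in thread_func are error-checked, which is why the failures in the EXCLUSIVE_THREAD run further down surface only through cuda-memcheck while the threads still print their normal start/exit messages. Below is a minimal sketch of the kind of per-call check that could be added to the same .cu file; the checkCuda helper is a name introduced here for illustration, not part of the CUDA API:)

#include <stdio.h>
#include <stdlib.h>

// Hypothetical helper: abort with a message if a CUDA runtime call failed
static void checkCuda(cudaError_t err, const char *msg)
{
    if (err != cudaSuccess) {
        fprintf(stderr, "%s: %s\n", msg, cudaGetErrorString(err));
        exit(EXIT_FAILURE);
    }
}

// Usage inside thread_func would look like:
//   checkCuda(cudaSetDevice(data->dev_num), "cudaSetDevice");
//   checkCuda(cudaMemcpy(data->dev_arr, data->arr,
//             sizeof(int)*ARR_SIZE, cudaMemcpyHostToDevice), "cudaMemcpy H2D");

(With such a check in place, the EXCLUSIVE_THREAD run would abort at the first failing cudaSetDevice instead of running to completion with a zero result.)
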
$ nvcc -arch=sm_20 -o t349 t349.cu -lpthread
$ cuda-memcheck ./t349
========= CUDA-MEMCHECK
arr[0] = 0 1 2 3 4 5 6 7 8 9
thread 0 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
thread 3 func start
arr 0 = thread 1 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
0 1 2 3 4 5 6 7 8 9
thread 2 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
dev_arr[0] = 0
dev_arr[1] = 1
dev_arr[2] = 2
dev_arr[3] = 3
dev_arr[4] = 4
dev_arr[5] = 5
dev_arr[6] = 6
dev_arr[7] = 7
dev_arr[8] = 8
dev_arr[9] = 9
thread 0 func exit
dev_arr[0] = 0
dev_arr[1] = 1
dev_arr[2] = 2
dev_arr[3] = 3
dev_arr[4] = 4
dev_arr[5] = 5
dev_arr[6] = 6
dev_arr[7] = 7
dev_arr[8] = 8
dev_arr[9] = 9
dev_arr[0] = 0
dev_arr[1] = 1
dev_arr[2] = 2
dev_arr[3] = 3
dev_arr[4] = 4
dev_arr[5] = 5
dev_arr[6] = 6
dev_arr[7] = 7
dev_arr[8] = 8
dev_arr[9] = 9
thread 3 func exit
thread 1 func exit
dev_arr[0] = 0
dev_arr[1] = 1
dev_arr[2] = 2
dev_arr[3] = 3
dev_arr[4] = 4
dev_arr[5] = 5
dev_arr[6] = 6
dev_arr[7] = 7
dev_arr[8] = 8
dev_arr[9] = 9
thread 2 func exit
result[0][0] = 180
result[0][1] = 180
result[0][2] = 180
result[0][3] = 180
========= ERROR SUMMARY: 0 errors
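(Each of the four kernel launches atomically adds 0+1+...+9 = 45 into the same device counter, so the value read back is 45 x 4 = 180; all four result entries alias the same host buffer, which is why they print identically.)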
$ su
Password:
# nvidia-smi -i 0 -c 1
Set compute mode to EXCLUSIVE_THREAD for GPU 0000:02:00.0.
All done.
# exit
exit
$ cuda-memcheck ./t349
========= CUDA-MEMCHECK
arr[0] = 0 1 2 3 4 5 6 7 8 9
thread 0 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
thread 1 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x347d0]
========= Host Frame:./t349 [0x2b4e]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
thread 2 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
thread 3 func start
arr 0 = 0 1 2 3 4 5 6 7 8 9
========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x347d0]
========= Host Frame:./t349 [0x2b4e]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x347d0]
========= Host Frame:./t349 [0x2b4e]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x347d0]
========= Host Frame:./t349 [0x2b4e]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2b6c]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2b6c]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2b6c]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2b6c]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3b47e]
========= Host Frame:./t349 [0x2c55]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
========= Host Frame:./t349 [0x2bd4]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3b47e]
========= Host Frame:./t349 [0x2c55]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
========= Host Frame:./t349 [0x2bd4]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3b47e]
========= Host Frame:./t349 [0x2c55]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
========= Host Frame:./t349 [0x2bd4]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2bf3]
thread 1 func exit
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3b47e]
========= Host Frame:./t349 [0x2c55]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
========= Host Frame:./t349 [0x2bd4]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
thread 0 func exit
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2bf3]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
thread 2 func exit
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2bf3]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
=========
========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
thread 3 func exit
========= Saved host backtrace up to driver entry point at error
========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
========= Host Frame:./t349 [0x3831f]
========= Host Frame:./t349 [0x2bf3]
========= Host Frame:/lib64/libpthread.so.0 [0x673d]
result[0][0] = 0
========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
result[0][1] = 0
=========
result[0][2] = 0
result[0][3] = 0
========= ERROR SUMMARY: 16 errors
$
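(For reference: nvidia-smi -c 3 selects EXCLUSIVE_PROCESS mode, in which a single process may use the GPU from any number of its host threads, while -c 1 selects EXCLUSIVE_THREAD mode, in which only one host thread may hold a context at a time. That is why the four pthreads all succeed in the first run but collide on cudaSetDevice in the second, leaving the host result buffer at its memset value of 0. A minimal sketch of querying the active compute mode from code, assuming device 0:)

#include <stdio.h>

int main(void)
{
    cudaDeviceProp prop;
    // Query the properties of device 0, including its compute mode
    cudaGetDeviceProperties(&prop, 0);
    switch (prop.computeMode) {
        case cudaComputeModeDefault:          printf("Default\n");           break;
        case cudaComputeModeExclusive:        printf("Exclusive thread\n");  break;
        case cudaComputeModeProhibited:       printf("Prohibited\n");        break;
        case cudaComputeModeExclusiveProcess: printf("Exclusive process\n"); break;
    }
    return 0;
}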