Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ su
- Password:
- # nvidia-smi -i 0 -c 3
- Set compute mode to EXCLUSIVE_PROCESS for GPU 0000:02:00.0.
- All done.
- # exit
- exit
- $ cat t349.cu
- #include <stdio.h>
- #include <stdlib.h>
- #include <pthread.h>
- #define ARR_SIZE 10
- #define NUM_DEVICE 1
- #define NUM_THR 4
- typedef struct {
- int *arr;
- int *dev_arr;
- int *dev_result;
- int *result;
- int dev_num;
- int thr_num;
- } cuda_st;
- __global__ void kernel_fc(int *dev_arr, int *dev_result)
- {
- int idx = threadIdx.x;
- printf("dev_arr[%d] = %d\n", idx, dev_arr[idx]);
- atomicAdd(dev_result, dev_arr[idx]);
- }
- void *thread_func(void* struc)
- {
- cuda_st * data = (cuda_st*)struc;
- printf("thread %d func start\n", data->thr_num);
- printf("arr %d = ", data->dev_num);
- for(int i=0; i<10; i++) {
- printf("%d ", data->arr[i]);
- }
- printf("\n");
- cudaSetDevice(data->dev_num);
- cudaMemcpy(data->dev_arr, data->arr, sizeof(int)*ARR_SIZE, cudaMemcpyHostToDevice);
- kernel_fc<<<1,ARR_SIZE>>>(data->dev_arr, data->dev_result);
- cudaMemcpy(data->result, data->dev_result, sizeof(int), cudaMemcpyDeviceToHost);
- printf("thread %d func exit\n", data->thr_num);
- return NULL;
- }
- int main(void)
- {
- // Make object
- cuda_st cuda[NUM_DEVICE][NUM_THR];
- // Make thread
- pthread_t pthread[NUM_DEVICE*NUM_THR];
- // Host array memory allocation
- int *arr[NUM_DEVICE];
- for(int i=0; i<NUM_DEVICE; i++) {
- arr[i] = (int*)malloc(sizeof(int)*ARR_SIZE);
- }
- // Fill this host array up with specified data
- for(int i=0; i<NUM_DEVICE; i++) {
- for(int j=0; j<ARR_SIZE; j++) {
- arr[i][j] = i*ARR_SIZE+j;
- }
- }
- // To confirm host array data
- for(int i=0; i<NUM_DEVICE; i++) {
- printf("arr[%d] = ", i);
- for(int j=0; j<ARR_SIZE; j++) {
- printf("%d ", arr[i][j]);
- }
- printf("\n");
- }
- // Result memory allocation
- int *result[NUM_DEVICE];
- for(int i=0; i<NUM_DEVICE; i++) {
- result[i] = (int*)malloc(sizeof(int));
- memset(result[i], 0, sizeof(int));
- }
- // Device array memory allocation
- int *dev_arr[NUM_DEVICE];
- for(int i=0; i<NUM_DEVICE; i++) {
- cudaSetDevice(i);
- cudaMalloc(&dev_arr[i], sizeof(int)*ARR_SIZE);
- }
- // Device result memory allocation
- int *dev_result[NUM_DEVICE];
- for(int i=0; i<NUM_DEVICE; i++) {
- cudaSetDevice(i);
- cudaMalloc(&dev_result[i], sizeof(int));
- cudaMemset(dev_result[i], 0, sizeof(int));
- }
- // Connect these pointers with object
- for (int i=0; i<NUM_DEVICE; i++)
- for (int j=0; j<NUM_THR; j++) {
- cuda[i][j].arr = arr[i];
- cuda[i][j].dev_arr = dev_arr[i];
- cuda[i][j].result = result[i];
- cuda[i][j].dev_result = dev_result[i];
- cuda[i][j].dev_num = i;
- cuda[i][j].thr_num = j;
- }
- // Create and excute pthread
- for(int i=0; i<NUM_DEVICE; i++)
- for (int j=0; j<NUM_THR; j++) {
- pthread_create(&pthread[(i*NUM_THR)+j], NULL, thread_func, (void*)&cuda[i][j]);
- }
- // Join pthread
- for(int i=0; i<NUM_DEVICE*NUM_THR; i++) {
- pthread_join(pthread[i], NULL);
- }
- for(int i=0; i<NUM_DEVICE; i++)
- for (int j=0; j < NUM_THR; j++) {
- printf("result[%d][%d] = %d\n", i,j, (*cuda[i][j].result));
- }
- return 0;
- }
- $ nvcc -arch=sm_20 -o t349 t349.cu -lpthread
- $ cuda-memcheck ./t349
- ========= CUDA-MEMCHECK
- arr[0] = 0 1 2 3 4 5 6 7 8 9
- thread 0 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- thread 3 func start
- arr 0 = thread 1 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- 0 1 2 3 4 5 6 7 8 9
- thread 2 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- dev_arr[0] = 0
- dev_arr[1] = 1
- dev_arr[2] = 2
- dev_arr[3] = 3
- dev_arr[4] = 4
- dev_arr[5] = 5
- dev_arr[6] = 6
- dev_arr[7] = 7
- dev_arr[8] = 8
- dev_arr[9] = 9
- thread 0 func exit
- dev_arr[0] = 0
- dev_arr[1] = 1
- dev_arr[2] = 2
- dev_arr[3] = 3
- dev_arr[4] = 4
- dev_arr[5] = 5
- dev_arr[6] = 6
- dev_arr[7] = 7
- dev_arr[8] = 8
- dev_arr[9] = 9
- dev_arr[0] = 0
- dev_arr[1] = 1
- dev_arr[2] = 2
- dev_arr[3] = 3
- dev_arr[4] = 4
- dev_arr[5] = 5
- dev_arr[6] = 6
- dev_arr[7] = 7
- dev_arr[8] = 8
- dev_arr[9] = 9
- thread 3 func exit
- thread 1 func exit
- dev_arr[0] = 0
- dev_arr[1] = 1
- dev_arr[2] = 2
- dev_arr[3] = 3
- dev_arr[4] = 4
- dev_arr[5] = 5
- dev_arr[6] = 6
- dev_arr[7] = 7
- dev_arr[8] = 8
- dev_arr[9] = 9
- thread 2 func exit
- result[0][0] = 180
- result[0][1] = 180
- result[0][2] = 180
- result[0][3] = 180
- ========= ERROR SUMMARY: 0 errors
- $ su
- Password:
- # nvidia-smi -i 0 -c 1
- Set compute mode to EXCLUSIVE_THREAD for GPU 0000:02:00.0.
- All done.
- # exit
- exit
- $ cuda-memcheck ./t349
- ========= CUDA-MEMCHECK
- arr[0] = 0 1 2 3 4 5 6 7 8 9
- thread 0 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- thread 1 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- ========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x347d0]
- ========= Host Frame:./t349 [0x2b4e]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- thread 2 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- thread 3 func start
- arr 0 = 0 1 2 3 4 5 6 7 8 9
- ========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x347d0]
- ========= Host Frame:./t349 [0x2b4e]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x347d0]
- ========= Host Frame:./t349 [0x2b4e]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDeviceAlreadyInUse (error 54) due to "exclusive-thread device already in use by a different thread" on CUDA API call to cudaSetDevice.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x347d0]
- ========= Host Frame:./t349 [0x2b4e]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2b6c]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2b6c]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2b6c]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2b6c]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3b47e]
- ========= Host Frame:./t349 [0x2c55]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
- ========= Host Frame:./t349 [0x2bd4]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3b47e]
- ========= Host Frame:./t349 [0x2c55]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
- ========= Host Frame:./t349 [0x2bd4]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3b47e]
- ========= Host Frame:./t349 [0x2c55]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
- ========= Host Frame:./t349 [0x2bd4]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2bf3]
- thread 1 func exit
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaLaunch.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3b47e]
- ========= Host Frame:./t349 [0x2c55]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x306) [0x25be]
- ========= Host Frame:./t349 (__gxx_personality_v0 + 0x325) [0x25dd]
- ========= Host Frame:./t349 [0x2bd4]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- thread 0 func exit
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2bf3]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- thread 2 func exit
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2bf3]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- =========
- ========= Program hit cudaErrorDevicesUnavailable (error 46) due to "all CUDA-capable devices are busy or unavailable" on CUDA API call to cudaMemcpy.
- thread 3 func exit
- ========= Saved host backtrace up to driver entry point at error
- ========= Host Frame:/usr/lib64/libcuda.so.1 [0x2ee943]
- ========= Host Frame:./t349 [0x3831f]
- ========= Host Frame:./t349 [0x2bf3]
- ========= Host Frame:/lib64/libpthread.so.0 [0x673d]
- result[0][0] = 0
- ========= Host Frame:/lib64/libc.so.6 (clone + 0x6d) [0xd3d1d]
- result[0][1] = 0
- =========
- result[0][2] = 0
- result[0][3] = 0
- ========= ERROR SUMMARY: 16 errors
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement