Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t395.cu
- #include <stdio.h>
- #define INCS 10 // number of increments each thread performs (loop bound in mykernel)
- #define nTPB 256 // threads per block used for the mykernel launch
- #define nBLKS 10 // number of blocks used for the mykernel launch
- #define USE_ATOMICS // when defined, mykernel increments via atomicAdd instead of a plain ++
- // Aborts the program when the CUDA runtime has an error pending.
- // `msg` is a caller-supplied label printed ahead of the CUDA error string,
- // followed by the file and line where the macro was expanded.
- // The status is obtained from cudaGetLastError(), so the macro reports (and
- // clears) the most recent runtime error rather than one call's return code.
- #define cudaCheckErrors(msg) \
-     do { \
-         const cudaError_t status_ = cudaGetLastError(); \
-         if (status_ == cudaSuccess) break; /* nothing pending */ \
-         fprintf(stderr, "Fatal error: %s (%s at %s:%d)\n", \
-                 msg, cudaGetErrorString(status_), \
-                 __FILE__, __LINE__); \
-         fprintf(stderr, "*** FAILED - ABORTING\n"); \
-         exit(1); \
-     } while (0)
- // Every launched thread bumps the single int at `data` by one, INCS times.
- // With USE_ATOMICS defined each bump is an atomicAdd; otherwise it is a plain
- // (non-atomic) increment through the volatile pointer.
- __global__ void mykernel(volatile int *data){
-     int remaining = INCS;
-     while (remaining-- > 0) {
- #ifdef USE_ATOMICS
-         // atomicAdd takes a non-volatile int*, hence the cast.
-         atomicAdd((int *)data, 1);
- #else
-         (*data)++;
- #endif
-     }
- }
- // Simple test to demonstrate reading progress data from a kernel.
- //
- // Allocates one int of mapped (zero-copy) pinned host memory, obtains the
- // matching device pointer, launches mykernel with nBLKS blocks of nTPB threads
- // on it, and then checks that the counter equals INCS*nBLKS*nTPB.
- // Returns 0 on success and 1 on a count mismatch; any CUDA error aborts via
- // cudaCheckErrors.
- int main(){
-     volatile int *d_data, *h_data;
-     // Mapped host allocations require the device to be flagged for host mapping.
-     cudaSetDeviceFlags(cudaDeviceMapHost);
-     cudaCheckErrors("cudaSetDeviceFlags error");
-     cudaHostAlloc((void **)&h_data, sizeof(int), cudaHostAllocMapped);
-     cudaCheckErrors("cudaHostAlloc error");
-     cudaHostGetDevicePointer((int **)&d_data, (int *)h_data, 0);
-     cudaCheckErrors("cudaHostGetDevicePointer error");
-     *h_data = 0;
-     printf("kernel starting\n");
-     mykernel<<<nBLKS,nTPB>>>(d_data);
-     // Wait for the kernel to finish before reading the counter on the host.
-     cudaDeviceSynchronize();
-     cudaCheckErrors("kernel fail");
-     const int expected = INCS*nBLKS*nTPB;
-     // Read the volatile counter once so the compared and printed values agree.
-     const int observed = *h_data;
-     // Release the pinned allocation on every exit path (it was previously leaked).
-     cudaFreeHost((void *)h_data);
-     cudaCheckErrors("cudaFreeHost error");
-     if (expected != observed) {printf("fail! should be: %d, was: %d\n", expected, observed); return 1;}
-     printf("Success!\n");
-     return 0;
- }
- $ nvcc -arch=sm_20 -o t395 t395.cu
- $ ./t395
- kernel starting
- Success!
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement