Advertisement
Guest User

Untitled

a guest
Jan 3rd, 2023
88
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 1.18 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <cuda_fp16.h>
  3.  
  /**
   * Atomically adds `b` to the half-precision value stored at `a`.
   *
   * `b` is first rounded to __half, then added to *a.
   *
   * - Compute capability >= 7.0: uses the native atomicAdd(__half*, __half)
   *   overload.
   * - Older targets: there is no native __half atomicAdd, so the update is
   *   emulated with a 32-bit atomicCAS loop on the 4-byte-aligned word that
   *   contains *a. Previously this branch was empty, so the call silently did
   *   nothing on such targets.
   *
   * Preconditions for the emulated path:
   * - `a` must be 2-byte aligned (the natural alignment of __half).
   * - The enclosing aligned 4-byte word must be readable and writable. The
   *   neighbouring 16 bits are written back unchanged, but they are still
   *   accessed.
   *
   * @param a  Address of the accumulator.
   * @param b  Value to add.
   */
  __device__ void atomic_add(__half* a, float b) {
  #if defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 700
      // Native fp16 atomicAdd requires compute capability 7.0 or newer.
      atomicAdd(a, __float2half(b));
  #else
      const size_t address = reinterpret_cast<size_t>(a);
      // Bit 1 of the address tells whether the half sits in the upper or the
      // lower 16 bits of its enclosing aligned 32-bit word.
      const bool in_upper_half = (address & 2) != 0;
      unsigned int* const word =
          reinterpret_cast<unsigned int*>(address & ~static_cast<size_t>(2));

      // Round the addend to half once, mirroring what the native branch does.
      const float addend = __half2float(__float2half(b));

      unsigned int observed = *word;
      unsigned int assumed;
      do {
          assumed = observed;

          // Extract the current half, add in float, round back to half.
          const unsigned short current_bits = in_upper_half
              ? static_cast<unsigned short>(assumed >> 16)
              : static_cast<unsigned short>(assumed & 0xffffu);
          const float sum = __half2float(__ushort_as_half(current_bits)) + addend;
          const unsigned int sum_bits = __half_as_ushort(__float2half(sum));

          // Splice the new half into the word, leaving the neighbour untouched.
          const unsigned int desired = in_upper_half
              ? ((assumed & 0x0000ffffu) | (sum_bits << 16))
              : ((assumed & 0xffff0000u) | sum_bits);

          // Retry if another thread modified the word in the meantime.
          observed = atomicCAS(word, assumed, desired);
      } while (observed != assumed);
  #endif
  }
  11.  
  /**
   * Kernel: each thread that runs it reads the float at `value` and passes it,
   * together with `result`, to atomic_add. All threads target the same
   * `result` address; no thread or block index is used.
   *
   * @param value   Device pointer to the addend.
   * @param result  Device pointer to the half-precision accumulator.
   */
  __global__ void example(float* value, __half* result) {
      const float addend = *value;
      atomic_add(result, addend);
  }
  15.  
  /**
   * Host driver: uploads one float addend and one zero-initialised __half
   * accumulator, launches `example` with 1 block of 3 threads, copies the
   * accumulator back and prints it as a float.
   *
   * Each of the 3 threads adds 1.0 to an accumulator that starts at 0, so the
   * printed value is 3.000000 whenever atomic_add actually performs the
   * addition on the target architecture.
   *
   * @return 0 on success, 1 if any CUDA call reported an error.
   */
  int main() {
      // Prints a diagnostic for a failed CUDA call; returns true on success.
      auto ok = [](cudaError_t status, const char* what) {
          if (status != cudaSuccess) {
              fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
              return false;
          }
          return true;
      };

      // Host values and their device counterparts. The device pointers start
      // as nullptr so the cleanup below is safe on every path.
      float hvalue{1.f}, *dvalue = nullptr;
      __half hresult{0.f}, *dresult = nullptr;

      // Allocate device memory and copy the inputs over; && stops at the first
      // failure.
      bool success =
          ok(cudaMalloc(&dvalue, sizeof(float)), "cudaMalloc(dvalue)") &&
          ok(cudaMalloc(&dresult, sizeof(__half)), "cudaMalloc(dresult)") &&
          ok(cudaMemcpy(dvalue, &hvalue, sizeof(float), cudaMemcpyHostToDevice),
             "cudaMemcpy(dvalue)") &&
          ok(cudaMemcpy(dresult, &hresult, sizeof(__half), cudaMemcpyHostToDevice),
             "cudaMemcpy(dresult)");

      if (success) {
          // Call kernel
          example<<<1, 3>>>(dvalue, dresult);
          // A launch does not return a status, so query it explicitly. The
          // blocking copy back to the host then reports any error raised while
          // the kernel was running.
          success =
              ok(cudaGetLastError(), "example launch") &&
              ok(cudaMemcpy(&hresult, dresult, sizeof(__half), cudaMemcpyDeviceToHost),
                 "cudaMemcpy(hresult)");
      }

      if (success) {
          // Print result
          printf("%f\n", __half2float(hresult));
      }

      // Release device memory; cudaFree on a null pointer is a no-op.
      cudaFree(dresult);
      cudaFree(dvalue);
      return success ? 0 : 1;
  }
  33.  
  34. // FAILURE: nvcc -gencode=arch=compute_52,code=compute_52 -o example example.cu; ./example
  35. // SUCCESS: nvcc -gencode=arch=compute_70,code=compute_70 -o example example.cu; ./example
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement