Advertisement
Guest User

Untitled

a guest
Jun 21st, 2016
511
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.58 KB | None | 0 0
  1. $ cat t266.cu
  2. #include <stdio.h>
  3.  
  4. __global__ void mykernel(int *data){  // kernel: increments the int that `data` points to, in place
  5.  // no thread-index guard and no atomic: every launched thread performs this same read-modify-write
  6. (*data)++;
  7. }
  8.  
  9. int main(){  // host driver: uploads an int, runs mykernel on it once, prints the value copied back
  10.  
  11. int *d_data, h_data = 0;  // d_data receives the cudaMalloc allocation; h_data is the host copy, starting at 0
  12. cudaMalloc((void **)&d_data, sizeof(int));  // return code is not checked
  13. cudaMemcpy(d_data, &h_data, sizeof(int), cudaMemcpyHostToDevice);  // upload the initial value
  14. mykernel<<<1,1>>>(d_data);  // one block of one thread, so the kernel body runs exactly once
  15. cudaMemcpy(&h_data, d_data, sizeof(int), cudaMemcpyDeviceToHost);  // copy the result back into h_data
  16. printf("data = %d\n", h_data);
  17. return 0;  // note: d_data is never passed to cudaFree before exit
  18. }
  19. $ nvcc -keep -o t266 t266.cu
  20. $ vi t266.ptx
  21. $ cat t266.ptx
  22. //
  23. // Generated by NVIDIA NVVM Compiler
  24. //
  25. // Compiler Build ID: CL-19856038
  26. // Cuda compilation tools, release 7.5, V7.5.17
  27. // Based on LLVM 3.4svn
  28. //
  29.  
  30. .version 4.3
  31. .target sm_20
  32. .address_size 64
  33.  
  34. // .globl _Z8mykernelPi
  35.  
  36. .visible .entry _Z8mykernelPi(  // PTX entry point for mykernel(int*) (mangled name)
  37. .param .u64 _Z8mykernelPi_param_0  // the kernel's single 64-bit parameter
  38. )
  39. {
  40. .reg .b32 %r<3>;
  41. .reg .b64 %rd<3>;
  42.  
  43.  
  44. ld.param.u64 %rd1, [_Z8mykernelPi_param_0];  // load the pointer argument
  45. cvta.to.global.u64 %rd2, %rd1;  // convert it to a global-space address
  46. ldu.global.u32 %r1, [%rd2];  // read the current 32-bit value
  47. add.s32 %r2, %r1, 2;  // NOTE(review): adds 2 although t266.cu does (*data)++; presumably the hand edit made in the `vi t266.ptx` step, consistent with the final "data = 2" - confirm
  48. st.global.u32 [%rd2], %r2;  // store the sum back to the same address
  49. ret;
  50. }
  51.  
  52.  
  53. $ nvcc -dryrun -o t266 t266.cu --keep 2>dryrun.out
  54. $ vi dryrun.out
  55. $ cat dryrun.out
  56. ptxas -arch=sm_20 -m64 "t266.ptx" -o "t266.sm_20.cubin"
  57. fatbinary --create="t266.fatbin" -64 --key="xxxxxxxxxx" "--image=profile=sm_20,file=t266.sm_20.cubin" "--image=profile=compute_20,file=t266.ptx" --embedded-fatbin="t266.fatbin.c" --cuda
  58. gcc -D__CUDA_ARCH__=200 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDA_PREC_DIV -D__CUDA_PREC_SQRT "-I/usr/local/cuda/bin/..//include" -m64 "t266.cudafe1.cpp" > "t266.cu.cpp.ii"
  59. gcc -c -x c++ "-I/usr/local/cuda/bin/..//include" -fpreprocessed -m64 -o "t266.o" "t266.cu.cpp.ii"
  60. nvlink --arch=sm_20 --register-link-binaries="t266_dlink.reg.c" -m64 "-L/usr/local/cuda/bin/..//lib64/stubs" "-L/usr/local/cuda/bin/..//lib64" -cpu-arch=X86_64 "t266.o" -o "t266_dlink.sm_20.cubin"
  61. fatbinary --create="t266_dlink.fatbin" -64 --key="t266_dlink" -link "--image=profile=sm_20,file=t266_dlink.sm_20.cubin" --embedded-fatbin="t266_dlink.fatbin.c"
  62. gcc -c -x c++ -DFATBINFILE="\"t266_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"t266_dlink.reg.c\"" -I. "-I/usr/local/cuda/bin/..//include" -D"__CUDACC_VER__=70517" -D"__CUDACC_VER_BUILD__=17" -D"__CUDACC_VER_MINOR__=5" -D"__CUDACC_VER_MAJOR__=7" -m64 -o "t266_dlink.o" "/usr/local/cuda/bin/crt/link.stub"
  63. g++ -m64 -o "t266" -Wl,--start-group "t266_dlink.o" "t266.o" "-L/usr/local/cuda/bin/..//lib64/stubs" "-L/usr/local/cuda/bin/..//lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group
  64. $ sh dryrun.out
  65. $ ./t266
  66. data = 2
  67. $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement