Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- $ cat t266.cu
- #include <stdio.h>
- __global__ void mykernel(int *data){ // Kernel: increments the single int that data points to by one
- (*data)++; // plain (non-atomic) read-modify-write; main launches this with <<<1,1>>>, i.e. a single thread
- }
- // Reports a failed CUDA runtime call on stderr.
- // Returns 0 when err is cudaSuccess, 1 otherwise, so the result can double as an exit code.
- static int cudaFailed(cudaError_t err, const char *what){
- if (err == cudaSuccess) return 0;
- fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(err));
- return 1;
- }
- // Host driver: copies a zero-initialized int to the device, runs mykernel once on it
- // with a single thread, copies the value back and prints it as "data = <value>".
- // Returns 0 on success and 1 if any CUDA call reported an error (nothing is printed
- // to stdout in that case, so a stale h_data is never mistaken for a kernel result).
- int main(){
- int *d_data = NULL, h_data = 0;
- int failed = cudaFailed(cudaMalloc((void **)&d_data, sizeof(int)), "cudaMalloc");
- if (!failed) failed = cudaFailed(cudaMemcpy(d_data, &h_data, sizeof(int), cudaMemcpyHostToDevice), "cudaMemcpy (host to device)");
- if (!failed){
- mykernel<<<1,1>>>(d_data);
- // a kernel launch returns no status itself; launch errors are reported by cudaGetLastError()
- failed = cudaFailed(cudaGetLastError(), "mykernel launch");
- }
- // this blocking copy also reports errors raised while the kernel was executing
- if (!failed) failed = cudaFailed(cudaMemcpy(&h_data, d_data, sizeof(int), cudaMemcpyDeviceToHost), "cudaMemcpy (device to host)");
- if (!failed) printf("data = %d\n", h_data);
- cudaFree(d_data); // release the device allocation; a null pointer is accepted and ignored
- return failed;
- }
- $ nvcc -keep -o t266 t266.cu
- $ vi t266.ptx
- $ cat t266.ptx
- //
- // Generated by NVIDIA NVVM Compiler
- //
- // Compiler Build ID: CL-19856038
- // Cuda compilation tools, release 7.5, V7.5.17
- // Based on LLVM 3.4svn
- //
- .version 4.3
- .target sm_20
- .address_size 64
- // .globl _Z8mykernelPi
- .visible .entry _Z8mykernelPi( // kernel entry point; _Z8mykernelPi is the C++-mangled name of mykernel(int*)
- .param .u64 _Z8mykernelPi_param_0 // the kernel's only parameter: the 64-bit pointer argument (data)
- )
- {
- .reg .b32 %r<3>; // 32-bit registers %r0..%r2 for the loaded and updated value
- .reg .b64 %rd<3>; // 64-bit registers %rd0..%rd2 for the pointer
- ld.param.u64 %rd1, [_Z8mykernelPi_param_0]; // fetch the pointer argument
- cvta.to.global.u64 %rd2, %rd1; // convert the generic address to a global-space address
- ldu.global.u32 %r1, [%rd2]; // load the current 32-bit value. NOTE(review): ldu is the uniform/read-only load form; a plain ld.global would be expected for a location that is stored to right after - confirm whether this line was part of the manual edit
- add.s32 %r2, %r1, 2; // add the immediate 2. NOTE(review): the C source increments by 1; this listing was printed after the "vi t266.ptx" step and the final run prints "data = 2", so the constant was presumably hand-edited from 1 - confirm
- st.global.u32 [%rd2], %r2; // write the updated value back to the same address
- ret; // end of kernel
- }
- $ nvcc -dryrun -o t266 t266.cu --keep 2>dryrun.out
- $ vi dryrun.out
- $ cat dryrun.out
- ptxas -arch=sm_20 -m64 "t266.ptx" -o "t266.sm_20.cubin"
- fatbinary --create="t266.fatbin" -64 --key="xxxxxxxxxx" "--image=profile=sm_20,file=t266.sm_20.cubin" "--image=profile=compute_20,file=t266.ptx" --embedded-fatbin="t266.fatbin.c" --cuda
- gcc -D__CUDA_ARCH__=200 -E -x c++ -DCUDA_DOUBLE_MATH_FUNCTIONS -D__CUDA_PREC_DIV -D__CUDA_PREC_SQRT "-I/usr/local/cuda/bin/..//include" -m64 "t266.cudafe1.cpp" > "t266.cu.cpp.ii"
- gcc -c -x c++ "-I/usr/local/cuda/bin/..//include" -fpreprocessed -m64 -o "t266.o" "t266.cu.cpp.ii"
- nvlink --arch=sm_20 --register-link-binaries="t266_dlink.reg.c" -m64 "-L/usr/local/cuda/bin/..//lib64/stubs" "-L/usr/local/cuda/bin/..//lib64" -cpu-arch=X86_64 "t266.o" -o "t266_dlink.sm_20.cubin"
- fatbinary --create="t266_dlink.fatbin" -64 --key="t266_dlink" -link "--image=profile=sm_20,file=t266_dlink.sm_20.cubin" --embedded-fatbin="t266_dlink.fatbin.c"
- gcc -c -x c++ -DFATBINFILE="\"t266_dlink.fatbin.c\"" -DREGISTERLINKBINARYFILE="\"t266_dlink.reg.c\"" -I. "-I/usr/local/cuda/bin/..//include" -D"__CUDACC_VER__=70517" -D"__CUDACC_VER_BUILD__=17" -D"__CUDACC_VER_MINOR__=5" -D"__CUDACC_VER_MAJOR__=7" -m64 -o "t266_dlink.o" "/usr/local/cuda/bin/crt/link.stub"
- g++ -m64 -o "t266" -Wl,--start-group "t266_dlink.o" "t266.o" "-L/usr/local/cuda/bin/..//lib64/stubs" "-L/usr/local/cuda/bin/..//lib64" -lcudadevrt -lcudart_static -lrt -lpthread -ldl -Wl,--end-group
- $ sh dryrun.out
- $ ./t266
- data = 2
- $
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement