Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- [code]#include <stdio.h>
- #include <stdlib.h>
- #include <iostream>
- //cuda
- #include <helper_cuda.h>
- //boost
- #include <boost/thread.hpp>
// ---------------------------------------------------------------------------
// File-scope configuration shared by main(), role1() and role2().
// ---------------------------------------------------------------------------

// SIZE_MAX is also a standard macro (<stdint.h> / <cstdint>), which the
// headers included above may already have defined. Drop any earlier
// definition first so that redefining it here does not raise a
// macro-redefinition diagnostic. The macro is not referenced anywhere in the
// visible source.
#ifdef SIZE_MAX
#undef SIZE_MAX
#endif
#define SIZE_MAX 9000

// Kernels, defined after main(). Each one stores a value derived from a and b
// into one element of fpDeviceBuffer per thread.
__global__ void calc(double* fpDeviceBuffer, double a, double b);
__global__ void calc2(double* fpDeviceBuffer, double a, double b);
__global__ void calc3(double* fpDeviceBuffer, double a, double b);
__global__ void calc4(double* fpDeviceBuffer, double a, double b);

// Host functions that main() runs on two separate boost threads.
void role1();
void role2();

// Scalar operands handed to every kernel launch. Both values are exactly
// representable, so double literals give the same values as the float ones.
double a = 6708.0;
double b = 0.25;

// Host/device buffer pairs; allocated and released in main().
double *fpHostBuffer = NULL;
double *fpDeviceBuffer = NULL;
double *fpHostBuffer2 = NULL;
double *fpDeviceBuffer2 = NULL;

// Not read anywhere in the visible source.
float kernel_time = 1.0f;

// Number of double elements in each buffer.
int size = 512 * 128;

// Streams created and destroyed in main(); role1() uses `stream`, role2() uses
// `stream2`. stream3 and stream4 are not used in the visible source.
cudaStream_t stream, stream2, stream3, stream4;

// Launch configuration: 512 threads per block and size/512 blocks, i.e. one
// thread per buffer element. `size` must stay a multiple of 512 because the
// kernels perform no bounds check.
dim3 blocks(size / 512, 1, 1);
dim3 threads(512, 1, 1);

// Filled in by main() via cudaGetDevice / cudaGetDeviceProperties.
int cuda_device = 0;
cudaDeviceProp deviceProp;
// Launches calc, calc2, calc3 and calc4 back-to-back on the default stream,
// all writing to deviceBuffer, and checks every launch for configuration
// errors (kernel launches do not return an error code themselves).
static void launchCalcSequence(double* deviceBuffer)
{
    calc<<<blocks, threads>>>(deviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc2<<<blocks, threads>>>(deviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc3<<<blocks, threads>>>(deviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc4<<<blocks, threads>>>(deviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
}

/**
 * Program entry point.
 *
 * Allocates two pinned host buffers and two device buffers, runs the four
 * kernels twice on the default stream (with a blocking upload before the
 * first pass and a blocking download after it), then creates four streams
 * and runs role1() and role2() concurrently on two boost threads. All
 * streams and buffers are released before returning.
 *
 * @param argc unused
 * @param argv unused
 * @return EXIT_SUCCESS; any CUDA failure is handled by checkCudaErrors.
 */
int main(int argc, char* argv[])
{
    (void)argc;
    (void)argv;

    const size_t bufferBytes = size * sizeof(double);

    // Page-locked host memory (required for truly asynchronous copies).
    checkCudaErrors(cudaMallocHost(&fpHostBuffer, bufferBytes));
    checkCudaErrors(cudaMallocHost(&fpHostBuffer2, bufferBytes));

    // Device memory.
    checkCudaErrors(cudaMalloc(&fpDeviceBuffer, bufferBytes));
    checkCudaErrors(cudaMalloc(&fpDeviceBuffer2, bufferBytes));

    // Get device properties.
    checkCudaErrors(cudaGetDevice(&cuda_device));
    checkCudaErrors(cudaGetDeviceProperties(&deviceProp, cuda_device));

    // First pass on the default stream: upload, compute, download.
    checkCudaErrors(cudaMemcpy(fpDeviceBuffer, fpHostBuffer, bufferBytes,
                               cudaMemcpyHostToDevice));
    launchCalcSequence(fpDeviceBuffer);
    checkCudaErrors(cudaMemcpy(fpHostBuffer, fpDeviceBuffer, bufferBytes,
                               cudaMemcpyDeviceToHost));

    // Second pass on the default stream; its result is not copied back.
    launchCalcSequence(fpDeviceBuffer);

    // Wait for the default-stream work so that any asynchronous execution
    // error is reported here rather than inside one of the worker threads.
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaStreamCreate(&stream));
    checkCudaErrors(cudaStreamCreate(&stream2));
    checkCudaErrors(cudaStreamCreate(&stream3));
    checkCudaErrors(cudaStreamCreate(&stream4));

    // Each role synchronizes its own stream before returning, so once both
    // threads are joined no stream work is outstanding.
    boost::thread t1(role1);
    boost::thread t2(role2);
    t1.join();
    t2.join();

    checkCudaErrors(cudaStreamDestroy(stream));
    checkCudaErrors(cudaStreamDestroy(stream2));
    checkCudaErrors(cudaStreamDestroy(stream3));
    checkCudaErrors(cudaStreamDestroy(stream4));

    checkCudaErrors(cudaFree(fpDeviceBuffer));
    checkCudaErrors(cudaFreeHost(fpHostBuffer));
    checkCudaErrors(cudaFree(fpDeviceBuffer2));
    checkCudaErrors(cudaFreeHost(fpHostBuffer2));

    return EXIT_SUCCESS;
}
/**
 * Worker run on its own host thread, issuing all work on `stream`.
 *
 * Uploads fpHostBuffer to fpDeviceBuffer, launches calc, calc2, calc3 and
 * calc4 on that buffer in order, downloads the buffer back into fpHostBuffer
 * and blocks until the stream has drained. Every API call and every kernel
 * launch is checked, so a failure in this thread is reported instead of
 * being silently dropped.
 */
void role1()
{
    const size_t bufferBytes = size * sizeof(double);

    checkCudaErrors(cudaMemcpyAsync(fpDeviceBuffer, fpHostBuffer, bufferBytes,
                                    cudaMemcpyHostToDevice, stream));

    // Work queued on one stream executes in issue order, so the four kernels
    // run one after another between the two copies.
    calc<<<blocks, threads, 0, stream>>>(fpDeviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc2<<<blocks, threads, 0, stream>>>(fpDeviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc3<<<blocks, threads, 0, stream>>>(fpDeviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());
    calc4<<<blocks, threads, 0, stream>>>(fpDeviceBuffer, a, b);
    checkCudaErrors(cudaGetLastError());

    checkCudaErrors(cudaMemcpyAsync(fpHostBuffer, fpDeviceBuffer, bufferBytes,
                                    cudaMemcpyDeviceToHost, stream));

    // Asynchronous execution errors from the kernels or copies surface here.
    checkCudaErrors(cudaStreamSynchronize(stream));
}
/**
 * Worker run on its own host thread, issuing all work on `stream2`.
 *
 * Launches calc, calc2, calc3 and calc4 on fpDeviceBuffer2 in order and
 * blocks until the stream has drained. No host/device copies are performed
 * here. Every kernel launch and the final synchronization are checked, so a
 * failure in this thread is reported instead of being silently dropped.
 */
void role2()
{
    calc<<<blocks, threads, 0, stream2>>>(fpDeviceBuffer2, a, b);
    checkCudaErrors(cudaGetLastError());
    calc2<<<blocks, threads, 0, stream2>>>(fpDeviceBuffer2, a, b);
    checkCudaErrors(cudaGetLastError());
    calc3<<<blocks, threads, 0, stream2>>>(fpDeviceBuffer2, a, b);
    checkCudaErrors(cudaGetLastError());
    calc4<<<blocks, threads, 0, stream2>>>(fpDeviceBuffer2, a, b);
    checkCudaErrors(cudaGetLastError());

    // Asynchronous execution errors from the kernels surface here.
    checkCudaErrors(cudaStreamSynchronize(stream2));
}
/**
 * Stores the logical AND of a and b into one buffer element per thread:
 * 1.0 when both a and b are non-zero, 0.0 otherwise.
 *
 * Launch layout: 1D grid of 1D blocks. Thread t of block k writes element
 * k * blockDim.x + t. The index is derived from blockDim.x rather than a
 * hard-coded 512, so any block size addresses a contiguous range.
 *
 * There is no bounds check: fpDeviceBuffer must hold at least
 * gridDim.x * blockDim.x doubles.
 *
 * @param fpDeviceBuffer device pointer to the output buffer
 * @param a              first operand
 * @param b              second operand
 */
__global__ void calc(double* fpDeviceBuffer, double a, double b)
{
    // Widen before multiplying so the index cannot wrap at 32 bits.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    fpDeviceBuffer[idx] = (a && b);
}
/**
 * Stores the product a * b into one buffer element per thread.
 *
 * Launch layout: 1D grid of 1D blocks. Thread t of block k writes element
 * k * blockDim.x + t. The index is derived from blockDim.x rather than a
 * hard-coded 512, so any block size addresses a contiguous range.
 *
 * There is no bounds check: fpDeviceBuffer must hold at least
 * gridDim.x * blockDim.x doubles.
 *
 * @param fpDeviceBuffer device pointer to the output buffer
 * @param a              first factor
 * @param b              second factor
 */
__global__ void calc2(double* fpDeviceBuffer, double a, double b)
{
    // Widen before multiplying so the index cannot wrap at 32 bits.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    fpDeviceBuffer[idx] = a * b;
}
/**
 * Stores the logical OR of a and b into one buffer element per thread:
 * 1.0 when at least one of a and b is non-zero, 0.0 otherwise.
 *
 * Launch layout: 1D grid of 1D blocks. Thread t of block k writes element
 * k * blockDim.x + t. The index is derived from blockDim.x rather than a
 * hard-coded 512, so any block size addresses a contiguous range.
 *
 * There is no bounds check: fpDeviceBuffer must hold at least
 * gridDim.x * blockDim.x doubles.
 *
 * @param fpDeviceBuffer device pointer to the output buffer
 * @param a              first operand
 * @param b              second operand
 */
__global__ void calc3(double* fpDeviceBuffer, double a, double b)
{
    // Widen before multiplying so the index cannot wrap at 32 bits.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    fpDeviceBuffer[idx] = (a || b);
}
/**
 * Stores the sum a + b into one buffer element per thread.
 *
 * Launch layout: 1D grid of 1D blocks. Thread t of block k writes element
 * k * blockDim.x + t. The index is derived from blockDim.x rather than a
 * hard-coded 512, so any block size addresses a contiguous range.
 *
 * There is no bounds check: fpDeviceBuffer must hold at least
 * gridDim.x * blockDim.x doubles.
 *
 * @param fpDeviceBuffer device pointer to the output buffer
 * @param a              first addend
 * @param b              second addend
 */
__global__ void calc4(double* fpDeviceBuffer, double a, double b)
{
    // Widen before multiplying so the index cannot wrap at 32 bits.
    const size_t idx = static_cast<size_t>(blockIdx.x) * blockDim.x + threadIdx.x;
    fpDeviceBuffer[idx] = a + b;
}
- [/code]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement