Advertisement
Guest User

Untitled

a guest
Apr 10th, 2013
135
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 3.80 KB | None | 0 0
  1. #include <iostream>
  2. #include <thrust/device_vector.h>
  3. #include <cmath>
  4. #include <thrust/sort.h>
  5. #include <ctime>
  6. #include <vector>
  7. #include <sys/time.h>
  8.  
  // Fills dev_data[0..N-1] with a random non-decreasing sequence.
  //
  // Each element equals the previous one plus either 0 or 1 (the first element
  // is 0 or 1), so the sequence consists of runs of equal values.
  // The generator is re-seeded from the wall clock on every call, so the
  // output differs from run to run.
  //
  // dev_data: buffer with room for at least N ints. It is dereferenced directly
  //           by this host function, so it must be host-accessible memory.
  // N:        number of elements to generate; nothing is written when N <= 0.
  __host__ void initialize(int*& dev_data, int N){
    const int stepRange = 2; // each increment is rand() % stepRange, i.e. 0 or 1

    srand(time(NULL));

    int runningValue = 0;
    for(int slot = 0; slot < N; ++slot){
      runningValue += rand() % stepRange;
      dev_data[slot] = runningValue;
    }
  }
  25.  
  // Marks the start of every run of equal values in device_data.
  //
  // For each i in [0, N): pos_ptr[i] = i when element i starts a new run
  // (i == 0 or device_data[i-1] != device_data[i]); otherwise pos_ptr[i] is
  // set to INT_MAX, a sentinel larger than any valid position, so an ascending
  // sort of pos_ptr moves all run starts to the front.
  //
  // Launch layout: 1D grid of 1D blocks. Each thread handles the contiguous
  // slice [t*totalAmountOfValuesPerThread, (t+1)*totalAmountOfValuesPerThread)
  // where t is its global thread index, so the launch must provide at least
  // ceil(N / totalAmountOfValuesPerThread) threads to cover the whole input.
  // No shared memory is used.
  //
  // device_data:                  input values (N ints, device memory)
  // totalAmountOfValuesPerThread: number of consecutive elements per thread
  // pos_ptr:                      output positions (N ints, device memory)
  // N:                            number of elements
  __global__ void findPositions(int *device_data, int totalAmountOfValuesPerThread, int* pos_ptr, int N){

    // Largest int: always sorts after every valid position, whatever N is.
    const int noRunStart = 0x7fffffff;

    // 64-bit arithmetic so the index computation cannot overflow for large grids.
    const long long globalThread = (long long)blockIdx.x * blockDim.x + threadIdx.x;
    const long long sliceBegin = globalThread * totalAmountOfValuesPerThread;

    // Walk the thread's whole slice rather than a fixed two elements, so every
    // output slot is written for any totalAmountOfValuesPerThread.
    for(int k = 0; k < totalAmountOfValuesPerThread; ++k){
      const long long i = sliceBegin + k;
      if(i >= N) break; // past the end of the data: nothing left for this thread

      // Element 0 always starts a run; otherwise compare with the predecessor
      // (which may belong to the previous thread's slice - it is read only).
      const bool startsRun = (i == 0) || (device_data[i-1] != device_data[i]);
      pos_ptr[i] = startsRun ? (int)i : noRunStart;
    }
  }
  51.  
  // Returns the current wall-clock time in seconds (microsecond resolution).
  static double wallClockSeconds(){
    timeval tim;
    gettimeofday(&tim, NULL);
    return tim.tv_sec + (tim.tv_usec/1000000.0);
  }

  // Generates a random non-decreasing sequence, finds the start index of every
  // run of equal values on the CPU and on the GPU, prints the wall-clock time
  // of both, and reports whether the two results agree.
  //
  // Returns 0 when the GPU result matches the CPU reference, 1 otherwise
  // (including when the kernel launch or execution fails).
  __host__ int main(void){

    const int N = 100;
    // 256 threads per block is within the block-size limit of every CUDA device.
    const int threadsPerBlock = 256;
    const int totalAmountOfValuesPerThread = 2;
    // Ceil-divide twice: elements -> threads, then threads -> blocks.
    const int threadsNeeded = (N + totalAmountOfValuesPerThread - 1) / totalAmountOfValuesPerThread;
    const int totalBlocks = (threadsNeeded + threadsPerBlock - 1) / threadsPerBlock;

    // std::vector owns the host buffer, so it is released on every exit path.
    std::vector<int> host_buffer(N);
    int *host_data = (N > 0) ? &host_buffer[0] : NULL;

    initialize(host_data, N);

    //run cpu version: record the first index of every run of equal values
    double before = wallClockSeconds();

    std::vector<int> results;
    int index = 0;
    while(index<N){
      results.push_back(index);
      int nextIndex = index+1;
      while(nextIndex<N && host_data[index] == host_data[nextIndex] )
        nextIndex++;
      index = nextIndex;
    }

    double after = wallClockSeconds();
    std::cout<<"CPU time: "<<after-before<<" seconds"<<std::endl;

    //gpu version: the timed region covers allocation, transfers, kernel, sort and release
    before = wallClockSeconds();

    // device_vector allocates and copies host -> device in one step and frees
    // the memory automatically, also on the early-return error paths below.
    thrust::device_vector<int> dev_data(host_buffer.begin(), host_buffer.end());
    thrust::device_vector<int> dev_pos(N); //one output slot per input element
    int* device_data = thrust::raw_pointer_cast(dev_data.data());
    int* pos_ptr = thrust::raw_pointer_cast(dev_pos.data());

    if(totalBlocks > 0){ //a zero-sized grid is an invalid launch configuration
      findPositions<<<totalBlocks, threadsPerBlock>>>(device_data, totalAmountOfValuesPerThread, pos_ptr, N);

      // A launch does not return a status; configuration errors show up here...
      cudaError_t status = cudaGetLastError();
      if(status != cudaSuccess){
        std::cerr<<"findPositions launch failed: "<<cudaGetErrorString(status)<<std::endl;
        return 1;
      }
      // ...and faults raised while the kernel runs show up at the next sync.
      status = cudaDeviceSynchronize();
      if(status != cudaSuccess){
        std::cerr<<"findPositions execution failed: "<<cudaGetErrorString(status)<<std::endl;
        return 1;
      }
    }

    // Ascending sort moves the run starts to the front, sentinels to the back.
    thrust::sort(dev_pos.begin(), dev_pos.end());

    // Copy the sorted positions back to the host.
    thrust::host_vector<int> host_pos(dev_pos.begin(), dev_pos.end());

    //free device data inside the timed region
    dev_data.clear();
    dev_data.shrink_to_fit();
    dev_pos.clear();
    dev_pos.shrink_to_fit();

    after = wallClockSeconds();

    std::cout<<"GPU time: "<<after-before<<" seconds"<<std::endl;

    //check correctness: every CPU run start must appear, in order, on the GPU side
    bool correct = true;
    for(size_t i=0; i<results.size(); i++){
      if(results[i] != host_pos[i]){
        correct = false;
        std::cout<<"CPU: "<<results[i]<<" GPU: "<<host_pos[i]<<" index: "<<i<<std::endl;
        break;
      }
    }
    // The entry right after the last run start must be a sentinel (>= N);
    // a valid index there means the GPU reported a run start the CPU did not.
    if(correct && results.size() < host_pos.size() && host_pos[results.size()] < N){
      correct = false;
      std::cout<<"GPU reported an extra position: "<<host_pos[results.size()]<<" index: "<<results.size()<<std::endl;
    }

    if(correct) std::cout<<"GPU implementation is correct"<<std::endl;
    else std::cout<<"GPU implementation is wrong"<<std::endl;

    return correct ? 0 : 1;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement