Advertisement
Guest User

Untitled

a guest
Apr 20th, 2014
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 1.88 KB | None | 0 0
  1.  
  2. #include <stdio.h>
  3. #include <time.h>
  4. #include <stdlib.h>
  5.  
  /*
   * Kernel: histogram the first 10 characters of piDevice as decimal digits.
   *
   * piDevice     - device buffer of ASCII characters; the first 10 bytes are read.
   * countsDevice - device array of at least 10 ints; entries 0..9 are zeroed
   *                here and then incremented once per matching digit.
   *
   * Launch layout: 1D. The zeroing is ordered before the counting only by a
   * block-level barrier, so this must be launched as a single block with at
   * least 10 threads (the caller in this file uses <<<1,1024>>>).
   */
  __global__ void Kernel(char *piDevice, int *countsDevice) {
      const int tid = threadIdx.x + blockIdx.x * blockDim.x;

      // Clear the ten bins, one bin per thread.
      if (tid < 10) {
          countsDevice[tid] = 0;
      }
      // The barrier sits outside every branch so that all threads of the block
      // reach it; a __syncthreads() inside a divergent branch is undefined.
      __syncthreads();

      if (tid < 10) {
          const int digit = piDevice[tid] - '0';
          printf("thing is %d\n", digit);  // debug trace (device printf is slow)

          // Several threads may see the same digit, so the increment has to be
          // atomic. Characters that are not '0'..'9' are skipped instead of
          // being used as an out-of-range bin index.
          if (digit >= 0 && digit < 10) {
              atomicAdd(&countsDevice[digit], 1);
          }
      }
  }
  30.  
  31.  
  /* Print a message naming the failed CUDA runtime call and exit. */
  static void checkCuda(cudaError_t status, const char *what) {
      if (status != cudaSuccess) {
          fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
          exit(EXIT_FAILURE);
      }
  }

  /*
   * Reads one line of text from the file "pi" in the current directory, copies
   * it to the device, runs Kernel on it with a single block of 1024 threads,
   * and prints the ten digit counts plus the elapsed time measured with CUDA
   * events (the timed span covers host setup and file I/O as well).
   *
   * Returns 0 on success and EXIT_FAILURE when the file cannot be read, host
   * memory cannot be allocated, or the input does not start with digits.
   * CUDA failures terminate the process through checkCuda().
   */
  int main(int argc, char* argv[]) {
      (void)argc;
      (void)argv;

      const size_t piBytes = 1000002;     // host/device text buffer size
      const size_t countSlots = 10240;    // ints allocated for the counts arrays
      const int bins = 10;                // one bin per decimal digit
      const int charsCounted = 10;        // Kernel reads piDevice[0..9]

      cudaEvent_t start, stop;
      checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
      checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
      checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

      // Heap allocations: a ~1 MB array on the stack can overflow it. calloc
      // also guarantees that bytes past the text read below are zero rather
      // than uninitialised when the whole buffer is copied to the device.
      int *counts = (int*)calloc(countSlots, sizeof(int));
      char *pi = (char*)calloc(piBytes, sizeof(char));
      if (counts == NULL || pi == NULL) {
          fprintf(stderr, "out of host memory\n");
          free(pi);
          free(counts);
          return EXIT_FAILURE;
      }

      FILE *fp = fopen("pi", "r");
      if (fp == NULL) {
          perror("pi");
          free(pi);
          free(counts);
          return EXIT_FAILURE;
      }
      char *line = fgets(pi, (int)piBytes, fp);
      fclose(fp);
      if (line == NULL) {
          fprintf(stderr, "could not read any text from file 'pi'\n");
          free(pi);
          free(counts);
          return EXIT_FAILURE;
      }

      // Overwrite the second character with a digit so it can be binned
      // (presumably it is the decimal point of "3.14..." - confirm against the
      // input file). The extra '0' is subtracted from bin 0 after the kernel.
      pi[1] = '0';

      // The device code uses these characters as bin indices; refuse input
      // that would index outside the ten bins.
      for (int i = 0; i < charsCounted; i++) {
          if (pi[i] < '0' || pi[i] > '9') {
              fprintf(stderr, "file 'pi': character %d is not a decimal digit\n", i);
              free(pi);
              free(counts);
              return EXIT_FAILURE;
          }
      }

      int *countsDevice = NULL;
      char *piDevice = NULL;
      checkCuda(cudaMalloc((void**)&countsDevice, sizeof(int) * countSlots),
                "cudaMalloc(countsDevice)");
      checkCuda(cudaMalloc((void**)&piDevice, sizeof(char) * piBytes),
                "cudaMalloc(piDevice)");
      checkCuda(cudaMemcpy(piDevice, pi, sizeof(char) * piBytes, cudaMemcpyHostToDevice),
                "cudaMemcpy(pi -> device)");

      Kernel<<<1,1024>>>(piDevice, countsDevice);
      // A launch does not return a status; configuration errors show up here.
      checkCuda(cudaGetLastError(), "Kernel launch");

      // Blocking copy: also waits for the kernel and reports its runtime faults.
      checkCuda(cudaMemcpy(counts, countsDevice, sizeof(int) * bins, cudaMemcpyDeviceToHost),
                "cudaMemcpy(counts -> host)");
      counts[0]--;  // undo the count contributed by the '0' written to pi[1]

      checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
      checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

      printf("Distribution:\n");
      for (int i = 0; i < bins; i++) {
          printf("%d: %d\n", i, counts[i]);
      }

      float elapsedTime = 0.0f;
      checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
      printf( "Time to generate: %3.1f ms\n", elapsedTime );
      checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
      checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");

      checkCuda(cudaFree(piDevice), "cudaFree(piDevice)");
      checkCuda(cudaFree(countsDevice), "cudaFree(countsDevice)");
      free(pi);
      free(counts);
      return 0;
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement