Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <stdio.h>
- #include <time.h>
- #include <stdlib.h>
/*
 * Kernel: builds a histogram of the decimal digits found in the first
 * NUM_SAMPLED characters of piDevice.
 *
 * Parameters:
 *   piDevice     - device buffer of ASCII characters; at least NUM_SAMPLED
 *                  bytes must be readable.
 *   countsDevice - device buffer of at least NUM_DIGITS ints; on return
 *                  countsDevice[d] holds how many sampled characters were
 *                  the digit d. Its previous contents are discarded.
 *
 * Launch requirements:
 *   - One-dimensional launch; only threadIdx.x / blockIdx.x are used.
 *   - A single block is assumed: the histogram is cleared by global thread 0
 *     and published with __syncthreads(), which only orders threads of the
 *     same block.
 *   - The launch needs at least NUM_SAMPLED threads for every sampled
 *     character to be visited.
 *   - No shared memory is used.
 */
__global__ void Kernel(char *piDevice, int *countsDevice) {
    const int NUM_DIGITS = 10;   // one histogram bin per decimal digit
    const int NUM_SAMPLED = 10;  // number of leading characters examined

    int tid = threadIdx.x + blockIdx.x * blockDim.x;

    // Thread 0 clears the bins before any thread increments them.
    if (tid == 0) {
        for (int i = 0; i < NUM_DIGITS; i++) {
            countsDevice[i] = 0;
        }
    }
    // Executed unconditionally by every thread of the block, so the barrier
    // is never reached from divergent control flow.
    __syncthreads();

    if (tid < NUM_SAMPLED) {
        int digit = piDevice[tid] - '0';

        // Debug trace of the value each sampling thread sees.
        printf("thing is %d\n", digit);

        // Several threads may hit the same bin at once, so the increment
        // must be atomic; a plain ++ loses updates. Characters that are not
        // '0'..'9' are skipped instead of indexing outside the histogram.
        if (digit >= 0 && digit < NUM_DIGITS) {
            atomicAdd(&countsDevice[digit], 1);
        }
    }
}
/* Prints a diagnostic and terminates the program if a CUDA runtime call failed. */
static void checkCuda(cudaError_t status, const char *what) {
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Program entry point.
 *
 * Reads one line from the file "pi" in the current directory, uploads it to
 * the device, runs Kernel on it with a single block of 1024 threads, and
 * prints the resulting per-digit counts together with the elapsed time
 * measured between two CUDA events.
 *
 * Command-line arguments are accepted but not used.
 * Returns 0 on success; exits with EXIT_FAILURE if the input file cannot be
 * read, a host allocation fails, or any CUDA call reports an error.
 */
int main(int argc, char* argv[]) {
    (void)argc;
    (void)argv;

    const size_t COUNT_SLOTS = 10240;    // ints allocated for the count buffers
    const size_t PI_CAPACITY = 1000002;  // bytes reserved for the input text
    const int NUM_DIGITS = 10;           // bins copied back and printed

    cudaEvent_t start, stop;
    checkCuda(cudaEventCreate(&start), "cudaEventCreate(start)");
    checkCuda(cudaEventCreate(&stop), "cudaEventCreate(stop)");
    // The start event is recorded before the host-side setup below, so the
    // reported time spans everything up to the stop record.
    checkCuda(cudaEventRecord(start, 0), "cudaEventRecord(start)");

    // calloc gives the same all-zero initial state the original fill loop produced.
    int *counts = (int*)calloc(COUNT_SLOTS, sizeof(int));
    if (counts == NULL) {
        fprintf(stderr, "Out of host memory for counts\n");
        return EXIT_FAILURE;
    }

    int *countsDevice;
    checkCuda(cudaMalloc((void**)&countsDevice, sizeof(int) * COUNT_SLOTS),
              "cudaMalloc(countsDevice)");

    char *piDevice;
    checkCuda(cudaMalloc((void**)&piDevice, sizeof(char) * PI_CAPACITY),
              "cudaMalloc(piDevice)");

    // Heap-allocated and zero-filled: a ~1 MB array does not belong on the
    // stack, and zero fill keeps the bytes after the text deterministic when
    // the whole buffer is copied to the device.
    char *pi = (char*)calloc(PI_CAPACITY, sizeof(char));
    if (pi == NULL) {
        fprintf(stderr, "Out of host memory for input buffer\n");
        return EXIT_FAILURE;
    }

    FILE *fp = fopen("pi", "r");
    if (fp == NULL) {
        perror("pi");
        return EXIT_FAILURE;
    }
    if (fgets(pi, (int)PI_CAPACITY, fp) == NULL) {
        fprintf(stderr, "Could not read any data from \"pi\"\n");
        fclose(fp);
        return EXIT_FAILURE;
    }
    fclose(fp);

    // The second character is replaced by '0' so the device only sees digit
    // characters; the extra zero is subtracted from the tally after the copy
    // back. NOTE(review): presumably this position holds the decimal point
    // of "3.14..." - confirm against the input file.
    pi[1] = '0';

    checkCuda(cudaMemcpy(piDevice, pi, sizeof(char) * PI_CAPACITY, cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");

    Kernel<<<1,1024>>>(piDevice, countsDevice);
    // A kernel launch returns no status itself; configuration errors are
    // reported through cudaGetLastError().
    checkCuda(cudaGetLastError(), "Kernel launch");

    // Blocking copy on the default stream: it completes only after the
    // kernel has finished, and reports any fault the kernel raised.
    checkCuda(cudaMemcpy(counts, countsDevice, sizeof(int) * NUM_DIGITS, cudaMemcpyDeviceToHost),
              "cudaMemcpy(device to host)");

    // Remove the artificial '0' that was written over pi[1].
    counts[0]--;

    checkCuda(cudaEventRecord(stop, 0), "cudaEventRecord(stop)");
    checkCuda(cudaEventSynchronize(stop), "cudaEventSynchronize(stop)");

    printf("Distribution:\n");
    for (int i = 0; i < NUM_DIGITS; i++) {
        printf("%d: %d\n", i, counts[i]);
    }

    float elapsedTime;
    checkCuda(cudaEventElapsedTime(&elapsedTime, start, stop), "cudaEventElapsedTime");
    printf( "Time to generate: %3.1f ms\n", elapsedTime );

    checkCuda(cudaEventDestroy(start), "cudaEventDestroy(start)");
    checkCuda(cudaEventDestroy(stop), "cudaEventDestroy(stop)");
    checkCuda(cudaFree(piDevice), "cudaFree(piDevice)");
    checkCuda(cudaFree(countsDevice), "cudaFree(countsDevice)");
    free(pi);
    free(counts);
    return 0;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement