Advertisement
Guest User

Untitled

a guest
Jan 22nd, 2017
97
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.61 KB | None | 0 0
  1. # include <stdlib.h>
  2. # include <stdio.h>
  3. # include <math.h>
  4. # include <time.h>
  5.  
  6. #define BLOCK_SIZE 256
  7.  
  8. int main ( int argc, char *argv[] );
  9.  
  10. __global__ void reductionMaXKernel(double* devShared,double* devDiff, double* blockResults, int n) {
  11.  
  12. unsigned int tid = threadIdx.x;
  13. unsigned int i = blockIdx.x * blockDim.x + threadIdx.x;
  14.  
  15. // Load block in the shared memory
  16. if (i < n) devShared[i] = devDiff[i];
  17. else devShared[i] = 0;
  18.  
  19. __syncthreads();
  20.  
  21. // Do reduction in shared memory
  22. for (unsigned int s = blockDim.x/2; s > 0; s >>= 1) {
  23. if (tid < s) {
  24. if(devShared[i]<devShared[i + s])
  25. devShared[i]=devShared[i + s];
  26. }
  27. __syncthreads();
  28. }
  29.  
  30. // Write result for this block to global memory
  31. if (tid == 0) blockResults[blockIdx.x] = devShared[i];
  32. }
  33.  
  34. __global__ void calulateIterration(double *devW,double *devU,double *devDiff,int M,int N){
  35.  
  36. int i= blockIdx.x * blockDim.x +threadIdx.x;
  37. if(i<M*N)
  38. devU[i] = devW[i];
  39. __syncthreads();
  40.  
  41. if((i>=N )&& (i<(M*N-N)) && ((i%N)!=0) && ((i+1)%N!=0)){
  42. devW[i] = ( devU[i-1] + devU[i+1] + devU[i-N] + devU[i+N] ) / 4.0;
  43.  
  44. devDiff[i] = fabs ( devW[i] - devU[i] );
  45. }
  46. else if(i<M*N)
  47. devDiff[i]=0;
  48. __syncthreads();
  49. }
  50.  
  51.  
  52.  
  53. int main ( int argc, char *argv[] )
  54. {
  55. int M;
  56. int N;
  57. int size;
  58. double diff;
  59. double epsilon;
  60. int i;
  61. int j;
  62. double mean;
  63. char output_file[80];
  64. int success;
  65. double *dataw;
  66. double *datau;
  67. double *devW,*devU,*devDiff,*devBlockRes,*devShared;
  68. double **u;
  69. double **w;
  70. int numBlocks;
  71.  
  72.  
  73. if (argc != 5) {
  74. return 1;
  75. } else {
  76. success = sscanf ( argv[1], "%d", &M );
  77. success += sscanf ( argv[2], "%d", &N );
  78. success += sscanf ( argv[3], "%lf", &epsilon );
  79. success += sscanf ( argv[4], "%s", output_file );
  80.  
  81. if (success != 4) {
  82. return 2;
  83. }
  84. }
  85.  
  86.  
  87. datau =(double *)calloc(M * N, sizeof(double));
  88. dataw = (double *)calloc(M * N, sizeof(double));
  89. w = (double **)calloc(M, sizeof(double*));
  90. u = (double **)calloc(M, sizeof(double*));
  91. for (i = 0; i < M; i++){
  92. u[i] = &datau[i * N];
  93. w[i] = &dataw[i * N];
  94. }
  95.  
  96. size=M * N * sizeof(double);
  97. cudaMalloc((void **) &devW, size);
  98. cudaMalloc((void **) &devU, size);
  99. cudaMalloc((void **) &devShared, size);
  100. cudaMalloc((void **) &devDiff, size);
  101.  
  102. /*
  103. Set the boundary values, which don't change.
  104. */
  105. for ( i = 1; i < M - 1; i++ )
  106. {
  107. w[i][0] = 100.0;
  108. }
  109. for ( i = 1; i < M - 1; i++ )
  110. {
  111. w[i][N-1] = 100.0;
  112. }
  113. for ( j = 0; j < N; j++ )
  114. {
  115. w[M-1][j] = 100.0;
  116. }
  117. for ( j = 0; j < N; j++ )
  118. {
  119. w[0][j] = 0.0;
  120. }
  121. /*
  122. Average the boundary values, to come up with a reasonable
  123. initial value for the interior.
  124. */
  125. mean = 0.0;
  126. for ( i = 1; i < M - 1; i++ )
  127. {
  128. mean = mean + w[i][0];
  129. }
  130. for ( i = 1; i < M - 1; i++ )
  131. {
  132. mean = mean + w[i][N-1];
  133. }
  134. for ( j = 0; j < N; j++ )
  135. {
  136. mean = mean + w[M-1][j];
  137. }
  138. for ( j = 0; j < N; j++ )
  139. {
  140. mean = mean + w[0][j];
  141. }
  142. mean = mean / ( double ) ( 2 * M + 2 * N - 4 );
  143. /*
  144. Initialize the interior solution to the mean value.
  145. */
  146. for ( i = 1; i < M - 1; i++ )
  147. {
  148. for ( j = 1; j < N - 1; j++ )
  149. {
  150. w[i][j] = mean;
  151. }
  152. }
  153. /*
  154. iterate until the new solution W differs from the old solution U
  155. by no more than EPSILON.
  156. */
  157. int n = M*N;
  158. cudaMemcpy(devW, dataw, size, cudaMemcpyHostToDevice);
  159. numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
  160. cudaMalloc((void **) &devBlockRes, numBlocks * sizeof(double));
  161.  
  162. diff = epsilon;
  163.  
  164. while ( epsilon <= diff )
  165. {
  166. diff=0.0;
  167. n = M*N;
  168. calulateIterration<<<numBlocks, BLOCK_SIZE>>>(devW, devU,devDiff,M,N);
  169.  
  170. reductionMaXKernel<<<numBlocks, BLOCK_SIZE, BLOCK_SIZE * sizeof(double)>>>(devShared,devDiff, devBlockRes, n);
  171. cudaMemcpy(&diff, devBlockRes, sizeof(double), cudaMemcpyDeviceToHost);
  172. printf("prva red: %6.2f \n", diff);
  173.  
  174. while (numBlocks > 1) {
  175. n = numBlocks;
  176. numBlocks = (n + BLOCK_SIZE - 1) / BLOCK_SIZE;
  177. reductionMaXKernel<<<numBlocks, BLOCK_SIZE, BLOCK_SIZE * sizeof(double)>>>(devShared,devBlockRes, devBlockRes, n);
  178. cudaMemcpy(&diff, devBlockRes, sizeof(double), cudaMemcpyDeviceToHost);
  179. printf("u while d: %6.2f \n", diff);
  180. }
  181. cudaMemcpy(&diff, devBlockRes, sizeof(double), cudaMemcpyDeviceToHost);
  182.  
  183. printf("%6.2f \n", diff);
  184.  
  185. }
  186.  
  187. cudaMemcpy(dataw, devW, M*N * sizeof(double), cudaMemcpyDeviceToHost);
  188.  
  189. cudaFree(devW);
  190. cudaFree(devU);
  191. cudaFree(devDiff);
  192.  
  193. /*
  194. for ( i = 0; i < M; i++ )
  195. for ( j = 0; j < N; j++)
  196. printf("%6.2f ", w[i][j] );
  197. */
  198. return 0;
  199.  
  200. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement