Advertisement
Guest User

Untitled

a guest
Dec 22nd, 2014
154
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.20 KB | None | 0 0
  1. /*
  2. * Architektura procesoru (ACH 2014)
  3. * Projekt c. 2 (cuda)
  4. * Login: xlogin00
  5. */
  6.  
  7. #include <sys/time.h>
  8. #include <cstdio>
  9. #include <cmath>
  10.  
  11. #include "nbody.h"
  12.  
  13. int main(int argc, char **argv)
  14. {
  15. FILE *fp;
  16. struct timeval t1, t2;
  17. int N;
  18. float dt;
  19. int steps;
  20. int thr_blc;
  21.  
  22. // parametry
  23. if (argc != 7)
  24. {
  25. printf("Usage: nbody <N> <dt> <steps> <thr/blc> <input> <output>\n");
  26. exit(1);
  27. }
  28. N = atoi(argv[1]);
  29. dt = atof(argv[2]);
  30. steps = atoi(argv[3]);
  31. thr_blc = atoi(argv[4]);
  32.  
  33. printf("N: %d\n", N);
  34. printf("dt: %f\n", dt);
  35. printf("steps: %d\n", steps);
  36. printf("threads/block: %d\n", thr_blc);
  37.  
  38. // alokace pameti na CPU
  39. t_particles particles_cpu;
  40.  
  41. // ZDE DOPLNTE ALOKACI PAMETI NA CPU
  42. particles_cpu.pos_x = (float *)malloc(N*sizeof(float));
  43. particles_cpu.pos_y = (float *)malloc(N*sizeof(float));
  44. particles_cpu.pos_z = (float *)malloc(N*sizeof(float));
  45. particles_cpu.vel_x = (float *)malloc(N*sizeof(float));
  46. particles_cpu.vel_y = (float *)malloc(N*sizeof(float));
  47. particles_cpu.vel_z = (float *)malloc(N*sizeof(float));
  48. particles_cpu.weight = (float*)malloc(N*sizeof(float));
  49. // nacteni castic ze souboru
  50. fp = fopen(argv[5], "r");
  51. if (fp == NULL)
  52. {
  53. printf("Can't open file %s!\n", argv[2]);
  54. exit(1);
  55. }
  56. particles_read(fp, particles_cpu, N);
  57. fclose(fp);
  58. printf("%f",particles_cpu.pos_x[0]);
  59. t_particles particles_gpu[2];
  60. for (int i = 0; i < 2; i++)
  61. {
  62. float *pos_x;
  63. float *pos_y;
  64. float *pos_z;
  65. float *vel_x;
  66. float *vel_y;
  67. float *vel_z;
  68. float *weight;
  69.  
  70. // alokace pameti na GPU
  71. cudaMalloc((void**) &(pos_x), sizeof(float)*N);
  72. cudaMalloc((void**) &(pos_y), sizeof(float)*N);
  73. cudaMalloc((void**) &(pos_z), sizeof(float)*N);
  74. cudaMalloc((void**) &(vel_x), sizeof(float)*N);
  75. cudaMalloc((void**) &(vel_y), sizeof(float)*N);
  76. cudaMalloc((void**) &(vel_z), sizeof(float)*N);
  77. cudaMalloc((void**) &(weight), sizeof(float)*N);
  78.  
  79. // kopirovani castic na GPU
  80. cudaMemcpy(pos_x, particles_cpu.pos_x, sizeof(float)*N, cudaMemcpyHostToDevice);
  81. cudaMemcpy(pos_y, particles_cpu.pos_y, sizeof(float)*N, cudaMemcpyHostToDevice);
  82. cudaMemcpy(pos_z, particles_cpu.pos_z, sizeof(float)*N, cudaMemcpyHostToDevice);
  83. cudaMemcpy(vel_x, particles_cpu.vel_x, sizeof(float)*N, cudaMemcpyHostToDevice);
  84. cudaMemcpy(vel_y, particles_cpu.vel_y, sizeof(float)*N, cudaMemcpyHostToDevice);
  85. cudaMemcpy(vel_z, particles_cpu.vel_z, sizeof(float)*N, cudaMemcpyHostToDevice);
  86. cudaMemcpy(weight, particles_cpu.weight, sizeof(float)*N, cudaMemcpyHostToDevice);
  87.  
  88. particles_gpu[i].pos_x = pos_x;
  89. particles_gpu[i].pos_y = pos_y;
  90. particles_gpu[i].pos_z = pos_z;
  91. particles_gpu[i].vel_x = vel_x;
  92. particles_gpu[i].vel_y = vel_y;
  93. particles_gpu[i].vel_z = vel_z;
  94. particles_gpu[i].weight = weight;
  95. }
  96.  
  97. // vypocet
  98.  
  99. gettimeofday(&t1, 0);
  100. for (int s = 0; s < steps; s++)
  101. {
  102. // ZDE DOPLNTE SPUSTENI KERNELU
  103. int c = s%2;
  104. int d = (s+1)%2;
  105. //int block_size = 1024;
  106. int blockSize; // The launch configurator returned block size
  107. int minGridSize; // The minimum grid size needed to achieve the maximum occupancy for a full device launch
  108.  
  109. cudaOccupancyMaxPotentialBlockSize(&minGridSize, &blockSize, particles_simulate, 0, N);
  110.  
  111. int n_blocks = N/blockSize + (N%blockSize == 0 ? 0:1);
  112. particles_simulate <<<1, thr_blc>>>(particles_gpu[c],particles_gpu[d],N,dt);
  113.  
  114. }
  115. // ZDE DOPLNTE SYNCHRONIZACI
  116. cudaDeviceSynchronize();
  117. gettimeofday(&t2, 0);
  118.  
  119. // cas
  120. double t = (1000000.0 * (t2.tv_sec - t1.tv_sec) + t2.tv_usec - t1.tv_usec) / 1000000.0;
  121. printf("Time: %f s\n", t);
  122.  
  123. // kpirovani castic zpet na CPU
  124. // ZDE DOPLNTE KOPIROVANI DAT Z GPU NA CPU
  125.  
  126.  
  127. int d = (steps+1)%2;
  128.  
  129. cudaMemcpy(particles_cpu.pos_x, particles_gpu[d].pos_x, sizeof(int)*N, cudaMemcpyDeviceToHost);
  130. printf("%f",particles_cpu.pos_x[0]);
  131. cudaMemcpy(particles_cpu.pos_y, particles_gpu[d].pos_y, sizeof(int)*N, cudaMemcpyDeviceToHost);
  132. cudaMemcpy(particles_cpu.pos_z, particles_gpu[d].pos_z, sizeof(int)*N, cudaMemcpyDeviceToHost);
  133. cudaMemcpy(particles_cpu.vel_x, particles_gpu[d].vel_x, sizeof(int)*N, cudaMemcpyDeviceToHost);
  134. cudaMemcpy(particles_cpu.vel_y, particles_gpu[d].vel_y, sizeof(int)*N, cudaMemcpyDeviceToHost);
  135. cudaMemcpy(particles_cpu.vel_z, particles_gpu[d].vel_z, sizeof(int)*N, cudaMemcpyDeviceToHost);
  136. cudaMemcpy(particles_cpu.weight, particles_gpu[d].weight, sizeof(int)*N, cudaMemcpyDeviceToHost);
  137.  
  138. // ulozeni castic do souboru
  139. fp = fopen(argv[6], "w");
  140. if (fp == NULL)
  141. {
  142. printf("Can't open file %s!\n", argv[6]);
  143. exit(1);
  144. }
  145. particles_write(fp, particles_cpu, N);
  146. fclose(fp);
  147.  
  148. return 0;
  149. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement