Advertisement
Guest User

Untitled

a guest
Nov 16th, 2019
119
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.71 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <sys/time.h>
  4. #include "mpi.h"
  5. #include <string.h>
  6.  
  7. // Define output file name
  8. #define OUTPUT_FILE "karthik.pgm"
  9. #define MASTER 0
  10.  
  11. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  12. float * restrict image, float * restrict tmp_image);
  13. void init_image(const int nx, const int ny, const int width, const int height,
  14. float * restrict image, float * restrict tmp_image);
  15. void output_image(const char* file_name, const int nx, const int ny,
  16. const int width, const int height, float * restrict image);
  17. double wtime(void);
  18.  
  19. int calc_ncols_from_rank(int rank, int size, int height)
  20. {
  21. int ncols;
  22.  
  23. ncols = height / size; /* integer division */
  24. if ((height % size) != 0) { /* if there is a remainder */
  25. if (rank == size - 1)
  26. ncols += height % size; /* add remainder to last rank */
  27. }
  28.  
  29. return ncols;
  30. }
  31.  
  32. int main(int argc, char* argv[])
  33. {
  34. // Check usage
  35. if (argc != 4) {
  36. fprintf(stderr, "Usage: %s nx ny niters\n", argv[0]);
  37. exit(EXIT_FAILURE);
  38. }
  39.  
  40. // Initiliase problem dimensions from command line arguments
  41. int nx = atoi(argv[1]);
  42. int ny = atoi(argv[2]);
  43. int niters = atoi(argv[3]);
  44.  
  45. // Adding MPI stuff
  46. int rank; /* 'rank' of process among it's cohort */
  47. int size; /* size of cohort, i.e. num processes started */
  48.  
  49. MPI_Status status; /* struct used by MPI_Recv */
  50. int left; /* the rank of the process to the left */
  51. int right; /* the rank of the process to the right */
  52. int local_nrows; /* number of rows apportioned to this rank */
  53. int local_ncols; /* number of columns apportioned to this rank */
  54.  
  55. float *sendbuf; /* buffer to hold values to send */
  56. float *recvbuf; /* buffer to hold received values */
  57.  
  58. sendbuf = (float*)malloc(sizeof(float) * (local_nrows + 2));
  59. recvbuf = (float*)malloc(sizeof(float) * (local_nrows + 2));
  60.  
  61. // we pad the outer edge of the image to avoid out of range address issues in
  62. // stencil
  63. int width = nx + 2;
  64. int height = ny + 2;
  65.  
  66. /* initialise our MPI environment */
  67. MPI_Init( &argc, &argv );
  68. MPI_Comm_size( MPI_COMM_WORLD, &size );
  69. MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  70.  
  71. /*
  72. ** determine process ranks to the left and right of rank
  73. ** respecting periodic boundary conditions
  74. */
  75. left = (rank == MASTER) ? (rank + size - 1) : (rank - 1);
  76. right = (rank + 1) % size;
  77.  
  78. // Allocate the image
  79. float * restrict image = malloc(sizeof(float) * width * height);
  80. float * restrict tmp_image = malloc(sizeof(float) * width * height);
  81. // float * restrict result = malloc(sizeof(float) * width * height);
  82.  
  83. local_nrows = nx;
  84. local_ncols = calc_ncols_from_rank(rank, size, ny);
  85.  
  86. /* check whether the initialisation was successful */
  87. if ( local_ncols < 1 ) {
  88. fprintf(stderr,"Error: too many processes:- local_ncols < 1\n");
  89. MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
  90. }
  91.  
  92. int section_ncols = local_ncols + 2;
  93.  
  94. printf("section_nrows = %d, section_cols = %d for rank %d\n", local_nrows, section_ncols, rank);
  95.  
  96. float * restrict section = malloc(sizeof(float) * (local_nrows + 2) * section_ncols);
  97. float * restrict tmp_section = malloc(sizeof(float) * (local_nrows + 2) * section_ncols);
  98.  
  99. printf("Allocated space for section and tmp_section for rank = %d\n", rank);
  100.  
  101. // Set the input image
  102. init_image(nx, ny, width, height, image, tmp_image);
  103.  
  104. // Initialising the sections
  105. for(int i = 0; i < local_nrows + 2; i++) {
  106. for(int j = 0; j < local_ncols + 2; j++) {
  107. if (j > 0 && j < (local_ncols + 1) && i > 0 && i < (local_nrows + 1))
  108. {
  109. section[i * (local_ncols + 2) + j] = image[( i * width + j + (ny/size * rank) )];
  110. tmp_section[i * (local_ncols + 2) + j] = image[(ny/size * rank + i) * ((local_ncols + 2) + j)]; /* core cells */
  111. }
  112. else
  113. {
  114. section[i * (local_ncols + 2) + j] = 0.0f;
  115. tmp_section[i * (local_ncols + 2) + j] = 0.0f;
  116. }
  117. }
  118. }
  119.  
  120. // // Call the stencil kernel
  121. double tic = wtime();
  122.  
  123. for (int t = 0; t < niters; ++t) {
  124.  
  125. // SEND left
  126. for(int i = 0; i < local_nrows + 2; ++i){
  127.  
  128. sendbuf[i] = section[i * (local_ncols + 2) + 1];
  129.  
  130. }
  131.  
  132. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, left, 0,
  133. recvbuf, local_nrows + 2, MPI_FLOAT, right, 0, MPI_COMM_WORLD, &status);
  134.  
  135. if(right != MASTER)
  136. {
  137. for(int i = 0; i < local_nrows + 2; ++i)
  138. section[i * (local_ncols + 2) + local_ncols + 1] = recvbuf[i];
  139. }
  140.  
  141. // SEND right
  142. for(int i = 0; i < local_nrows + 2; ++i)
  143. sendbuf[i] = section[i * (local_ncols + 2) + local_ncols];
  144.  
  145. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, right, 0,
  146. recvbuf, local_nrows + 2, MPI_FLOAT, left, 0, MPI_COMM_WORLD, &status);
  147.  
  148. if(rank != (size - 1 ))
  149. {
  150. for(int i = 0; i < local_nrows + 2; ++i)
  151. section[i * (local_ncols + 2)] = recvbuf[i];
  152. }
  153.  
  154. stencil(local_ncols, local_nrows, width, height, section, tmp_section);
  155.  
  156. // SEND left
  157. for(int i = 0; i < local_nrows + 2; ++i){
  158.  
  159. sendbuf[i] = tmp_section[i * (local_ncols + 2) + 1];
  160.  
  161. }
  162.  
  163. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, left, 0,
  164. recvbuf, local_nrows + 2, MPI_FLOAT, right, 0, MPI_COMM_WORLD, &status);
  165.  
  166. if(right != MASTER)
  167. {
  168. for(int i = 0; i < local_nrows + 2; ++i)
  169. tmp_section[i * (local_ncols + 2) + local_ncols + 1] = recvbuf[i];
  170. }
  171.  
  172. // SEND right
  173. for(int i = 0; i < local_nrows + 2; ++i)
  174. sendbuf[i] = tmp_section[i * (local_ncols + 2) + local_ncols];
  175.  
  176. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, right, 0,
  177. recvbuf, local_nrows + 2, MPI_FLOAT, left, 0, MPI_COMM_WORLD, &status);
  178.  
  179. if(rank != (size - 1 ))
  180. {
  181. for(int i = 0; i < local_nrows + 2; ++i)
  182. tmp_section[i * (local_ncols + 2)] = recvbuf[i];
  183. }
  184.  
  185. stencil(local_ncols, local_nrows, width, height, tmp_section, section);
  186.  
  187. }
  188. double toc = wtime();
  189.  
  190. // Gathering
  191.  
  192. for(int i = 1; i < local_nrows + 1; i++)
  193. {
  194. if(rank == MASTER)
  195. {
  196. for(int j = 1; j < local_ncols + 1 ; j++)
  197. {
  198. image[ (i * width) + j] = section[i * (local_ncols + 2) + j];
  199. }
  200.  
  201. for(int r = 1; r < size; r++)
  202. {
  203. int ncols = calc_ncols_from_rank(r, size, ny);
  204. // offset for each rank when storing back to image
  205. int offset = r * (ny / size) + 1;
  206.  
  207. MPI_Recv(&image[(i * width) + offset], local_ncols , MPI_FLOAT, r, 0, MPI_COMM_WORLD, &status);
  208. }
  209. }
  210. else
  211. {
  212. MPI_Send(&section[i * (local_ncols + 2) + 1], local_ncols , MPI_FLOAT, MASTER, 0, MPI_COMM_WORLD);
  213. }
  214. }
  215.  
  216. // Output if rank is MASTER
  217. if(rank == MASTER)
  218. {
  219. printf("------------------------------------\n");
  220. printf(" runtime: %lf s\n", toc - tic);
  221. printf("------------------------------------\n");
  222.  
  223. output_image(OUTPUT_FILE, nx, ny, width, height, image);
  224. }
  225.  
  226. MPI_Finalize();
  227.  
  228. free(image);
  229. free(tmp_image);
  230. free(sendbuf);
  231. free(recvbuf);
  232. free(section);
  233. free(tmp_section);
  234.  
  235. }
  236.  
  237. void stencil(const int local_ncols, const int local_nrows, const int width, const int height,
  238. float * restrict image, float * restrict tmp_image)
  239. {
  240. for (int i = 1; i < local_nrows + 1; ++i)
  241. {
  242. for (int j = 1; j < local_ncols + 1; ++j)
  243. {
  244. int cell = j + i * (local_ncols + 2);
  245. tmp_image[cell] = image[cell] * 0.6f + (image[cell - (local_ncols + 2)] + image[cell + (local_ncols + 2)] + image[cell - 1] + image[cell + 1]) * 0.1f;
  246. }
  247. }
  248. }
  249.  
  250. // Create the input image
  251. void init_image(const int nx, const int ny, const int width, const int height,
  252. float * restrict image, float * restrict tmp_image)
  253. {
  254. // Zero everything
  255. for (int j = 0; j < ny + 2; ++j) {
  256. for (int i = 0; i < nx + 2; ++i) {
  257. image[j + i * height] = 0.0;
  258. tmp_image[j + i * height] = 0.0;
  259. }
  260. }
  261.  
  262. const int tile_size = 64;
  263. // checkerboard pattern
  264. for (int jb = 0; jb < ny; jb += tile_size) {
  265. for (int ib = 0; ib < nx; ib += tile_size) {
  266. if ((ib + jb) % (tile_size * 2)) {
  267. const int jlim = (jb + tile_size > ny) ? ny : jb + tile_size;
  268. const int ilim = (ib + tile_size > nx) ? nx : ib + tile_size;
  269. for (int j = jb + 1; j < jlim + 1; ++j) {
  270. for (int i = ib + 1; i < ilim + 1; ++i) {
  271. image[j + i * height] = 100.0;
  272. }
  273. }
  274. }
  275. }
  276. }
  277. }
  278.  
  279. // Routine to output the image in Netpbm grayscale binary image format
  280. void output_image(const char* file_name, const int nx, const int ny,
  281. const int width, const int height, float * restrict image)
  282. {
  283. // Open output file
  284. FILE* fp = fopen(file_name, "w");
  285. if (!fp) {
  286. fprintf(stderr, "Error: Could not open %s\n", OUTPUT_FILE);
  287. exit(EXIT_FAILURE);
  288. }
  289.  
  290. // Ouptut image header
  291. fprintf(fp, "P5 %d %d 255\n", nx, ny);
  292.  
  293. // Calculate maximum value of image
  294. // This is used to rescale the values
  295. // to a range of 0-255 for output
  296. double maximum = 0.0;
  297. for (int j = 1; j < ny + 1; ++j) {
  298. for (int i = 1; i < nx + 1; ++i) {
  299. if (image[j + i * height] > maximum) maximum = image[j + i * height];
  300. }
  301. }
  302.  
  303. // Output image, converting to numbers 0-255
  304. for (int j = 1; j < ny + 1; ++j) {
  305. for (int i = 1; i < nx + 1; ++i) {
  306. fputc((char)(255.0 * image[j + i * height] / maximum), fp);
  307. }
  308. }
  309.  
  310. // Close the file
  311. fclose(fp);
  312. }
  313.  
  314. // Get the current time in seconds since the Epoch
  315. double wtime(void)
  316. {
  317. struct timeval tv;
  318. gettimeofday(&tv, NULL);
  319. return tv.tv_sec + tv.tv_usec * 1e-6;
  320. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement