Advertisement
Guest User

Untitled

a guest
Nov 16th, 2019
95
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 9.80 KB | None | 0 0
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <sys/time.h>
  4. #include "mpi.h"
  5. #include <string.h>
  6.  
  7. // Define output file name
  8. #define OUTPUT_FILE "karthik.pgm"
  9. #define MASTER 0
  10.  
  11. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  12. float * restrict image, float * restrict tmp_image);
  13. void init_image(const int nx, const int ny, const int width, const int height,
  14. float * restrict image, float * restrict tmp_image);
  15. void output_image(const char* file_name, const int nx, const int ny,
  16. const int width, const int height, float * restrict image);
  17. double wtime(void);
  18.  
  19. int calc_ncols_from_rank(int rank, int size, int height)
  20. {
  21. int ncols;
  22.  
  23. ncols = height / size; /* integer division */
  24. if ((height % size) != 0) { /* if there is a remainder */
  25. if (rank == size - 1)
  26. ncols += height % size; /* add remainder to last rank */
  27. }
  28.  
  29. return ncols;
  30. }
  31.  
  32. int main(int argc, char* argv[])
  33. {
  34. // Check usage
  35. if (argc != 4) {
  36. fprintf(stderr, "Usage: %s nx ny niters\n", argv[0]);
  37. exit(EXIT_FAILURE);
  38. }
  39.  
  40. // Initiliase problem dimensions from command line arguments
  41. int nx = atoi(argv[1]);
  42. int ny = atoi(argv[2]);
  43. int niters = atoi(argv[3]);
  44.  
  45. // Adding MPI stuff
  46. int rank; /* 'rank' of process among it's cohort */
  47. int size; /* size of cohort, i.e. num processes started */
  48.  
  49. MPI_Status status; /* struct used by MPI_Recv */
  50. int left; /* the rank of the process to the left */
  51. int right; /* the rank of the process to the right */
  52. int local_nrows; /* number of rows apportioned to this rank */
  53. int local_ncols; /* number of columns apportioned to this rank */
  54.  
  55. float *sendbuf; /* buffer to hold values to send */
  56. float *recvbuf; /* buffer to hold received values */
  57.  
  58. sendbuf = (float*)malloc(sizeof(float) * (local_nrows + 2));
  59. recvbuf = (float*)malloc(sizeof(float) * (local_nrows + 2));
  60.  
  61. // we pad the outer edge of the image to avoid out of range address issues in
  62. // stencil
  63. int width = nx + 2;
  64. int height = ny + 2;
  65.  
  66. /* initialise our MPI environment */
  67. MPI_Init( &argc, &argv );
  68. MPI_Comm_size( MPI_COMM_WORLD, &size );
  69. MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  70.  
  71. /*
  72. ** determine process ranks to the left and right of rank
  73. ** respecting periodic boundary conditions
  74. */
  75. left = (rank == MASTER) ? (rank + size - 1) : (rank - 1);
  76. right = (rank + 1) % size;
  77.  
  78. // Allocate the image
  79. float * restrict image = malloc(sizeof(float) * width * height);
  80. float * restrict tmp_image = malloc(sizeof(float) * width * height);
  81. // float * restrict result = malloc(sizeof(float) * width * height);
  82.  
  83. local_nrows = nx;
  84. local_ncols = calc_ncols_from_rank(rank, size, ny);
  85.  
  86. /* check whether the initialisation was successful */
  87. if ( local_ncols < 1 ) {
  88. fprintf(stderr,"Error: too many processes:- local_ncols < 1\n");
  89. MPI_Abort(MPI_COMM_WORLD,EXIT_FAILURE);
  90. }
  91.  
  92. int section_ncols = local_ncols + 2;
  93.  
  94. printf("section_nrows = %d, section_cols = %d for rank %d\n", local_nrows, section_ncols, rank);
  95.  
  96. float * restrict section = malloc(sizeof(float) * (local_nrows + 2) * section_ncols);
  97. float * restrict tmp_section = malloc(sizeof(float) * (local_nrows + 2) * section_ncols);
  98.  
  99. printf("Allocated space for section and tmp_section for rank = %d\n", rank);
  100.  
  101. // Set the input image
  102. init_image(nx, ny, width, height, image, tmp_image);
  103.  
  104. // Initialising the sections
  105. for(int i = 0; i < local_nrows + 2; i++) {
  106. for(int j = 0; j < local_ncols + 2; j++) {
  107. if (j > 0 && j < (local_ncols + 1) && i > 0 && i < (local_nrows + 1))
  108. {
  109. section[i * (local_ncols + 2) + j] = image[( i * width + j + (ny/size * rank) )];
  110. tmp_section[i * (local_ncols + 2) + j] = image[(ny/size * rank + i) * ((local_ncols + 2) + j)]; /* core cells */
  111. }
  112. else
  113. {
  114. section[i * (local_ncols + 2) + j] = 0.0f;
  115. tmp_section[i * (local_ncols + 2) + j] = 0.0f;
  116. }
  117. }
  118. }
  119.  
  120. // // Call the stencil kernel
  121. double tic = wtime();
  122.  
  123. for (int t = 0; t < niters; ++t) {
  124.  
  125. for(int i = 0; i < local_nrows + 2; ++i){
  126.  
  127. sendbuf[i] = section[i * (local_ncols + 2) + 1];
  128.  
  129. }
  130.  
  131. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, left, 0,
  132. recvbuf, local_nrows + 2, MPI_FLOAT, right, 0, MPI_COMM_WORLD, &status);
  133.  
  134. if(rank != size - 1)
  135. {
  136. for(int i = 0; i < local_nrows + 2; ++i)
  137. section[i * (local_ncols + 2) + local_ncols + 1] = recvbuf[i];
  138. }
  139.  
  140. // SEND right
  141. for(int i = 0; i < local_nrows + 2; ++i)
  142. sendbuf[i] = section[i * (local_ncols + 2) + local_ncols];
  143.  
  144. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, right, 0,
  145. recvbuf, local_nrows + 2, MPI_FLOAT, left, 0, MPI_COMM_WORLD, &status);
  146.  
  147. if(rank != MASTER)
  148. {
  149. for(int i = 0; i < local_nrows + 2; ++i)
  150. section[i * (local_ncols + 2)] = recvbuf[i];
  151. }
  152.  
  153. stencil(local_ncols, local_nrows, width, height, section, tmp_section);
  154.  
  155. for(int i = 0; i < local_nrows + 2; ++i)
  156. sendbuf[i] = tmp_section[i * (local_ncols + 2) + 1];
  157.  
  158. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, left, 0,
  159. recvbuf, local_nrows + 2, MPI_FLOAT, right, 0, MPI_COMM_WORLD, &status);
  160.  
  161. if(rank != size - 1)
  162. {
  163. for(int i = 0; i < local_nrows + 2; ++i)
  164. tmp_section[i * (local_ncols + 2) + local_ncols + 1] = recvbuf[i];
  165. }
  166.  
  167. // SEND right
  168. for(int i = 0; i < local_nrows + 2; ++i)
  169. sendbuf[i] = tmp_section[i * (local_ncols + 2) + local_ncols];
  170.  
  171. MPI_Sendrecv(sendbuf, local_nrows + 2, MPI_FLOAT, right, 0,
  172. recvbuf, local_nrows + 2, MPI_FLOAT, left, 0, MPI_COMM_WORLD, &status);
  173.  
  174. if(rank != MASTER)
  175. {
  176. for(int i = 0; i < local_nrows + 2; ++i)
  177. tmp_section[i * (local_ncols + 2)] = recvbuf[i];
  178. }
  179.  
  180. stencil(local_ncols, local_nrows, width, height, tmp_section, section);
  181. }
  182. double toc = wtime();
  183. //
  184. // Gathering
  185.  
  186. for(int i = 1; i < local_nrows + 1; i++)
  187. {
  188. if(rank == MASTER)
  189. {
  190. for(int j = 1; j < local_ncols + 1 ; j++)
  191. {
  192. image[ (i * width) + j] = section[i * (local_ncols + 2) + j];
  193. }
  194.  
  195. for(int r = 1; r < size; r++)
  196. {
  197. int ncols = calc_ncols_from_rank(r, size, ny);
  198. // offset for each rank when storing back to image
  199. int offset = r * (ny / size) + 1;
  200.  
  201. MPI_Recv(&image[(i * width) + offset], local_ncols , MPI_FLOAT, r, 0, MPI_COMM_WORLD, &status);
  202.  
  203. // MPI_Recv(recvbuf, ncols + 2, MPI_FLOAT, r, 0, MPI_COMM_WORLD, &status);
  204. // image[i * (ncols + 2)] = recvbuf[i];
  205. }
  206. }
  207. else
  208. {
  209. MPI_Send(&section[i * (local_ncols + 2) + 1], local_ncols , MPI_FLOAT, MASTER, 0, MPI_COMM_WORLD);
  210. }
  211. }
  212.  
  213. // Output if rank is MASTER
  214. if(rank == MASTER)
  215. {
  216. // printf("------------------------------------\n");
  217. // printf(" runtime: %lf s\n", toc - tic);
  218. // printf("------------------------------------\n");
  219.  
  220. output_image(OUTPUT_FILE, nx, ny, width, height, image);
  221. }
  222.  
  223. free(image);
  224. free(tmp_image);
  225. free(sendbuf);
  226. free(recvbuf);
  227. free(section);
  228. free(tmp_section);
  229.  
  230. MPI_Finalize();
  231. }
  232.  
  233. void stencil(const int local_ncols, const int local_nrows, const int width, const int height,
  234. float * restrict image, float * restrict tmp_image)
  235. {
  236. for (int i = 1; i < local_nrows + 1; ++i)
  237. {
  238. for (int j = 1; j < local_ncols + 1; ++j)
  239. {
  240. int cell = j + i * (local_ncols + 2);
  241. tmp_image[cell] = image[cell] * 0.6f + (image[cell - (local_ncols + 2)] + image[cell + (local_ncols + 2)] + image[cell - 1] + image[cell + 1]) * 0.1f;
  242. }
  243. }
  244. }
  245.  
  246. // Create the input image
  247. void init_image(const int nx, const int ny, const int width, const int height,
  248. float * restrict image, float * restrict tmp_image)
  249. {
  250. // Zero everything
  251. for (int j = 0; j < ny + 2; ++j) {
  252. for (int i = 0; i < nx + 2; ++i) {
  253. image[j + i * height] = 0.0;
  254. tmp_image[j + i * height] = 0.0;
  255. }
  256. }
  257.  
  258. const int tile_size = 64;
  259. // checkerboard pattern
  260. for (int jb = 0; jb < ny; jb += tile_size) {
  261. for (int ib = 0; ib < nx; ib += tile_size) {
  262. if ((ib + jb) % (tile_size * 2)) {
  263. const int jlim = (jb + tile_size > ny) ? ny : jb + tile_size;
  264. const int ilim = (ib + tile_size > nx) ? nx : ib + tile_size;
  265. for (int j = jb + 1; j < jlim + 1; ++j) {
  266. for (int i = ib + 1; i < ilim + 1; ++i) {
  267. image[j + i * height] = 100.0;
  268. }
  269. }
  270. }
  271. }
  272. }
  273. }
  274.  
  275. // Routine to output the image in Netpbm grayscale binary image format
  276. void output_image(const char* file_name, const int nx, const int ny,
  277. const int width, const int height, float * restrict image)
  278. {
  279. // Open output file
  280. FILE* fp = fopen(file_name, "w");
  281. if (!fp) {
  282. fprintf(stderr, "Error: Could not open %s\n", OUTPUT_FILE);
  283. exit(EXIT_FAILURE);
  284. }
  285.  
  286. // Ouptut image header
  287. fprintf(fp, "P5 %d %d 255\n", nx, ny);
  288.  
  289. // Calculate maximum value of image
  290. // This is used to rescale the values
  291. // to a range of 0-255 for output
  292. double maximum = 0.0;
  293. for (int j = 1; j < ny + 1; ++j) {
  294. for (int i = 1; i < nx + 1; ++i) {
  295. if (image[j + i * height] > maximum) maximum = image[j + i * height];
  296. }
  297. }
  298.  
  299. // Output image, converting to numbers 0-255
  300. for (int j = 1; j < ny + 1; ++j) {
  301. for (int i = 1; i < nx + 1; ++i) {
  302. fputc((char)(255.0 * image[j + i * height] / maximum), fp);
  303. }
  304. }
  305.  
  306. // Close the file
  307. fclose(fp);
  308. }
  309.  
  310. // Get the current time in seconds since the Epoch
  311. double wtime(void)
  312. {
  313. struct timeval tv;
  314. gettimeofday(&tv, NULL);
  315. return tv.tv_sec + tv.tv_usec * 1e-6;
  316. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement