Guest User

Untitled

a guest
Dec 9th, 2019
98
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <sys/time.h>
  4. #include "mpi.h"
  5. #include <math.h>
  6.  
  7. // Define output file name
  8. #define OUTPUT_FILE "stencil.pgm"
  9. #define MASTER 0
  10.  
  11. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  12. float* image, float* tmp_image);
  13. void init_image(const int nx, const int ny, const int width, const int height,
  14. float* image, float* tmp_image);
  15. void output_image(const char* file_name, const int nx, const int ny,
  16. const int width, const int height, float* image);
  17. double wtime(void);
  18.  
  19. int calc_nrows(int rank, int size, int nx);
  20.  
  21. int main(int argc, char* argv[])
  22. {
  23.  
  24. // Check usage
  25. if (argc != 4) {
  26. fprintf(stderr, "Usage: %s nx ny niters\n", argv[0]);
  27. exit(EXIT_FAILURE);
  28. }
  29.  
  30. // Initiliase problem dimensions from command line arguments
  31. int nx = atoi(argv[1]);
  32. int ny = atoi(argv[2]);
  33. int niters = atoi(argv[3]);
  34.  
  35. int rank; /* the rank of this process */
  36. int up; /* the rank of the process to the left */
  37. int down; /* the rank of the process to the right */
  38. int size; /* number of processes in the communicator */
  39. int tag = 0; /* scope for adding extra information to a message */
  40. MPI_Status status; /* struct used by MPI_Recv */
  41. int local_nrows; /* number of rows apportioned to this rank */
  42. int local_ncols; /* number of columns apportioned to this rank */
  43.  
  44. // we pad the outer edge of the image to avoid out of range address issues in
  45. // stencil
  46. int width = nx + 2;
  47. int height = ny + 2;
  48.  
  49. /*
  50. ** MPI_Init returns once it has started up processes
  51. ** Get size of cohort and rank for this process
  52. */
  53. MPI_Init( &argc, &argv );
  54. MPI_Comm_size( MPI_COMM_WORLD, &size );
  55. MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  56.  
  57. /*
  58. ** determine process ranks to the left and right of rank
  59. ** respecting periodic boundary conditions
  60. */
  61. up = (rank == MASTER) ? (rank + size - 1) : (rank - 1);
  62. down = (rank + 1) % size;
  63.  
  64. if(rank == MASTER) up = MPI_PROC_NULL;
  65. if(rank == size - 1) down = MPI_PROC_NULL;
  66.  
  67. /*
  68. ** determine local grid size
  69. ** each rank gets all the rows, but a subset of the number of columns
  70. */
  71. local_nrows = calc_nrows(rank, size, nx);
  72. local_ncols = ny;
  73. if (local_nrows < 1) {
  74. fprintf(stderr,"Error: too many processes:- local_ncols < 1\n");
  75. MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
  76. }
  77.  
  78. // Allocate the image
  79. float* image = malloc(sizeof(float) * width * height);
  80. float* tmp_image = malloc(sizeof(float) * width * height);
  81.  
  82. // Set the input image
  83. init_image(nx, ny, width, height, image, tmp_image);
  84.  
  85. // Allocate local image
  86. float* local = malloc(sizeof(float) * (local_nrows + 2) * (local_ncols + 2));
  87. float* tmp_local = malloc(sizeof(float) * (local_nrows + 2) * (local_ncols + 2));
  88.  
  89. //Initialise local tmp_image
  90. int step = floor(nx/size);
  91. for (int i = 0; i < local_nrows + 2; i++) {
  92. for (int j = 0; j < local_ncols + 2; j++) {
  93. if (j > 0 && j < (local_ncols + 1) && i > 0 && i < (local_nrows + 1)) {
  94. local[i * (local_ncols + 2) + j] = image[(j + i * width + (step * rank * width))];
  95. tmp_local[i * (local_ncols + 2) + j] = image[(j + i * width + (step * rank * width))];
  96. } /* core cells */
  97. else {
  98. local[i * (local_ncols + 2) + j] = 0.0f;
  99. tmp_local[i * (local_ncols + 2) + j] = 0.0f;
  100. } /* halo cells */
  101. }
  102. }
  103.  
  104. // Call the stencil kernel
  105. double tic = wtime();
  106. for (int t = 0; t < niters; ++t) {
  107.  
  108. //Halo exchange for local
  109. // Sending to up first then receive to the down
  110. MPI_Sendrecv(&local[(local_ncols + 2) + 1], local_ncols, MPI_FLOAT, up, 0, &local[(local_nrows + 1) * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, MPI_COMM_WORLD, &status);
  111.  
  112. // Send to down then receive from up
  113. MPI_Sendrecv(&local[local_nrows * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, &local[1], local_ncols, MPI_FLOAT, up, 0, MPI_COMM_WORLD, &status);
  114.  
  115. stencil(local_nrows, local_ncols, width, height, local, tmp_local);
  116.  
  117. //Halo exchange for temp
  118. // Sending to up first then receive to the down
  119. MPI_Sendrecv(&tmp_local[(local_ncols + 2) + 1], local_ncols, MPI_FLOAT, up, 0, &tmp_local[(local_nrows + 1) * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, MPI_COMM_WORLD, &status);
  120.  
  121. // Send to down then receive from up
  122. MPI_Sendrecv(&tmp_local[local_nrows * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, &tmp_local[1], local_ncols, MPI_FLOAT, up, 0, MPI_COMM_WORLD, &status);
  123.  
  124. stencil(local_nrows, local_ncols, width, height, tmp_local, local);
  125. }
  126. double toc = wtime();
  127.  
  128. printf("a\n");
  129.  
  130. if (rank == MASTER) {
  131. for (int i = 1; i < local_nrows + 1; i++) {
  132. for (int j = 1; j < local_ncols + 1; j++) {
  133. image[j + (i * width)] = local[j + i * (local_ncols + 2)];
  134. }
  135. }
  136. for (int r = 1; r < size; r++) {
  137. int offset = r * local_nrows;
  138. int nrows = calc_nrows(r, size, nx);
  139. for (int i = 1; i < nrows + 1; i++) {
  140. MPI_Recv(&image[(offset + i) * width + 1], local_ncols, MPI_FLOAT, r, tag, MPI_COMM_WORLD, &status);
  141. }
  142. }
  143. }
  144.  
  145. else {
  146. for (int i = 1; i < local_nrows + 1; i++) {
  147. MPI_Send(&local[i * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, MASTER, tag, MPI_COMM_WORLD);
  148. }
  149. }
  150.  
  151. if (rank == MASTER) {
  152. // Output
  153. printf("------------------------------------\n");
  154. printf(" runtime: %lf s\n", toc - tic);
  155. printf("------------------------------------\n");
  156.  
  157. output_image(OUTPUT_FILE, nx, ny, width, height, image);
  158. }
  159.  
  160. MPI_Finalize();
  161.  
  162. free(image);
  163. free(tmp_image);
  164. free(local);
  165. free(tmp_local);
  166.  
  167. }
  168.  
  169. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  170. float* image, float* tmp_image)
  171. {
  172. for (int i = 1; i < local_nrows + 1; ++i) {
  173. for (int j = 1; j < local_ncols + 1; ++j) {
  174. int temp = j + i * (local_ncols + 2);
  175. tmp_image[temp] = image[temp] * 0.6f + 0.1f * (image[temp - (local_ncols + 2)] + image[temp + (local_ncols + 2)] + image[temp - 1] + image[temp + 1]);
  176. }
  177. }
  178. }
  179.  
  180. // Create the input image
  181. void init_image(const int nx, const int ny, const int width, const int height,
  182. float* image, float* tmp_image)
  183. {
  184. // Zero everything
  185. for (int j = 0; j < ny + 2; ++j) {
  186. for (int i = 0; i < nx + 2; ++i) {
  187. image[j + i * height] = 0.0;
  188. tmp_image[j + i * height] = 0.0;
  189. }
  190. }
  191.  
  192. const int tile_size = 64;
  193. // checkerboard pattern
  194. for (int jb = 0; jb < ny; jb += tile_size) {
  195. for (int ib = 0; ib < nx; ib += tile_size) {
  196. if ((ib + jb) % (tile_size * 2)) {
  197. const int jlim = (jb + tile_size > ny) ? ny : jb + tile_size;
  198. const int ilim = (ib + tile_size > nx) ? nx : ib + tile_size;
  199. for (int j = jb + 1; j < jlim + 1; ++j) {
  200. for (int i = ib + 1; i < ilim + 1; ++i) {
  201. image[j + i * height] = 100.0;
  202. }
  203. }
  204. }
  205. }
  206. }
  207. }
  208.  
  209. // Routine to output the image in Netpbm grayscale binary image format
  210. void output_image(const char* file_name, const int nx, const int ny,
  211. const int width, const int height, float* image)
  212. {
  213. // Open output file
  214. FILE* fp = fopen(file_name, "w");
  215. if (!fp) {
  216. fprintf(stderr, "Error: Could not open %s\n", OUTPUT_FILE);
  217. exit(EXIT_FAILURE);
  218. }
  219.  
  220. // Ouptut image header
  221. fprintf(fp, "P5 %d %d 255\n", nx, ny);
  222.  
  223. // Calculate maximum value of image
  224. // This is used to rescale the values
  225. // to a range of 0-255 for output
  226. double maximum = 0.0;
  227. for (int j = 1; j < ny + 1; ++j) {
  228. for (int i = 1; i < nx + 1; ++i) {
  229. if (image[j + i * height] > maximum) maximum = image[j + i * height];
  230. }
  231. }
  232.  
  233. // Output image, converting to numbers 0-255
  234. for (int j = 1; j < ny + 1; ++j) {
  235. for (int i = 1; i < nx + 1; ++i) {
  236. fputc((char)(255.0 * image[j + i * height] / maximum), fp);
  237. }
  238. }
  239.  
  240. // Close the file
  241. fclose(fp);
  242. }
  243.  
  244. // Get the current time in seconds since the Epoch
  245. double wtime(void)
  246. {
  247. struct timeval tv;
  248. gettimeofday(&tv, NULL);
  249. return tv.tv_sec + tv.tv_usec * 1e-6;
  250. }
  251.  
  252. int calc_nrows(int rank, int size, int nx)
  253. {
  254. int nrows;
  255.  
  256. nrows = nx / size; /* integer division */
  257. if ((nx % size) != 0) { /* if there is a remainder */
  258. if (rank == size - 1)
  259. nrows += nx % size; /* add remainder to last rank */
  260. }
  261.  
  262. return nrows;
  263.  
  264. }
RAW Paste Data