SHARE
TWEET

Untitled

a guest Dec 9th, 2019 86 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #include <stdio.h>
  2. #include <stdlib.h>
  3. #include <sys/time.h>
  4. #include "mpi.h"
  5. #include <math.h>
  6.  
  7. // Define output file name
  8. #define OUTPUT_FILE "stencil.pgm"
  9. #define MASTER 0
  10.  
  11. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  12.              float* image, float* tmp_image);
  13. void init_image(const int nx, const int ny, const int width, const int height,
  14.                 float* image, float* tmp_image);
  15. void output_image(const char* file_name, const int nx, const int ny,
  16.                   const int width, const int height, float* image);
  17. double  wtime(void);
  18.  
  19. int calc_nrows(int rank, int size, int nx);
  20.  
  21. int main(int argc, char* argv[])
  22. {
  23.  
  24.   // Check usage
  25.   if (argc != 4) {
  26.     fprintf(stderr, "Usage: %s nx ny niters\n", argv[0]);
  27.     exit(EXIT_FAILURE);
  28.   }
  29.  
  30.   // Initiliase problem dimensions from command line arguments
  31.   int nx = atoi(argv[1]);
  32.   int ny = atoi(argv[2]);
  33.   int niters = atoi(argv[3]);
  34.  
  35.   int rank;              /* the rank of this process */
  36.   int up;              /* the rank of the process to the left */
  37.   int down;             /* the rank of the process to the right */
  38.   int size;              /* number of processes in the communicator */
  39.   int tag = 0;           /* scope for adding extra information to a message */
  40.   MPI_Status status;     /* struct used by MPI_Recv */
  41.   int local_nrows;       /* number of rows apportioned to this rank */
  42.   int local_ncols;       /* number of columns apportioned to this rank */
  43.  
  44.   // we pad the outer edge of the image to avoid out of range address issues in
  45.   // stencil
  46.   int width = nx + 2;
  47.   int height = ny + 2;
  48.  
  49.   /*
  50.   ** MPI_Init returns once it has started up processes
  51.   ** Get size of cohort and rank for this process
  52.   */
  53.   MPI_Init( &argc, &argv );
  54.   MPI_Comm_size( MPI_COMM_WORLD, &size );
  55.   MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  56.  
  57.   /*
  58.   ** determine process ranks to the left and right of rank
  59.   ** respecting periodic boundary conditions
  60.   */
  61.   up = (rank == MASTER) ? (rank + size - 1) : (rank - 1);
  62.   down = (rank + 1) % size;
  63.  
  64.   if(rank == MASTER) up = MPI_PROC_NULL;
  65.   if(rank == size - 1) down = MPI_PROC_NULL;
  66.  
  67.   /*
  68.   ** determine local grid size
  69.   ** each rank gets all the rows, but a subset of the number of columns
  70.   */
  71.   local_nrows = calc_nrows(rank, size, nx);
  72.   local_ncols = ny;
  73.   if (local_nrows < 1) {
  74.     fprintf(stderr,"Error: too many processes:- local_ncols < 1\n");
  75.     MPI_Abort(MPI_COMM_WORLD, EXIT_FAILURE);
  76.   }
  77.  
  78.   // Allocate the image
  79.   float* image = malloc(sizeof(float) * width * height);
  80.   float* tmp_image = malloc(sizeof(float) * width * height);
  81.  
  82.   // Set the input image
  83.   init_image(nx, ny, width, height, image, tmp_image);
  84.  
  85.   // Allocate local image
  86.   float* local = malloc(sizeof(float) * (local_nrows + 2) * (local_ncols + 2));
  87.   float* tmp_local = malloc(sizeof(float) * (local_nrows + 2) * (local_ncols + 2));
  88.  
  89.   //Initialise local tmp_image
  90.   int step = floor(nx/size);
  91.   for (int i = 0; i < local_nrows + 2; i++) {
  92.     for (int j = 0; j < local_ncols + 2; j++) {
  93.       if (j > 0 && j < (local_ncols + 1) && i > 0 && i < (local_nrows + 1)) {
  94.            local[i * (local_ncols + 2) + j] = image[(j + i * width + (step * rank * width))];
  95.          tmp_local[i * (local_ncols + 2) + j] = image[(j + i * width + (step * rank * width))];
  96.        }               /* core cells */
  97.       else {
  98.            local[i * (local_ncols + 2) + j] = 0.0f;
  99.          tmp_local[i * (local_ncols + 2) + j] = 0.0f;
  100.        }                      /* halo cells */
  101.     }
  102.   }
  103.  
  104.   // Call the stencil kernel
  105.   double tic = wtime();
  106.   for (int t = 0; t < niters; ++t) {
  107.  
  108.     //Halo exchange for local
  109.     // Sending to up first then receive to the down
  110.     MPI_Sendrecv(&local[(local_ncols + 2) + 1], local_ncols, MPI_FLOAT, up, 0, &local[(local_nrows + 1) * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, MPI_COMM_WORLD, &status);
  111.  
  112.     // Send to down then receive from up
  113.     MPI_Sendrecv(&local[local_nrows * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, &local[1], local_ncols, MPI_FLOAT, up, 0, MPI_COMM_WORLD, &status);
  114.    
  115.     stencil(local_nrows, local_ncols, width, height, local, tmp_local);
  116.  
  117.     //Halo exchange for temp
  118.     // Sending to up first then receive to the down
  119.     MPI_Sendrecv(&tmp_local[(local_ncols + 2) + 1], local_ncols, MPI_FLOAT, up, 0, &tmp_local[(local_nrows + 1) * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, MPI_COMM_WORLD, &status);
  120.  
  121.     // Send to down then receive from up
  122.     MPI_Sendrecv(&tmp_local[local_nrows * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, down, 0, &tmp_local[1], local_ncols, MPI_FLOAT, up, 0, MPI_COMM_WORLD, &status);
  123.    
  124.     stencil(local_nrows, local_ncols, width, height, tmp_local, local);
  125.   }
  126.   double toc = wtime();
  127.  
  128.   printf("a\n");
  129.  
  130.   if (rank == MASTER) {
  131.     for (int i = 1; i < local_nrows + 1; i++) {
  132.       for (int j = 1; j < local_ncols + 1; j++) {
  133.         image[j + (i * width)] = local[j + i * (local_ncols + 2)];
  134.       }
  135.     }
  136.     for (int r = 1; r < size; r++) {
  137.       int offset = r * local_nrows;
  138.       int nrows = calc_nrows(r, size, nx);
  139.       for (int i = 1; i < nrows + 1; i++) {
  140.         MPI_Recv(&image[(offset + i) * width + 1], local_ncols, MPI_FLOAT, r, tag, MPI_COMM_WORLD, &status);
  141.       }
  142.     }
  143.   }
  144.  
  145.   else {
  146.     for (int i = 1; i < local_nrows + 1; i++) {
  147.       MPI_Send(&local[i * (local_ncols + 2) + 1], local_ncols, MPI_FLOAT, MASTER, tag, MPI_COMM_WORLD);
  148.     }
  149.   }
  150.  
  151.   if (rank == MASTER) {
  152.     // Output
  153.     printf("------------------------------------\n");
  154.     printf(" runtime: %lf s\n", toc - tic);
  155.     printf("------------------------------------\n");
  156.  
  157.     output_image(OUTPUT_FILE, nx, ny, width, height, image);
  158.   }
  159.  
  160.   MPI_Finalize();
  161.  
  162.   free(image);
  163.   free(tmp_image);
  164.   free(local);
  165.   free(tmp_local);
  166.  
  167. }
  168.  
  169. void stencil(const int local_nrows, const int local_ncols, const int width, const int height,
  170.              float* image, float* tmp_image)
  171. {
  172.   for (int i = 1; i < local_nrows + 1; ++i) {
  173.     for (int j = 1; j < local_ncols + 1; ++j) {
  174.          int temp = j + i * (local_ncols + 2);
  175.        tmp_image[temp] = image[temp] * 0.6f + 0.1f * (image[temp - (local_ncols + 2)] + image[temp + (local_ncols + 2)] + image[temp - 1] + image[temp + 1]);
  176.     }
  177.   }
  178. }
  179.  
  180. // Create the input image
  181. void init_image(const int nx, const int ny, const int width, const int height,
  182.                 float* image, float* tmp_image)
  183. {
  184.   // Zero everything
  185.   for (int j = 0; j < ny + 2; ++j) {
  186.     for (int i = 0; i < nx + 2; ++i) {
  187.       image[j + i * height] = 0.0;
  188.       tmp_image[j + i * height] = 0.0;
  189.     }
  190.   }
  191.  
  192.   const int tile_size = 64;
  193.   // checkerboard pattern
  194.   for (int jb = 0; jb < ny; jb += tile_size) {
  195.     for (int ib = 0; ib < nx; ib += tile_size) {
  196.       if ((ib + jb) % (tile_size * 2)) {
  197.         const int jlim = (jb + tile_size > ny) ? ny : jb + tile_size;
  198.         const int ilim = (ib + tile_size > nx) ? nx : ib + tile_size;
  199.         for (int j = jb + 1; j < jlim + 1; ++j) {
  200.           for (int i = ib + 1; i < ilim + 1; ++i) {
  201.             image[j + i * height] = 100.0;
  202.           }
  203.         }
  204.       }
  205.     }
  206.   }
  207. }
  208.  
  209. // Routine to output the image in Netpbm grayscale binary image format
  210. void output_image(const char* file_name, const int nx, const int ny,
  211.                   const int width, const int height, float* image)
  212. {
  213.   // Open output file
  214.   FILE* fp = fopen(file_name, "w");
  215.   if (!fp) {
  216.     fprintf(stderr, "Error: Could not open %s\n", OUTPUT_FILE);
  217.     exit(EXIT_FAILURE);
  218.   }
  219.  
  220.   // Ouptut image header
  221.   fprintf(fp, "P5 %d %d 255\n", nx, ny);
  222.  
  223.   // Calculate maximum value of image
  224.   // This is used to rescale the values
  225.   // to a range of 0-255 for output
  226.   double maximum = 0.0;
  227.   for (int j = 1; j < ny + 1; ++j) {
  228.     for (int i = 1; i < nx + 1; ++i) {
  229.       if (image[j + i * height] > maximum) maximum = image[j + i * height];
  230.     }
  231.   }
  232.  
  233.   // Output image, converting to numbers 0-255
  234.   for (int j = 1; j < ny + 1; ++j) {
  235.     for (int i = 1; i < nx + 1; ++i) {
  236.       fputc((char)(255.0 * image[j + i * height] / maximum), fp);
  237.     }
  238.   }
  239.  
  240.   // Close the file
  241.   fclose(fp);
  242. }
  243.  
  244. // Get the current time in seconds since the Epoch
  245. double wtime(void)
  246. {
  247.   struct timeval tv;
  248.   gettimeofday(&tv, NULL);
  249.   return tv.tv_sec + tv.tv_usec * 1e-6;
  250. }
  251.  
  252. int calc_nrows(int rank, int size, int nx)
  253. {
  254.   int nrows;
  255.  
  256.   nrows = nx / size;       /* integer division */
  257.   if ((nx % size) != 0) {  /* if there is a remainder */
  258.     if (rank == size - 1)
  259.       nrows += nx % size;  /* add remainder to last rank */
  260.   }
  261.  
  262.   return nrows;
  263.  
  264. }
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top