Advertisement
Guest User

MinMax-Kernel

a guest
Feb 11th, 2013
91
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 2.45 KB | None | 0 0
  /* Number of slots in each per-work-group scratch array. Defaults to 64 and
   * may be overridden at build time (e.g. -DWORKGROUPSIZE=128). */
  #ifndef WORKGROUPSIZE
  #define WORKGROUPSIZE 64
  #endif

  /*
   * MinMax - per-work-group minimum and maximum of a 3D volume stored as a
   * flat float array.
   *
   * input  : the volume; element (x, y, z) lives at x + sizex*y + sizex*sizey*z.
   * output : must hold at least 2 * (total number of work-groups) floats.
   *          Work-group g (flattened with dimension 0 fastest) writes its
   *          minimum to output[2*g] and its maximum to output[2*g + 1].
   * sizex, sizey, sizez : extents used to flatten the 3D global id.
   * size   : total element count; work-items whose flattened id is >= size
   *          contribute nothing.
   *
   * A work-group that covers no valid element writes +INFINITY / -INFINITY,
   * the identities of min / max, so its pair is neutral in a later reduction.
   *
   * The kernel deliberately stops at per-group results: barrier() only
   * synchronises work-items of ONE work-group, so a single launch cannot
   * safely combine the partial results of different groups. With exactly one
   * work-group, output[0] / output[1] are already the global min / max.
   * Otherwise reduce the 2 * num_groups partial values on the host or in a
   * further launch writing to a different buffer (the min / max over all
   * partial values equals the global min / max).
   */
  kernel void MinMax( global const float * input,
                      global float * output,
                      int sizex, //size in x
                      int sizey, //size in y
                      int sizez, //size in z
                      int size   //global size x*y*z
                    )
  {
      // Flattened global index of this work-item (volume is stored as 1D).
      const int idx = (int)get_global_id(0);
      const int idy = (int)get_global_id(1);
      const int idz = (int)get_global_id(2);
      const int gid = idx + sizex * idy + sizex * sizey * idz;

      // Flattened index of this work-item inside its work-group.
      const int local_x = (int)get_local_size(0);
      const int local_y = (int)get_local_size(1);
      const int local_z = (int)get_local_size(2);
      const int local_size = local_x * local_y * local_z;
      const int lid = (int)get_local_id(0)
                    + local_x * (int)get_local_id(1)
                    + local_x * local_y * (int)get_local_id(2);

      // Flattened index of this work-group; selects the output slot pair.
      const int groups_x = (int)get_num_groups(0);
      const int groups_y = (int)get_num_groups(1);
      const int group_id = (int)get_group_id(0)
                         + groups_x * (int)get_group_id(1)
                         + groups_x * groups_y * (int)get_group_id(2);

      // Per-work-group scratch space for the tree reduction.
      local float Minvals[ WORKGROUPSIZE ];
      local float Maxvals[ WORKGROUPSIZE ];

      // Each work-item starts from the min/max identities so that padding
      // work-items (gid >= size) never influence the result.
      float my_min = INFINITY;
      float my_max = -INFINITY;
      if (gid < size)
      {
          const float v = input[gid];
          my_min = v;
          my_max = v;
      }

      // The first WORKGROUPSIZE work-items seed the scratch arrays.
      if (lid < WORKGROUPSIZE)
      {
          Minvals[lid] = my_min;
          Maxvals[lid] = my_max;
      }
      barrier(CLK_LOCAL_MEM_FENCE);

      // If the work-group is larger than the scratch arrays, fold the extra
      // work-items in, one WORKGROUPSIZE-wide batch per round, so that no
      // access goes out of bounds. The loop bounds are uniform across the
      // group, so every work-item reaches every barrier.
      for (int base = WORKGROUPSIZE; base < local_size; base += WORKGROUPSIZE)
      {
          const int slot = lid - base;
          if (slot >= 0 && slot < WORKGROUPSIZE)
          {
              Minvals[slot] = fmin(Minvals[slot], my_min);
              Maxvals[slot] = fmax(Maxvals[slot], my_max);
          }
          barrier(CLK_LOCAL_MEM_FENCE);
      }

      // Tree reduction in local memory. Rounding the stride up keeps every
      // element covered when the active count is not a power of two; in each
      // round the written range [0, active - stride) and the read range
      // [stride, active) do not overlap.
      int active = min(local_size, WORKGROUPSIZE);
      while (active > 1)
      {
          const int stride = (active + 1) / 2;
          if (lid < active - stride)
          {
              Minvals[lid] = fmin(Minvals[lid], Minvals[lid + stride]);
              Maxvals[lid] = fmax(Maxvals[lid], Maxvals[lid + stride]);
          }
          barrier(CLK_LOCAL_MEM_FENCE);
          active = stride;
      }

      // One work-item per group publishes the group's partial result.
      if (lid == 0)
      {
          output[2 * group_id]     = Minvals[0];
          output[2 * group_id + 1] = Maxvals[0];
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement