/* Number of float slots in each of the two __local scratch arrays of the
 * MinMax kernel. The arrays are indexed by the flattened local work-item id,
 * so the total work-group size (local_x * local_y * local_z) must not exceed
 * this value. */
#define WORKGROUPSIZE 64
/*
 * MinMax - minimum / maximum reduction over a 3-D float volume.
 *
 * Parameters:
 *   input   volume data, addressed as x + sizex*y + sizex*sizey*z
 *   output  receives one (min, max) pair per work-group:
 *             output[2*g]     = minimum found by work-group g
 *             output[2*g + 1] = maximum found by work-group g
 *           It must therefore hold at least 2 * (number of work-groups) floats.
 *           After the per-group step, the work-item with global id (0,0,0)
 *           folds all pairs together and stores the result in output[0]
 *           (min) and output[1] (max).
 *   sizex, sizey, sizez
 *           extent of the volume; work-items outside it contribute neutral
 *           values (+INFINITY / -INFINITY) instead of reading memory.
 *   size    not used by this kernel; kept so the argument list is unchanged.
 *
 * Launch requirements:
 *   - local_x * local_y * local_z must be <= WORKGROUPSIZE. If it is larger
 *     the kernel returns without writing anything, because the scratch
 *     arrays would otherwise be overrun.
 *
 * NOTE(review): barrier() only synchronizes the work-items of ONE work-group.
 * The final fold into output[0..1] reads pairs written by other work-groups,
 * and nothing guarantees those groups have finished. The result in
 * output[0..1] is therefore only guaranteed complete when the kernel is
 * launched with a single work-group; for larger launches the per-group pairs
 * should be combined in a second pass (follow-up kernel or host code).
 */
kernel void MinMax( global float * input,
                    global float * output,
                    int sizex,
                    int sizey,
                    int sizez,
                    int size
                    )
{
    /* __local storage has to be declared at kernel-function scope and cannot
     * carry an initializer. */
    __local float Minvals[ WORKGROUPSIZE ];
    __local float Maxvals[ WORKGROUPSIZE ];

    const int idx = get_global_id(0);
    const int idy = get_global_id(1);
    const int idz = get_global_id(2);

    const int idx_local = get_local_id(0);
    const int idy_local = get_local_id(1);
    const int idz_local = get_local_id(2);

    const int local_x = get_local_size(0);
    const int local_y = get_local_size(1);
    const int local_z = get_local_size(2);
    const int local_size = local_x * local_y * local_z;

    /* The work-group size is identical for every work-item, so this early
     * exit is taken by all of them or by none and cannot split a barrier. */
    if (local_size > WORKGROUPSIZE)
    {
        return;
    }

    /* Flattened ids: position inside the work-group and inside the volume.
     * The volume index is computed in size_t to avoid int overflow on large
     * volumes. */
    const int gid_local = idx_local + local_x * idy_local + local_x * local_y * idz_local;
    const size_t gid = (size_t)idx
                     + (size_t)sizex * ((size_t)idy + (size_t)sizey * (size_t)idz);

    const int groups_x = get_num_groups(0);
    const int groups_y = get_num_groups(1);
    const int group_count = groups_x * groups_y * (int)get_num_groups(2);
    const int group_id = (int)get_group_id(0)
                       + groups_x * (int)get_group_id(1)
                       + groups_y * groups_x * (int)get_group_id(2);

    /* Valid coordinates are 0 .. size-1 on every axis. */
    const bool isValid = (idx < sizex) && (idy < sizey) && (idz < sizez);

    /* Every work-item fills its slot; out-of-range ones use the identity
     * elements of fmin / fmax so they never influence the result. This keeps
     * all work-items on the same control path through the barriers below. */
    float lo = INFINITY;
    float hi = -INFINITY;
    if (isValid)
    {
        const float v = input[gid];
        lo = v;
        hi = v;
    }
    Minvals[gid_local] = lo;
    Maxvals[gid_local] = hi;
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Local reduction. Each step folds the upper part of the still-active
     * range onto the lower part; rounding the half up makes this work for
     * work-group sizes that are not a power of two. The loop bounds depend
     * only on local_size, so every work-item reaches every barrier. */
    for (int active = local_size; active > 1; )
    {
        const int half = (active + 1) / 2;
        if (gid_local < active - half)
        {
            Minvals[gid_local] = fmin(Minvals[gid_local], Minvals[gid_local + half]);
            Maxvals[gid_local] = fmax(Maxvals[gid_local], Maxvals[gid_local + half]);
        }
        barrier(CLK_LOCAL_MEM_FENCE);
        active = half;
    }

    /* Write this work-group's result back to global memory. */
    if (gid_local == 0)
    {
        output[group_id * 2]     = Minvals[0];
        output[group_id * 2 + 1] = Maxvals[0];
    }
    barrier(CLK_GLOBAL_MEM_FENCE);

    /* Global reduction by the first work-item of the whole launch (see the
     * NOTE in the header about cross-work-group ordering). It is seeded with
     * its own group's result, so no assumption about the value range of the
     * data is needed, and then folds in the pairs of all remaining groups. */
    if (gid == 0)
    {
        float mint = Minvals[0];
        float maxt = Maxvals[0];
        for (int g = 1; g < group_count; ++g)
        {
            mint = fmin(mint, output[2 * g]);
            maxt = fmax(maxt, output[2 * g + 1]);
        }
        output[0] = mint;
        output[1] = maxt;
    }
}