Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Compile-time configuration for the stereo disparity kernel.
#define WIDTH 1470      // image row stride in pixels ("width/2" in the original note)
#define HEIGHT 1008     // image row count ("height/2" in the original note)
#define BLOCKX 16       // width of the SSD matching window, in pixels
#define BLOCKY 16       // height of the SSD matching window, in pixels
#define MAXD 130        // largest disparity that is tested
#define THRESHOLD 15    // largest left/right disparity mismatch accepted by the cross-check

// TODO: change the kernel code to use the local id (the computation happens
// inside a work-group).
//
// If the post-processing is to be done in the same kernel, the threads cannot
// be synchronised globally. The execution has to be divided into work-groups
// instead; the threads of one work-group can then be synchronised with a
// barrier.
// ---> the group size (the local id ranges) must be chosen so that the
//      post-processing can be completed inside a single work-group.
// For example:
#define BLOCK_WIDTH (2 * MAXD)   // derived from MAXD so the two cannot drift apart
#define BLOCK_HEIGHT (2 * MAXD)
- __kernel void ssd_process(const __global unsigned char *pic_left, const __global unsigned char *pic_right,
- __global unsigned char *tmp1_disp_picL, __global unsigned char *tmp1_disp_picR,
- __global unsigned char *tmp2_disp_picL, __global unsigned char *tmp2_disp_picR,
- __global unsigned char *rslt_disp_picL, __global unsigned char *rslt_disp_picR)
- {
- unsigned int min_blocksumL = 100000000;
- unsigned int min_blocksumR = 100000000;
- unsigned int curr_blocksumL = 0;
- unsigned int curr_blocksumR = 0;
- unsigned int ssd_val;
- unsigned int cond;
- unsigned char i, j, d;
- unsigned char best_dL, best_dR;
- unsigned char left, right;
- char diff_true, diff_false;
- int index;
- //size_t idx = get_global_id(0);
- //size_t idy = get_global_id(1);
- size_t idx = get_local_id(0);
- size_t idy = get_local_id(1);
- size_t gx = get_group_id(0);
- size_t gy = get_group_id(1);
- for(d=0; d<=MAXD; d++) {
- for(j=0; j<BLOCKY; j++) {
- for(i=0; i<BLOCKX; i++) {
- index = (idy+j)*WIDTH+idx;
- //disp0 (left to right)
- cond = -(idx+i-d > 0); //check that row index is inside limits
- left = pic_left[index+i];
- diff_true = left - pic_right[index+i-d];
- diff_false = left - pic_right[index-idx]; //take first element of pic_right
- ssd_val = (cond & diff_true*diff_true) | ((~cond) & diff_false*diff_false);
- curr_blocksumL += ssd_val;
- //disp1 (right to left)
- cond = -(idx+i+d < WIDTH); //check that row index is inside limits
- right = pic_right[index+i];
- diff_true = right - pic_left[index+i+d];
- diff_false = right - pic_left[index-idx+WIDTH-1]; //take last element of pic_left
- ssd_val = (cond & diff_true*diff_true) | ((~cond) & diff_false*diff_false);
- curr_blocksumR += ssd_val;
- }
- }
- //disp0 (left to right)
- cond = -(curr_blocksumL < min_blocksumL);
- min_blocksumL = (cond & curr_blocksumL) | ((~cond) & min_blocksumL);
- best_dL = (cond & d) | ((~cond) & best_dL);
- curr_blocksumL = 0;
- //disp1 (right to left)
- cond = -(curr_blocksumR < min_blocksumR);
- min_blocksumR = (cond & curr_blocksumR) | ((~cond) & min_blocksumR);
- best_dR = (cond & d) | ((~cond) & best_dR);
- curr_blocksumR = 0;
- }
- index = idy*WIDTH+idx;
- tmp1_disp_picL[index] = best_dL;
- tmp1_disp_picR[index] = best_dR;
- min_blocksumL = 100000000;
- min_blocksumR = 100000000;
- //POST PROCESSING
- barrier(CLK_GLOBAL_MEM_FENCE); //all threads inside the group are finished here (all global memory reads/writes are done)
- //cross check left to right
- left = tmp1_disp_picL[index];
- //if(index-left >= 0) { //is inside the block
- right = tmp1_disp_picR[index-left];
- cond = -(abs(left - right) > THRESHOLD);
- tmp2_disp_picL[index] = ((cond & 0) | ((~cond) & left))/(float)MAXD*255;
- //}
- //cross check right to left
- right = tmp1_disp_picR[index];
- //if(index+right < BLOCK_SIZE) { //is inside the block
- left = tmp1_disp_picL[index+right];
- cond = -(abs(right - left) > THRESHOLD);
- tmp2_disp_picR[index] = ((cond & 0) | ((~cond) & right))/(float)MAXD*255;
- //}
- barrier(CLK_GLOBAL_MEM_FENCE); //all threads inside the group are finished here (all global memory reads/writes are done)
- //occlusion filling
- int k;
- char c = 0;
- //palikkaversio, kokeillaan ensin toimiiko tämä (pitäs tulla sama kuva mitä C-toteutuksella)
- //(tai itteasiassa parempi, ku siinä ei breikattu for-looppia, niin tuloskuva sai aina viimeisimmän non-zero pixelin arvon)
- if((tmp2_disp_picL[index] == 0) && (tmp1_disp_picL[index] != 0)) { //if pixel was set to zero in cross-checking
- //first try to find non-zero pixel from earlier pixels
- for(k=idx; k>=0; k--) {
- if(tmp2_disp_picL[index-idx+k] > 0) {
- //rslt_disp_picL[index] = tmp2_disp_picL[idy*WIDTH+k];
- c = 1;
- break;
- }
- }
- //if non-zero pixel was not found from earlier pixels, try to find from next pixels
- if(c == 0) {
- for(k=idx; k<BLOCK_WIDTH; k++) {
- if(tmp2_disp_picL[index-idx+k] > 0) {
- //rslt_disp_picL[index] = tmp2_disp_picL[idy*WIDTH+k];
- break;
- }
- }
- }
- rslt_disp_picL[index] = tmp2_disp_picL[index-idx+k];
- } else {
- rslt_disp_picL[index] = tmp2_disp_picL[index];
- }
- c = 0;
- if((tmp2_disp_picR[index] == 0) && (tmp1_disp_picR[index] != 0)) { //if pixel was set to zero in cross-checking
- //first try to find non-zero pixel from earlier pixels
- for(k=idx; k>=0; k--) {
- if(tmp2_disp_picR[index-idx+k] > 0) {
- //rslt_disp_picR[index] = tmp2_disp_picR[idy*WIDTH+k];
- c = 1;
- break;
- }
- }
- //if non-zero pixel was not found from earlier pixels, try to find from next pixels
- if(c == 0) {
- for(k=idx; k< BLOCK_WIDTH; k++) {
- if(tmp2_disp_picR[index-idx+k] > 0) {
- //rslt_disp_picR[index] = tmp2_disp_picR[idy*WIDTH+k];
- break:
- }
- }
- }
- rslt_disp_picR[index] = tmp2_disp_picR[index-idx+k];
- } else {
- rslt_disp_picR[index] = tmp2_disp_picR[index];
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement