Advertisement
Guest User

ssd_kernel.cl

a guest
Nov 28th, 2015
70
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C 6.35 KB | None | 0 0
  // Image geometry used for all row-major indexing in the kernel.
  #define WIDTH 1470 // width/2
  #define HEIGHT 1008 // height/2
  // Size of the matching window that is summed for every candidate disparity.
  #define BLOCKX 16
  #define BLOCKY 16
  // Largest disparity that is tested (disparities 0..MAXD inclusive).
  #define MAXD 130
  // Largest left/right disparity disagreement accepted by the cross-check.
  #define THRESHOLD 15

  // TODO: change the kernel code to use the local id (the computation happens inside the group)

  // If the post-processing is to be done in the same kernel, threads cannot be
  // synchronised globally. Instead the execution has to be split into work-groups;
  // threads can then be synchronised inside a group with a barrier.
  // ---> the group size (local id ranges) has to be chosen so that the
  //      post-processing can be completed inside a single group, e.g.:
  #define BLOCK_WIDTH (MAXD * 2)
  #define BLOCK_HEIGHT (MAXD * 2)
  16.  
  /*
   * Reads one 8-bit pixel from a row-major image with a row stride of WIDTH.
   * Both coordinates are clamped, so a window that hangs over an image border
   * replicates the edge pixel instead of reading a neighbouring row or memory
   * outside the buffer.
   * Assumes the buffer holds at least WIDTH * HEIGHT bytes - confirm against the host code.
   */
  int ssd_pixel_clamped(const __global unsigned char *img, int x, int y)
  {
      return img[clamp(y, 0, HEIGHT - 1) * WIDTH + clamp(x, 0, WIDTH - 1)];
  }

  /*
   * Occlusion filling for one pixel of a cross-checked disparity row.
   *
   * Searches the row that starts at disp[row_start] for a non-zero value: first
   * from column x down to column 0, then from column x + 1 upwards to column
   * BLOCK_WIDTH - 1 (never past the end of the row). Returns the first non-zero
   * value found, or 0 when the searched range holds none.
   */
  unsigned char ssd_fill_from_row(const __global unsigned char *disp, int row_start, int x)
  {
      int k;

      // prefer the nearest valid pixel on the side of the earlier columns
      for (k = x; k >= 0; k--) {
          if (disp[row_start + k] > 0) {
              return disp[row_start + k];
          }
      }
      // nothing found there: take the nearest valid pixel from the following columns
      for (k = x + 1; k < BLOCK_WIDTH && k < WIDTH; k++) {
          if (disp[row_start + k] > 0) {
              return disp[row_start + k];
          }
      }
      // no valid pixel in the searched range: leave the pixel unfilled
      return 0;
  }

  /*
   * Block-matching (sum of squared differences) disparity estimation with
   * cross-checking and occlusion filling, all done in one kernel.
   *
   * Every work-item handles the pixel (get_local_id(0), get_local_id(1)):
   *   1. For each disparity 0..MAXD it sums the squared differences over a
   *      BLOCKX x BLOCKY window whose top-left corner is the pixel, once for
   *      left->right (right image shifted by -d) and once for right->left
   *      (left image shifted by +d). The first disparity with the smallest sum
   *      is written to tmp1_disp_picL / tmp1_disp_picR.
   *   2. Cross-check: a disparity is rejected (set to 0) when it differs by more
   *      than THRESHOLD from the disparity the other image stores at the matched
   *      column. Accepted disparities are scaled from 0..MAXD to 0..255 and
   *      written to tmp2_disp_picL / tmp2_disp_picR.
   *   3. Occlusion filling: a pixel rejected in step 2 takes the nearest non-zero
   *      tmp2 value on its row (see ssd_fill_from_row); the result is written to
   *      rslt_disp_picL / rslt_disp_picR.
   *
   * pic_left, pic_right   input images, 8 bit per pixel, row stride WIDTH
   * tmp1_disp_pic{L,R}    raw disparities (step 1)
   * tmp2_disp_pic{L,R}    cross-checked, scaled disparities (step 2)
   * rslt_disp_pic{L,R}    final occlusion-filled disparities (step 3)
   *
   * The barriers only order the work-items of one work-group, which is why the
   * pixel address is built from the local id alone.
   * NOTE(review): the group id is not part of the address, so if the kernel is
   * launched with more than one work-group, every group processes the same
   * pixels - confirm the intended launch configuration against the host code.
   * NOTE(review): steps 2 and 3 read columns up to MAXD / BLOCK_WIDTH - 1 away;
   * presumably the local size in x is meant to cover them - TODO confirm.
   */
  __kernel void ssd_process(const __global unsigned char *pic_left, const __global unsigned char *pic_right,
                          __global unsigned char *tmp1_disp_picL, __global unsigned char *tmp1_disp_picR,
                          __global unsigned char *tmp2_disp_picL, __global unsigned char *tmp2_disp_picR,
                          __global unsigned char *rslt_disp_picL, __global unsigned char *rslt_disp_picR)
  {
      const int idx = (int)get_local_id(0);
      const int idy = (int)get_local_id(1);
      const int row_start = idy * WIDTH;
      const int index = row_start + idx;

      unsigned int min_blocksumL = UINT_MAX;
      unsigned int min_blocksumR = UINT_MAX;
      int best_dL = 0;
      int best_dR = 0;

      // ---- step 1: block matching ----
      for (int d = 0; d <= MAXD; d++) {
          unsigned int blocksumL = 0;
          unsigned int blocksumR = 0;

          for (int j = 0; j < BLOCKY; j++) {
              const int y = idy + j;
              for (int i = 0; i < BLOCKX; i++) {
                  const int x = idx + i;
                  // differences are kept in int: two 8-bit pixels differ by up to +-255,
                  // which does not fit into a char
                  const int left = ssd_pixel_clamped(pic_left, x, y);
                  const int right = ssd_pixel_clamped(pic_right, x, y);
                  // disp0 (left to right): columns left of the image use the first pixel of the row
                  const int diffL = left - ssd_pixel_clamped(pic_right, x - d, y);
                  // disp1 (right to left): columns right of the image use the last pixel of the row
                  const int diffR = right - ssd_pixel_clamped(pic_left, x + d, y);

                  blocksumL += (unsigned int)(diffL * diffL);
                  blocksumR += (unsigned int)(diffR * diffR);
              }
          }

          // strict comparison: on equal sums the smaller disparity is kept
          if (blocksumL < min_blocksumL) {
              min_blocksumL = blocksumL;
              best_dL = d;
          }
          if (blocksumR < min_blocksumR) {
              min_blocksumR = blocksumR;
              best_dR = d;
          }
      }
      tmp1_disp_picL[index] = (unsigned char)best_dL;
      tmp1_disp_picR[index] = (unsigned char)best_dR;

      // ---- step 2: cross-check ----
      // all work-items of the group have written their tmp1 values after this point
      barrier(CLK_GLOBAL_MEM_FENCE);

      {
          // matched columns are clamped to the row so that the lookup never leaves it
          const int matchR = tmp1_disp_picR[row_start + clamp(idx - best_dL, 0, WIDTH - 1)];
          const int matchL = tmp1_disp_picL[row_start + clamp(idx + best_dR, 0, WIDTH - 1)];
          const int rejectL = abs(best_dL - matchR) > THRESHOLD;
          const int rejectR = abs(best_dR - matchL) > THRESHOLD;

          tmp2_disp_picL[index] = rejectL ? (unsigned char)0
                                          : (unsigned char)(best_dL / (float)MAXD * 255);
          tmp2_disp_picR[index] = rejectR ? (unsigned char)0
                                          : (unsigned char)(best_dR / (float)MAXD * 255);
      }

      // ---- step 3: occlusion filling ----
      // all work-items of the group have written their tmp2 values after this point
      barrier(CLK_GLOBAL_MEM_FENCE);

      // Brute-force version, first check that this works (should give the same image
      // as the C implementation - or actually a better one: the C version did not
      // break out of the for loop, so the result always got the last non-zero pixel).
      {
          unsigned char outL = tmp2_disp_picL[index];
          unsigned char outR = tmp2_disp_picR[index];

          // a zero in tmp2 with a non-zero raw disparity means the cross-check rejected the pixel
          if (outL == 0 && best_dL != 0) {
              outL = ssd_fill_from_row(tmp2_disp_picL, row_start, idx);
          }
          if (outR == 0 && best_dR != 0) {
              outR = ssd_fill_from_row(tmp2_disp_picR, row_start, idx);
          }

          rslt_disp_picL[index] = outL;
          rslt_disp_picR[index] = outR;
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement