/*
 * FFT 2D OpenCL Kernel
 * Posted by Tiana9875 on Pastebin, Nov 27th, 2012 (C, 2.97 KB).
 */
  /*
   * Circle constants used when building the twiddle-factor table.
   * NOTE(review): the literals carry no 'f' suffix, so they are double-precision
   * constants in plain C; how an OpenCL compiler treats them presumably depends
   * on the device's double-precision support -- confirm for the target device.
   */
  #define PI   3.14159265358979323846  /* pi   */
  #define PI_2 1.57079632679489661923  /* pi/2 */

  /*
   * Fill the twiddle-factor table; each work-item writes one entry.
   *
   * w : output table. Entry i receives (cos(t), cos(t + PI_2)) with
   *     t = 2*PI*i/n, which is mathematically (cos t, -sin t).
   * n : divisor of the angle.
   *
   * The index is taken directly from get_global_id(0) without a range check,
   * so the launch size decides how many entries are written.
   */
  __kernel void spinFact(__global float2* w, int n)
  {
      unsigned int i = get_global_id(0);

      /* The second lane is offset by a quarter turn so that one vector cos()
       * call yields both components of the entry. */
      float2 angle = (float2)(2*i*PI/(float)n, (2*i*PI/(float)n) + PI_2);

      w[i] = cos(angle);
  }

  /*
   * Copy src to dst, moving each element of a row to the position given by
   * reversing the low m bits of its column index.
   *
   * dst : output buffer.
   * src : input buffer.
   * m   : number of index bits to reverse.
   * n   : row stride (elements per row) used for both buffers.
   *
   * Work-item (gid, nid) reads src[nid*n + gid] and writes dst[nid*n + rev(gid)].
   * NOTE(review): dst and src are presumably distinct buffers; with the same
   * buffer, work-items could overwrite elements that others have not read yet.
   */
  __kernel void bitReverse(__global float2 *dst, __global float2 *src, int m, int n)
  {
      unsigned int gid = get_global_id(0);
      unsigned int nid = get_global_id(1);

      /* Reverse all 32 bits by swapping ever larger groups: single bits,
       * pairs, nibbles, bytes, then the two half-words. */
      unsigned int j = gid;
      j = (j & 0x55555555) << 1  | (j & 0xAAAAAAAA) >> 1;
      j = (j & 0x33333333) << 2  | (j & 0xCCCCCCCC) >> 2;
      j = (j & 0x0F0F0F0F) << 4  | (j & 0xF0F0F0F0) >> 4;
      j = (j & 0x00FF00FF) << 8  | (j & 0xFF00FF00) >> 8;
      j = (j & 0x0000FFFF) << 16 | (j & 0xFFFF0000) >> 16;

      /* Keep only the top m bits of the reversed word. A shift count of 32
       * (m == 0) would not clear a 32-bit value -- OpenCL C uses only the low
       * bits of the count -- so a zero-bit reversal is mapped to index 0
       * explicitly. */
      j = (m > 0) ? (j >> (32 - m)) : 0u;

      dst[nid*n + j] = src[nid*n + gid];
  }

  /*
   * Divide both components of one element by n, in place.
   *
   * x : buffer to scale.
   * n : row stride of x and also the divisor.
   *
   * Work-item (get_global_id(0), get_global_id(1)) handles element
   * x[id1*n + id0].
   */
  __kernel void norm(__global float2 *x, int n)
  {
      const unsigned int col = get_global_id(0);
      const unsigned int row = get_global_id(1);
      const unsigned int idx = row*n + col;

      /* A scalar divisor is applied to each lane of the float2. */
      x[idx] = x[idx] / (float)n;
  }

  /*
   * Perform one in-place butterfly on a pair of elements of row nid.
   *
   * x    : data buffer, updated in place.
   * w    : twiddle-factor table, read at index (n >> iter) * offset.
   * m    : not used by this kernel.
   * n    : row stride of x; also used to derive the twiddle stride.
   * iter : stage number; iter - 1 is used as a shift count, so it is expected
   *        to be at least 1.
   * flag : XORed into the bit pattern of the twiddle's y component. Passing
   *        0x80000000 flips that component's sign bit (conjugating the
   *        twiddle); passing 0 leaves it unchanged.
   *
   * With t = xb * twiddle (complex product) the kernel stores
   *   x[a] = xa + t   and   x[b] = xa - t.
   */
  __kernel void butterfly(__global float2 *x, __global float2* w, int m, int n, int iter, uint flag)
  {
      unsigned int gid = get_global_id(0);
      unsigned int nid = get_global_id(1);

      int halfSize    = 1 << (iter - 1);             /* distance between the pair */
      int groupSpan   = 1 << iter;                   /* elements per butterfly group */
      int twStride    = n >> iter;                   /* step through the twiddle table */
      int groupBase   = (gid >> (iter - 1)) * groupSpan;
      int groupOffset = gid & (halfSize - 1);

      int a = nid * n + groupBase + groupOffset;
      int b = a + halfSize;

      float2 xa = x[a];
      float2 xb = x[b];

      /* Twiddle with the sign bit of its y component optionally flipped. */
      float2 tw = as_float2(as_uint2(w[twStride * groupOffset]) ^ (uint2)(0x0, flag));

      /* Swapped twiddle with one sign bit flipped:
       *   twRotPos = (-tw.y,  tw.x)
       *   twRotNeg = ( tw.y, -tw.x) */
      float2 twRotPos = as_float2(as_uint2(tw.yx) ^ (uint2)(0x80000000, 0x0));
      float2 twRotNeg = as_float2(as_uint2(tw.yx) ^ (uint2)(0x0, 0x80000000));

      /* xb.xx*tw + xb.yy*twRotPos is the complex product xb*tw; the second
       * line adds its negation. */
      float2 sum  = xa + xb.xx*tw + xb.yy*twRotPos;
      float2 diff = xa - xb.xx*tw + xb.yy*twRotNeg;

      x[a] = sum;
      x[b] = diff;
  }

  /*
   * Out-of-place transpose with row stride n.
   *
   * dst : output buffer; receives src[y*n + x] at dst[x*n + y].
   * src : input buffer.
   * n   : row stride used for both buffers.
   *
   * Work-item (x, y) = (get_global_id(0), get_global_id(1)) copies one element.
   */
  __kernel void transpose(__global float2 *dst, __global float2* src, int n)
  {
      const unsigned int col = get_global_id(0);
      const unsigned int row = get_global_id(1);

      const unsigned int from = row * n + col;
      const unsigned int to   = col * n + row;

      dst[to] = src[from];
  }

  /*
   * Zero every element that lies strictly inside a circle of the given radius,
   * measured in coordinates shifted by n/2 with wrap-around.
   *
   * image  : buffer filtered in place, row stride n.
   * n      : row stride; the wrap-around uses "& (n - 1)", which acts as
   *          modulo n only when n is a power of two.
   * radius : elements whose squared distance is below radius*radius are set
   *          to zero. The value is squared, so its sign does not matter.
   *
   * Elements outside the circle are left untouched.
   */
  __kernel void highPassFilter(__global float2* image, int n, int radius)
  {
      unsigned int xgid = get_global_id(0);
      unsigned int ygid = get_global_id(1);

      int2 half = (int2)(n >> 1, n >> 1);
      int2 wrap = (int2)(n - 1, n - 1);

      /* Shift the coordinate by n/2 with wrap-around, then measure the
       * squared distance from (n/2, n/2) in that shifted frame. */
      int2 shifted = ((int2)((int)xgid, (int)ygid) + half) & wrap;
      int2 delta   = half - shifted;
      int2 sq      = delta * delta;
      int  dist2   = sq.x + sq.y;

      if (dist2 < radius*radius) {
          /* Same stored value as AND-ing the bit pattern with an all-zero mask. */
          image[ygid*n + xgid] = (float2)(0.0f, 0.0f);
      }
  }
/* End of paste (Pastebin page footer: Add Comment / Please, Sign In to add comment). */