Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

FFT 2D OpenCL host code

By: Tiana9875 on Nov 27th, 2012  |  syntax: C++  |  size: 8.20 KB  |  views: 41  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. #include <stdio.h>    
  2. #include <stdlib.h>  
  3. #include <math.h>
  4.      
  5. #ifdef __APPLE__    
  6. #include <OpenCL/opencl.h>    
  7. #else  
  8. #include <CL/cl.h>    
  9. #endif  
  10.      
  11. #include "pgm.h"    
  12.      
  13. #define PI 3.14159265358979
  14.      
  15. #define MAX_SOURCE_SIZE (0x100000)  
  16.      
  17. #define AMP(a, b) (sqrt((a)*(a)+(b)*(b)))  
  18.      
  19. cl_device_id device_id = NULL;  
  20. cl_context context = NULL;  
  21. cl_command_queue queue = NULL;  
  22. cl_program program = NULL;  
  23.      
  24. enum Mode {
  25. forward = 0,    
  26. inverse = 1
  27. };  
  28.      
  29. int setWorkSize(size_t* gws, size_t* lws, cl_int x, cl_int y)  
  30. {  
  31. switch(y) {
  32. case 1:
  33. gws[0] = x;
  34. gws[1] = 1;
  35. lws[0] = 1;
  36. lws[1] = 1;
  37. break;  
  38. default:    
  39. gws[0] = x;
  40. gws[1] = y;
  41. lws[0] = 1;
  42. lws[1] = 1;
  43. break;  
  44. }  
  45.      
  46. return 0;  
  47. }  
  48.      
  49. int fftCore(cl_mem dst, cl_mem src, cl_mem spin, cl_int m, enum Mode direction)
  50. {  
  51. cl_int ret;
  52.      
  53. cl_int iter;    
  54. cl_uint flag;  
  55.      
  56. cl_int n = 1<<m;  
  57.      
  58. cl_event kernelDone;    
  59.      
  60. cl_kernel brev = NULL;  
  61. cl_kernel bfly = NULL;  
  62. cl_kernel norm = NULL;  
  63.      
  64. brev = clCreateKernel(program, "bitReverse", &ret);
  65. bfly = clCreateKernel(program, "butterfly", &ret);  
  66. norm = clCreateKernel(program, "norm", &ret);  
  67.      
  68. size_t gws[2];  
  69. size_t lws[2];  
  70.      
  71. switch (direction) {    
  72. case forward:flag = 0x00000000; break;  
  73. case inverse:flag = 0x80000000; break;  
  74. }  
  75.      
  76. ret = clSetKernelArg(brev, 0, sizeof(cl_mem), (void *)&dst);    
  77. ret = clSetKernelArg(brev, 1, sizeof(cl_mem), (void *)&src);    
  78. ret = clSetKernelArg(brev, 2, sizeof(cl_int), (void *)&m);  
  79. ret = clSetKernelArg(brev, 3, sizeof(cl_int), (void *)&n);  
  80.      
  81. ret = clSetKernelArg(bfly, 0, sizeof(cl_mem), (void *)&dst);    
  82. ret = clSetKernelArg(bfly, 1, sizeof(cl_mem), (void *)&spin);  
  83. ret = clSetKernelArg(bfly, 2, sizeof(cl_int), (void *)&m);  
  84. ret = clSetKernelArg(bfly, 3, sizeof(cl_int), (void *)&n);  
  85. ret = clSetKernelArg(bfly, 5, sizeof(cl_uint), (void *)&flag);  
  86.      
  87. ret = clSetKernelArg(norm, 0, sizeof(cl_mem), (void *)&dst);    
  88. ret = clSetKernelArg(norm, 1, sizeof(cl_int), (void *)&n);  
  89.      
  90. /* Reverse bit ordering */
  91. setWorkSize(gws, lws, n, n);    
  92. ret = clEnqueueNDRangeKernel(queue, brev, 2, NULL, gws, lws, 0, NULL, NULL);    
  93.      
  94. /* Perform Butterfly Operations*/  
  95. setWorkSize(gws, lws, n/2, n);  
  96. for (iter=1; iter <= m; iter++){
  97. ret = clSetKernelArg(bfly, 4, sizeof(cl_int), (void *)&iter);  
  98. ret = clEnqueueNDRangeKernel(queue, bfly, 2, NULL, gws, lws, 0, NULL, &kernelDone);
  99. ret = clWaitForEvents(1, &kernelDone);  
  100. }  
  101.      
  102. if (direction == inverse) {
  103. setWorkSize(gws, lws, n, n);    
  104. ret = clEnqueueNDRangeKernel(queue, norm, 2, NULL, gws, lws, 0, NULL, &kernelDone);
  105. ret = clWaitForEvents(1, &kernelDone);  
  106. }  
  107.      
  108. ret = clReleaseKernel(bfly);    
  109. ret = clReleaseKernel(brev);    
  110. ret = clReleaseKernel(norm);    
  111.      
  112. return 0;  
  113. }  
  114.      
  115. int main()  
  116. {  
  117. cl_mem xmobj = NULL;    
  118. cl_mem rmobj = NULL;    
  119. cl_mem wmobj = NULL;    
  120. cl_kernel sfac = NULL;  
  121. cl_kernel trns = NULL;  
  122. cl_kernel hpfl = NULL;  
  123.      
  124. cl_platform_id platform_id = NULL;  
  125.      
  126. cl_uint ret_num_devices;    
  127. cl_uint ret_num_platforms;  
  128.      
  129. cl_int ret;
  130.      
  131. cl_float2 *xm;  
  132. cl_float2 *rm;  
  133. cl_float2 *wm;  
  134.      
  135. pgm_t ipgm;
  136. pgm_t opgm;
  137.      
  138. FILE *fp;  
  139. const char fileName[] = "./fft.cl";
  140. size_t source_size;
  141. char *source_str;  
  142. cl_int i, j;    
  143. cl_int n;  
  144. cl_int m;  
  145.      
  146. size_t gws[2];  
  147. size_t lws[2];  
  148.      
  149. /* Load kernel source code */  
  150. fp = fopen(fileName, "r");  
  151. if (!fp) {  
  152. fprintf(stderr, "Failed to load kernel.\n");    
  153. exit(1);    
  154. }  
  155. source_str = (char *)malloc(MAX_SOURCE_SIZE);  
  156. source_size = fread(source_str, 1, MAX_SOURCE_SIZE, fp);    
  157. fclose( fp );  
  158.      
  159. /* Read image */  
  160. readPGM(&ipgm, "lena.pgm");
  161.      
  162. n = ipgm.width;
  163. m = (cl_int)(log((double)n)/log(2.0));  
  164.      
  165. xm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));    
  166. rm = (cl_float2 *)malloc(n * n * sizeof(cl_float2));    
  167. wm = (cl_float2 *)malloc(n / 2 * sizeof(cl_float2));    
  168.      
  169. for (i=0; i < n; i++) {  
  170. for (j=0; j < n; j++) {  
  171. ((float*)xm)[(2*n*j)+2*i+0] = (float)ipgm.buf[n*j+i];  
  172. ((float*)xm)[(2*n*j)+2*i+1] = (float)0;
  173. }  
  174. }  
  175.      
  176. /* Get platform/device  */
  177. ret = clGetPlatformIDs(1, &platform_id, &ret_num_platforms);    
  178. ret = clGetDeviceIDs( platform_id, CL_DEVICE_TYPE_DEFAULT, 1, &device_id, &ret_num_devices);    
  179.      
  180. /* Create OpenCL context */
  181. context = clCreateContext(NULL, 1, &device_id, NULL, NULL, &ret);  
  182.      
  183. /* Create Command queue */
  184. queue = clCreateCommandQueue(context, device_id, 0, &ret);  
  185.      
  186. /* Create Buffer Objects */
  187. xmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);  
  188. rmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, n*n*sizeof(cl_float2), NULL, &ret);  
  189. wmobj = clCreateBuffer(context, CL_MEM_READ_WRITE, (n/2)*sizeof(cl_float2), NULL, &ret);    
  190.      
  191. /* Transfer data to memory buffer */  
  192. ret = clEnqueueWriteBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL);
  193.      
  194. /* Create kernel program from source */
  195. program = clCreateProgramWithSource(context, 1, (const char **)&source_str, (const size_t *)&source_size, &ret);    
  196.      
  197. /* Build kernel program */
  198. ret = clBuildProgram(program, 1, &device_id, NULL, NULL, NULL);
  199.      
  200. /* Create OpenCL Kernel */
  201. sfac = clCreateKernel(program, "spinFact", &ret);  
  202. trns = clCreateKernel(program, "transpose", &ret);  
  203. hpfl = clCreateKernel(program, "highPassFilter", &ret);
  204.      
  205. /* Create spin factor */  
  206. ret = clSetKernelArg(sfac, 0, sizeof(cl_mem), (void *)&wmobj);  
  207. ret = clSetKernelArg(sfac, 1, sizeof(cl_int), (void *)&n);  
  208. setWorkSize(gws, lws, n/2, 1);  
  209. ret = clEnqueueNDRangeKernel(queue, sfac, 1, NULL, gws, lws, 0, NULL, NULL);    
  210.      
  211. /* Butterfly Operation */  
  212. fftCore(rmobj, xmobj, wmobj, m, forward);  
  213.      
  214. /* Transpose matrix */
  215. ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&xmobj);  
  216. ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&rmobj);  
  217. ret = clSetKernelArg(trns, 2, sizeof(cl_int), (void *)&n);  
  218. setWorkSize(gws, lws, n, n);    
  219. ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);    
  220.      
  221. /* Butterfly Operation */  
  222. fftCore(rmobj, xmobj, wmobj, m, forward);  
  223.      
  224. /* Apply high-pass filter */  
  225. cl_int radius = n/8;    
  226. ret = clSetKernelArg(hpfl, 0, sizeof(cl_mem), (void *)&rmobj);  
  227. ret = clSetKernelArg(hpfl, 1, sizeof(cl_int), (void *)&n);  
  228. ret = clSetKernelArg(hpfl, 2, sizeof(cl_int), (void *)&radius);
  229. setWorkSize(gws, lws, n, n);    
  230. ret = clEnqueueNDRangeKernel(queue, hpfl, 2, NULL, gws, lws, 0, NULL, NULL);    
  231.      
  232. /* Inverse FFT */  
  233.      
  234. /* Butterfly Operation */  
  235. fftCore(xmobj, rmobj, wmobj, m, inverse);  
  236.      
  237. /* Transpose matrix */
  238. ret = clSetKernelArg(trns, 0, sizeof(cl_mem), (void *)&rmobj);  
  239. ret = clSetKernelArg(trns, 1, sizeof(cl_mem), (void *)&xmobj);  
  240. setWorkSize(gws, lws, n, n);    
  241. ret = clEnqueueNDRangeKernel(queue, trns, 2, NULL, gws, lws, 0, NULL, NULL);    
  242.      
  243. /* Butterfly Operation */  
  244. fftCore(xmobj, rmobj, wmobj, m, inverse);  
  245.      
  246. /* Read data from memory buffer */
  247. ret = clEnqueueReadBuffer(queue, xmobj, CL_TRUE, 0, n*n*sizeof(cl_float2), xm, 0, NULL, NULL);  
  248.      
  249. /*  */
  250. float* ampd;    
  251. ampd = (float*)malloc(n*n*sizeof(float));  
  252. for (i=0; i < n; i++) {  
  253. for (j=0; j < n; j++) {  
  254. ampd[n*((i))+((j))] = (AMP(((float*)xm)[(2*n*i)+2*j], ((float*)xm)[(2*n*i)+2*j+1]));    
  255. }  
  256. }  
  257. opgm.width = n;
  258. opgm.height = n;    
  259. normalizeF2PGM(&opgm, ampd);    
  260. free(ampd);
  261.      
  262. /* Write out image */  
  263. writePGM(&opgm, "output.pgm");  
  264.      
  265. /* Finalizations*/
  266. ret = clFlush(queue);  
  267. ret = clFinish(queue);  
  268. ret = clReleaseKernel(hpfl);    
  269. ret = clReleaseKernel(trns);    
  270. ret = clReleaseKernel(sfac);    
  271. ret = clReleaseProgram(program);    
  272. ret = clReleaseMemObject(xmobj);    
  273. ret = clReleaseMemObject(rmobj);    
  274. ret = clReleaseMemObject(wmobj);    
  275. ret = clReleaseCommandQueue(queue);
  276. ret = clReleaseContext(context);    
  277.      
  278. destroyPGM(&ipgm);  
  279. destroyPGM(&opgm);  
  280.      
  281. free(source_str);  
  282. free(wm);  
  283. free(rm);  
  284. free(xm);  
  285.      
  286. return 0;  
  287. }