Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #include <iostream>
- #include <cmath>
- #include <chrono>
- #include <algorithm>
- #include <complex>
- #include <vector>
- #include <clFFT.h>
- #include <algorithm>
- #include <fstream>
- #include <memory>
/// Returns the magnitude (Euclidean norm) of a complex sample.
///
/// std::hypot is used instead of sqrt(re*re + im*im): squaring the components
/// in float overflows to infinity (or underflows to zero) for inputs whose
/// magnitude is itself perfectly representable, e.g. (3e30, 4e30).
///
/// @param val complex sample
/// @return |val|
float toAmpl(std::complex<float> val)
{
    return std::hypot(val.real(), val.imag());
}
/// Number of complex samples in one FFT window (used as the transform length).
constexpr size_t WINDOW_SIZE = 256;
/// Capacity, in complex samples, of the device-side input buffer.
constexpr size_t MAX_ARRAY_LENGTH = 8192;
// The subtraction below is unsigned: without this guard, retuning the two
// constants the wrong way round would silently wrap to a huge array size.
static_assert(WINDOW_SIZE <= MAX_ARRAY_LENGTH,
              "WINDOW_SIZE must not exceed MAX_ARRAY_LENGTH");
/// Number of window start positions (one per sub-buffer) inside a full-length input.
constexpr size_t FFTS_COUNT = MAX_ARRAY_LENGTH - WINDOW_SIZE;
- /* OpenCL variables */
- cl_int err;
- cl_platform_id platform = 0;
- cl_device_id device = 0;
- cl_context_properties props[3] = {CL_CONTEXT_PLATFORM, 0, 0};
- cl_context ctx = 0;
- cl_command_queue queue = 0;
- cl_mem data_buffer = 0;
- cl_mem out_buffer = 0;
- cl_mem temp_buffer = 0;
- cl_mem inplace_buffers[FFTS_COUNT];
- cl_mem out_buffers[FFTS_COUNT];
- cl_mem temp_buffers[FFTS_COUNT];
- _cl_buffer_region inplace_regions[FFTS_COUNT];
- _cl_buffer_region out_regions[FFTS_COUNT];
- _cl_buffer_region temp_regions[FFTS_COUNT];
- /* clFFT variables */
- int status = 0;
- int ret = 0;
- size_t tmpBufferSize;
- size_t N;
- clfftPlanHandle planHandle;
- clfftDim dim = CLFFT_1D;
- size_t clLengths[1];
/// Generates `size` samples of a complex exponential test tone.
///
/// Sample i is 0.5 * (cos(x), sin(x)) with x = 2*pi*0.125*i, i.e. amplitude 0.5
/// and a phase advance of pi/4 per sample (the pattern repeats every 8 samples).
///
/// @param size number of samples to produce; 0 yields an empty vector
/// @return the generated samples
std::vector<std::complex<float>> generate(size_t size)
{
    // Spelled out rather than M_PI: that macro is a POSIX extension which
    // standard <cmath> is not required to provide (e.g. MSVC without
    // _USE_MATH_DEFINES). The value is identical.
    constexpr double kPi = 3.14159265358979323846;
    constexpr double kPhaseStep = 2 * kPi * 0.125;

    std::vector<std::complex<float>> buffer;
    buffer.reserve(size);
    for (size_t i = 0; i < size; ++i)
    {
        // Phase is evaluated in double and narrowed to float only at the end.
        const double x = kPhaseStep * i;
        buffer.emplace_back(static_cast<float>(0.5 * std::cos(x)),
                            static_cast<float>(0.5 * std::sin(x)));
    }
    return buffer;
}
- void OCLFFTInit()
- {
- srand(time(0));
- /* OpenCL init */
- err = clGetPlatformIDs(1, &platform, NULL);
- err = clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, NULL);
- cl_ulong res;
- clGetDeviceInfo(device, CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(cl_ulong), &res, NULL);
- props[1] = (cl_context_properties) platform;
- ctx = clCreateContext(props, 1, &device, NULL, NULL, &err);
- queue = clCreateCommandQueue(ctx, device, 0, &err);
- /* clFFT init */
- clfftSetupData fftSetup;
- err = clfftInitSetupData(&fftSetup);
- err = clfftSetup(&fftSetup);
- /* Plan init */
- clLengths[0] = WINDOW_SIZE;
- err = clfftCreateDefaultPlan(&planHandle, ctx, dim, clLengths);
- err = clfftSetPlanPrecision(planHandle, CLFFT_SINGLE);
- err = clfftSetLayout(planHandle, CLFFT_COMPLEX_INTERLEAVED, CLFFT_COMPLEX_INTERLEAVED);
- err = clfftSetPlanBatchSize(planHandle, 1000);
- err = clfftSetResultLocation(planHandle, CLFFT_OUTOFPLACE);
- err = clfftBakePlan(planHandle, 1, &queue, NULL, NULL);
- /* Buffers initialization */
- clfftGetTmpBufSize(planHandle, &tmpBufferSize);
- temp_buffer = tmpBufferSize != 0 ? clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(std::complex<float>)*FFTS_COUNT*tmpBufferSize, 0, &err) : 0;
- data_buffer = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(std::complex<float>)*MAX_ARRAY_LENGTH, 0, &err);
- out_buffer = clCreateBuffer(ctx, CL_MEM_READ_WRITE, sizeof(std::complex<float>)*FFTS_COUNT*WINDOW_SIZE, 0, &err);
- /* Subbuffers initialization */
- for(size_t i=0; i<FFTS_COUNT; i++)
- {
- inplace_regions[i].origin = i*sizeof(std::complex<float>);
- inplace_regions[i].size = WINDOW_SIZE*sizeof(std::complex<float>);
- inplace_buffers[i] = clCreateSubBuffer(data_buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &inplace_regions[i], &err);
- out_regions[i].origin = i*WINDOW_SIZE*sizeof(std::complex<float>);
- out_regions[i].size = WINDOW_SIZE*sizeof(std::complex<float>);
- out_buffers[i] = clCreateSubBuffer(out_buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &out_regions[i], &err);
- if(tmpBufferSize != 0)
- {
- temp_regions[i].origin = tmpBufferSize*i;
- temp_regions[i].size = tmpBufferSize;
- temp_buffers[i] = clCreateSubBuffer(temp_buffer, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &temp_regions[i], &err);
- }
- }
- }
- std::vector<std::complex<float>> time_freq_distribution(std::vector<std::complex<float>> time_domain)
- {
- err = clEnqueueWriteBuffer(queue, data_buffer, CL_TRUE, 0, time_domain.size()*sizeof(std::complex<float>), time_domain.data(), 0, 0, 0);
- for(size_t i=0; i<time_domain.size()-WINDOW_SIZE; i++)
- {
- clfftEnqueueTransform (planHandle, CLFFT_FORWARD, 1, &queue, 0, NULL, NULL, &inplace_buffers[i], &out_buffers[i], tmpBufferSize != 0 ? temp_buffers[i] : 0);
- }
- clFinish(queue);
- std::vector<std::complex<float>> out;
- out.resize(time_domain.size()*WINDOW_SIZE);
- err = clEnqueueReadBuffer(queue, out_buffer, CL_TRUE, 0, time_domain.size()*WINDOW_SIZE*sizeof(std::complex<float>), out.data(), 0, 0, 0);
- switch (err)
- {
- case CL_INVALID_COMMAND_QUEUE: std::cout << "queue" << std::endl; break;
- case CL_INVALID_CONTEXT: std::cout << "context" << std::endl; break;
- case CL_INVALID_MEM_OBJECT: std::cout << "mem object" << std::endl; break;
- case CL_INVALID_VALUE: std::cout << "value" << std::endl; break;
- case CL_INVALID_EVENT_WAIT_LIST: std::cout << "event wait list" << std::endl; break;
- case CL_MEM_OBJECT_ALLOCATION_FAILURE: std::cout << "object allocation failure" << std::endl; break;
- case CL_OUT_OF_HOST_MEMORY: std::cout << "OOM" << std::endl; break;
- case CL_SUCCESS: std::cout << "success" << std::endl; break;
- default: std::cout << "undefined" << err << std::endl; break;
- }
- return out;
- }
- int main (void)
- {
- OCLFFTInit();
- std::cout << "clFFT has been initialized." << std::endl;
- std::vector<std::complex<float>> freq_time_domain;
- for(int i=0; i<10; i++)
- {
- auto time_domain = generate(4096);
- auto start = std::chrono::high_resolution_clock::now();
- freq_time_domain = time_freq_distribution(time_domain);
- auto end = std::chrono::high_resolution_clock::now();
- std::cout << "Test(cl): "<< i << ". Time elapsed: " << std::chrono::duration_cast<std::chrono::milliseconds>(end-start).count() << *freq_time_domain.end() << std::endl;
- }
- clfftTeardown();
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement