Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /* THIS IS ALL JUST HACKED TOGETHER. I'M NOT TRYING TO BE PERFECT. */
- // BEGIN setup.h
- #include "stdafx.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <CL/cl.h>
- #include <iostream>
- #include <string>
- using namespace std;
- void setup(cl_context &context, cl_program &program, cl_kernel &kernel, cl_command_queue &cmdQueue, const char *kernelName, const char *source, bool diagnostics);
- // BEGIN setup.cpp
- #include "stdafx.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <CL/cl.h>
- #include <iostream>
- #include <string>
- using namespace std;
- void setup(cl_context &context, cl_program &program, cl_kernel &kernel, cl_command_queue &cmdQueue, const char *kernelName, const char *source, bool diagnostics)
- {
- cl_int status;
- cl_uint numPlatforms = 0;
- status = clGetPlatformIDs(0, NULL, &numPlatforms);
- cl_platform_id *platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
- status = clGetPlatformIDs(numPlatforms, platforms, NULL);
- cout << "clGetPlatformIDs: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- char *profile = NULL;
- size_t pBits;
- status = clGetPlatformInfo(platforms[0], CL_PLATFORM_PROFILE, NULL, profile, &pBits);
- profile = (char*)malloc(pBits);
- status = clGetPlatformInfo(platforms[0], CL_PLATFORM_PROFILE,pBits, profile, NULL);
- cout << "clGetPlatformInfo: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- cl_uint numDevices;
- status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
- cl_device_id *devices;
- devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));
- status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices, NULL);
- cout << "clGetDeviceIDs: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
- cout << "clCreateContext: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status);
- cout << "clCreateCommandQueue: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status);
- cout << "clCreateProgramWithSource: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
- cout << "clBuildProgram: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- kernel = clCreateKernel(program, kernelName, &status);
- cout << "clCreateKernel: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
- }
- // BEGIN main.cpp
- #include "stdafx.h"
- #include <stdio.h>
- #include <stdlib.h>
- #include <CL/cl.h>
- #include <iostream>
- #include <string>
- #include "setup.h"
- using namespace std;
- const char * programSource = "__kernel void simpleMultiply(__global float* outputC, "
- "int widthA, "
- "int heightA, "
- "int widthB, "
- "int heightB, "
- "__global float * inputA, "
- "__global float * inputB)"
- "{"
- "int row = get_global_id(1);"
- "int col = get_global_id(0);"
- "float sum = 0.0f;"
- "for(int i = 0; i < widthA; i++)"
- "{"
- "sum += inputA[row*widthA+i] * inputB[i*widthB+col];"
- "}"
- "outputC[row*widthB+col] = sum;"
- "}";
- cl_int status;
- cl_context context;
- cl_program program;
- cl_kernel kernel;
- cl_command_queue cmdQueue;
- const int wA = 16; // must be smaller than hA
- const int hA = wA;
- const int wB = hA;
- const int hB = wA;
- const int wC = wA;
- const int hC = wA;
- float * A[hA];
- float * B[wA];
- float * C[wA];
- cl_mem bufferA;
- cl_mem bufferB;
- cl_mem bufferC;
- #define BEGIN setup(context, program, kernel, cmdQueue, "simpleMultiply", programSource, true);
- #define END int exit; cout << "\n\n\n-->"; cin >> exit; return 0;
- #define ARRAYS cout << endl; setupArrays(true);
- #define BUFFERS cout << endl; setupBuffers(true);
- void setupArrays(bool diagnostics);
- void setupBuffers(bool diagnostics);
- int main()
- {
- BEGIN
- ARRAYS
- BUFFERS
- clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&bufferC);
- clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&wA);
- clSetKernelArg(kernel, 2, sizeof(cl_int), (void*)&hA);
- clSetKernelArg(kernel, 3, sizeof(cl_int), (void*)&wB);
- clSetKernelArg(kernel, 4, sizeof(cl_int), (void*)&hB);
- clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&bufferA);
- clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&bufferB);
- size_t localws[2] = {16, 16};
- size_t globalws[2] = {wC, wC};
- status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, globalws, localws, 0, NULL, NULL);
- cout << "\nclEnqueueNDRangeKernel: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL");
- status = clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, wC*hC*sizeof(float), (void*)C, 0, NULL, NULL);
- cout << "\nclEnqueueReadBuffer: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL");
- cout << "\nC[0][0]: " << C[0][0];
- END }
- void setupArrays(bool diagnostics)
- {
- for(int i = 0; i < hA; i++)
- {
- A[i] = new float[wA];
- for(int i2 = 0; i2 < wA; i2++)
- A[i][i2] = 1;
- }
- for(int i = 0; i < wA; i++)
- {
- B[i] = new float[hA];
- for(int i2 = 0; i2 < hA; i2++)
- B[i][i2] = 2;
- }
- for(int i = 0; i < hA; i++)
- {
- C[i] = new float[hA];
- for(int i2 = 0; i2 < hA; i2++)
- C[i][i2] = 0;
- }
- if(diagnostics)
- {
- for(int i = 0; i < hA; i++)
- {
- for(int i2 = 0; i2 < wA; i2++)
- cout << A[i][i2] << " ";
- cout << endl;
- }
- cout << endl;
- for(int i = 0; i < wA; i++)
- {
- for(int i2 = 0; i2 < hA; i2++)
- cout << B[i][i2] << " ";
- cout << endl;
- }
- cout << endl;
- for(int i = 0; i < hA; i++)
- {
- for(int i2 = 0; i2 < hA; i2++)
- cout << C[i][i2] << " ";
- cout << endl;
- }
- cout << endl;
- }
- }
- void setupBuffers(bool diagnostics)
- {
- int everFailed = 0;
- bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, wA*hA*sizeof(float), NULL, &status);
- everFailed = status == CL_SUCCESS ? everFailed : 1;
- status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_TRUE, 0, wA*hA*sizeof(float), (void*)A, 0, NULL, NULL);
- everFailed = status == CL_SUCCESS ? everFailed : 1;
- bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, wB*hB*sizeof(float), NULL, &status);
- everFailed = status == CL_SUCCESS ? everFailed : 1;
- status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_TRUE, 0, wB*hB*sizeof(float), (void*)B, 0, NULL, NULL);
- everFailed = status == CL_SUCCESS ? everFailed : 1;
- bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, hA*hA*sizeof(float), NULL, &status);
- if(diagnostics)
- cout << "SETUP BUFFERS: " << (everFailed == 0 ? "SUCCESS" : "FAIL");
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement