Advertisement
Guest User

Untitled

a guest
Jul 1st, 2013
197
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
C++ 6.61 KB | None | 0 0
  /* THIS IS ALL JUST HACKED TOGETHER. I'M NOT TRYING TO BE PERFECT. */
  2.  
  3. // BEGIN setup.h
  4.  
  5. #include "stdafx.h"
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <CL/cl.h>
  9.  
  10. #include <iostream>
  11. #include <string>
  12.  
  13. using namespace std;
  14. void setup(cl_context &context, cl_program &program, cl_kernel &kernel, cl_command_queue &cmdQueue, const char *kernelName, const char *source, bool diagnostics);
  15.  
  16.  
  17.  
  18.  
  19.  
  20.  
  21.  
  22.  
  23.  
  24.  
  25.  
  26. // BEGIN setup.cpp
  27.  
  28.  
  29. #include "stdafx.h"
  30. #include <stdio.h>
  31. #include <stdlib.h>
  32. #include <CL/cl.h>
  33. #include <iostream>
  34. #include <string>
  35.  
  36.  
  37. using namespace std;
  38. void setup(cl_context &context, cl_program &program, cl_kernel &kernel, cl_command_queue &cmdQueue, const char *kernelName, const char *source, bool diagnostics)
  39. {
  40.     cl_int status;
  41.  
  42.     cl_uint numPlatforms = 0;
  43.     status = clGetPlatformIDs(0, NULL, &numPlatforms);
  44.     cl_platform_id  *platforms = (cl_platform_id*)malloc(numPlatforms*sizeof(cl_platform_id));
  45.     status = clGetPlatformIDs(numPlatforms, platforms, NULL);
  46.     cout << "clGetPlatformIDs: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  47.  
  48.     char *profile = NULL;
  49.     size_t pBits;
  50.     status = clGetPlatformInfo(platforms[0], CL_PLATFORM_PROFILE, NULL, profile, &pBits);
  51.     profile = (char*)malloc(pBits);
  52.     status = clGetPlatformInfo(platforms[0], CL_PLATFORM_PROFILE,pBits, profile, NULL);
  53.     cout << "clGetPlatformInfo: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  54.  
  55.     cl_uint numDevices;
  56.     status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, 0, NULL, &numDevices);
  57.     cl_device_id *devices;
  58.     devices = (cl_device_id*)malloc(numDevices*sizeof(cl_device_id));
  59.     status = clGetDeviceIDs(platforms[0], CL_DEVICE_TYPE_ALL, numDevices, devices,  NULL);
  60.     cout << "clGetDeviceIDs: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  61.  
  62.  
  63.     context = clCreateContext(NULL, numDevices, devices, NULL, NULL, &status);
  64.     cout << "clCreateContext: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  65.    
  66.     cmdQueue = clCreateCommandQueue(context, devices[0], 0, &status);
  67.     cout << "clCreateCommandQueue: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  68.  
  69.     program = clCreateProgramWithSource(context, 1, (const char**)&source, NULL, &status);
  70.     cout << "clCreateProgramWithSource: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  71.    
  72.     status = clBuildProgram(program, numDevices, devices, NULL, NULL, NULL);
  73.     cout << "clBuildProgram: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  74.  
  75.     kernel = clCreateKernel(program, kernelName, &status);
  76.     cout << "clCreateKernel: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL") << endl;
  77.  
  78. }
  79.  
  80.  
  81.  
  82.  
  83.  
  84.  
  85.  
  86.  
  87.  
  88.  
  89. // BEGIN main.cpp
  90.  
  #include "stdafx.h"
  #include <stdio.h>
  #include <stdlib.h>
  #include <CL/cl.h>
  #include <iostream>
  #include <string>
  #include <vector>
  #include "setup.h"
  98. using namespace std;
  99.  
  100. const char * programSource = "__kernel void simpleMultiply(__global float* outputC, "
  101.                             "int widthA, "
  102.                             "int heightA, "
  103.                             "int widthB, "
  104.                             "int heightB, "
  105.                             "__global float * inputA, "
  106.                             "__global float * inputB)"
  107.                             "{"
  108.                                 "int row = get_global_id(1);"
  109.                                 "int col = get_global_id(0);"
  110.                                 "float sum = 0.0f;"
  111.                                 "for(int i = 0; i < widthA; i++)"
  112.                                 "{"
  113.                                     "sum += inputA[row*widthA+i] * inputB[i*widthB+col];"
  114.                                 "}"
  115.                                 "outputC[row*widthB+col] = sum;"
  116.                             "}";
  117.  
  118. cl_int status;
  119. cl_context context;
  120. cl_program program;
  121. cl_kernel kernel;
  122. cl_command_queue cmdQueue;
  123. const int wA = 16; // must be smaller than hA
  124. const int hA = wA;
  125. const int wB = hA;
  126. const int hB = wA;
  127. const int wC = wA;
  128. const int hC = wA;
  129. float * A[hA];
  130. float * B[wA];
  131. float * C[wA];
  132. cl_mem bufferA;
  133. cl_mem bufferB;
  134. cl_mem bufferC;
  135. #define BEGIN setup(context, program, kernel, cmdQueue,  "simpleMultiply", programSource, true);
  136. #define END int exit; cout << "\n\n\n-->"; cin >> exit; return 0;
  137. #define ARRAYS cout << endl; setupArrays(true);
  138. #define BUFFERS cout << endl; setupBuffers(true);
  139. void setupArrays(bool diagnostics);
  140. void setupBuffers(bool diagnostics);
  141. int main()
  142. {
  143.     BEGIN
  144.     ARRAYS
  145.     BUFFERS
  146.     clSetKernelArg(kernel, 0, sizeof(cl_mem), (void*)&bufferC);
  147.     clSetKernelArg(kernel, 1, sizeof(cl_int), (void*)&wA);
  148.     clSetKernelArg(kernel, 2, sizeof(cl_int), (void*)&hA);
  149.     clSetKernelArg(kernel, 3, sizeof(cl_int), (void*)&wB);
  150.     clSetKernelArg(kernel, 4, sizeof(cl_int), (void*)&hB);
  151.     clSetKernelArg(kernel, 5, sizeof(cl_mem), (void*)&bufferA);
  152.     clSetKernelArg(kernel, 6, sizeof(cl_mem), (void*)&bufferB);
  153.     size_t localws[2] = {16, 16};
  154.     size_t globalws[2] = {wC, wC};
  155.     status = clEnqueueNDRangeKernel(cmdQueue, kernel, 2, NULL, globalws, localws, 0, NULL, NULL);
  156.     cout << "\nclEnqueueNDRangeKernel: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL");
  157.     status = clEnqueueReadBuffer(cmdQueue, bufferC, CL_TRUE, 0, wC*hC*sizeof(float), (void*)C, 0, NULL, NULL);
  158.     cout << "\nclEnqueueReadBuffer: " << (status == CL_SUCCESS ? "SUCCESS" : "FAIL");
  159.     cout << "\nC[0][0]: " << C[0][0];
  160. END }
  161.  
  162. void setupArrays(bool diagnostics)
  163. {
  164.     for(int i = 0; i < hA; i++)
  165.     {
  166.         A[i] = new float[wA];
  167.         for(int i2 = 0; i2 < wA; i2++)
  168.             A[i][i2] = 1;
  169.     }
  170.     for(int i = 0; i < wA; i++)
  171.     {
  172.         B[i] = new float[hA];
  173.         for(int i2 = 0; i2 < hA; i2++)
  174.             B[i][i2] = 2;
  175.     }
  176.     for(int i = 0; i < hA; i++)
  177.     {
  178.         C[i] = new float[hA];
  179.         for(int i2 = 0; i2 < hA; i2++)
  180.             C[i][i2] = 0;
  181.     }
  182.  
  183.     if(diagnostics)
  184.     {
  185.         for(int i = 0; i < hA; i++)
  186.         {
  187.             for(int i2 = 0; i2 < wA; i2++)
  188.                 cout << A[i][i2] << " ";
  189.             cout << endl;
  190.         }
  191.         cout << endl;
  192.         for(int i = 0; i < wA; i++)
  193.         {
  194.             for(int i2 = 0; i2 < hA; i2++)
  195.                 cout << B[i][i2] << " ";
  196.             cout << endl;
  197.         }
  198.         cout << endl;
  199.         for(int i = 0; i < hA; i++)
  200.         {
  201.             for(int i2 = 0; i2 < hA; i2++)
  202.                 cout << C[i][i2] << " ";
  203.             cout << endl;
  204.         }
  205.         cout << endl;
  206.     }
  207. }
  208.  
  209. void setupBuffers(bool diagnostics)
  210. {
  211.     int everFailed = 0;
  212.     bufferA = clCreateBuffer(context, CL_MEM_READ_ONLY, wA*hA*sizeof(float), NULL, &status);
  213.     everFailed = status == CL_SUCCESS ? everFailed : 1;
  214.     status = clEnqueueWriteBuffer(cmdQueue, bufferA, CL_TRUE, 0, wA*hA*sizeof(float), (void*)A, 0, NULL, NULL);
  215.     everFailed = status == CL_SUCCESS ? everFailed : 1;
  216.     bufferB = clCreateBuffer(context, CL_MEM_READ_ONLY, wB*hB*sizeof(float), NULL, &status);
  217.     everFailed = status == CL_SUCCESS ? everFailed : 1;
  218.     status = clEnqueueWriteBuffer(cmdQueue, bufferB, CL_TRUE, 0, wB*hB*sizeof(float), (void*)B, 0, NULL, NULL);
  219.     everFailed = status == CL_SUCCESS ? everFailed : 1;
  220.     bufferC = clCreateBuffer(context, CL_MEM_WRITE_ONLY, hA*hA*sizeof(float), NULL, &status);
  221.     if(diagnostics)
  222.         cout << "SETUP BUFFERS: " << (everFailed == 0 ? "SUCCESS" : "FAIL");
  223. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement