Advertisement
Guest User

Untitled

a guest
Sep 4th, 2015
62
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.74 KB | None | 0 0
  1. #define __CL_ENABLE_EXCEPTIONS
  2. #include <CL/cl.hpp>
  3. #include <iostream>
  4.  
  5. using namespace std;
  6.  
  // OpenCL C program source, embedded as a raw string literal.
  //
  // The text defines convenience macros (address-space qualifiers and
  // work-item / work-group index shorthands), the psc_scan_type and
  // psc_index_type typedefs (both int), and a single kernel,
  // scan_scan_intervals_lev1, declared with a required work-group size of
  // (psc_WG_SIZE, 1, 1), i.e. (16, 1, 1).
  //
  // Kernel body: ten loop iterations. Each iteration executes a local-memory
  // barrier; then every work-item whose local id is >= 2 reads the psc_ldata
  // slot two positions below its own; finally every work-item stores the
  // value it read (0 for local ids 0 and 1) into its own psc_ldata slot.
  //
  // NOTE(review):
  //  - None of the six kernel parameters is referenced in the kernel body.
  //  - psc_ldata is read before anything has been stored to it.
  //  - The only barrier sits at the top of each iteration; within one
  //    iteration nothing orders a work-item's read of a slot against the
  //    store made to that same slot by the work-item that owns it.
  //  - Several macros (e.g. psc_K, NO_SEG_BOUNDARY, psc_SCAN_EXPR) are
  //    defined but not used in the visible text.
  //  This looks like a deliberately reduced test case -- confirm with the
  //  author before changing the kernel text.
  7. const char *SOURCE = R"RAW(
  8. #define local_barrier() barrier(CLK_LOCAL_MEM_FENCE);
  9. #define WITHIN_KERNEL /* empty */
  10. #define KERNEL __kernel
  11. #define GLOBAL_MEM __global
  12. #define LOCAL_MEM __local
  13. #define LOCAL_MEM_ARG __local
  14. #define REQD_WG_SIZE(X,Y,psc_Z) __attribute__((reqd_work_group_size(X, Y, psc_Z)))
  15. #define psc_LID_0 get_local_id(0)
  16. #define psc_LID_1 get_local_id(1)
  17. #define psc_LID_2 get_local_id(2)
  18. #define psc_GID_0 get_group_id(0)
  19. #define psc_GID_1 get_group_id(1)
  20. #define psc_GID_2 get_group_id(2)
  21. #define psc_LDIM_0 get_local_size(0)
  22. #define psc_LDIM_1 get_local_size(1)
  23. #define psc_LDIM_2 get_local_size(2)
  24. #define psc_GDIM_0 get_num_groups(0)
  25. #define psc_GDIM_1 get_num_groups(1)
  26. #define psc_GDIM_2 get_num_groups(2)
  27. #if __OPENCL_C_VERSION__ < 120
  28. #pragma OPENCL EXTENSION cl_khr_fp64: enable
  29. #endif
  30. //CL//
  31. #define psc_WG_SIZE 16
  32. #define psc_SCAN_EXPR(a, b, across_seg_boundary) a+b
  33. #define psc_INPUT_EXPR(i) (input_ary[i])
  34. typedef int psc_scan_type;
  35. typedef int psc_index_type;
  36. // NO_SEG_BOUNDARY is the largest representable integer in psc_index_type.
  37. // This assumption is used in code below.
  38. #define NO_SEG_BOUNDARY 2147483647
  39. //CL//
  40. #define psc_K 256
  41. KERNEL
  42. REQD_WG_SIZE(psc_WG_SIZE, 1, 1)
  43. void scan_scan_intervals_lev1(
  44. __global int *input_ary, __global int *output_ary,
  45. GLOBAL_MEM psc_scan_type *restrict psc_partial_scan_buffer,
  46. const psc_index_type N,
  47. const psc_index_type psc_interval_size
  48. , GLOBAL_MEM psc_scan_type *restrict psc_interval_results
  49. )
  50. {
  51. // index psc_K in first dimension used for psc_carry storage
  52. struct psc_wrapped_scan_type
  53. {
  54. psc_scan_type psc_value;
  55. };
  56. // padded in psc_WG_SIZE to avoid bank conflicts
  57. LOCAL_MEM struct psc_wrapped_scan_type psc_ldata[psc_WG_SIZE];
  58. for(int i = 0; i < 10; ++i)
  59. {
  60. local_barrier();
  61. psc_scan_type psc_val = 0;
  62. if (psc_LID_0 >= 2)
  63. {
  64. psc_scan_type psc_tmp = psc_ldata[psc_LID_0 - 2].psc_value;
  65. psc_val = psc_tmp+ psc_val;
  66. }
  67. // {{{ writes to local allowed, reads from local not allowed
  68. psc_ldata[psc_LID_0].psc_value = psc_val;
  69. }
  70. }
  71. )RAW";
  72.  
  73. int main(int argc, char *argv[])
  74. {
  75. cl::Device device = cl::Device::getDefault();
  76. cl::CommandQueue queue = cl::CommandQueue::getDefault();
  77. cl::Program program(SOURCE, true);
  78.  
  79. cout << device.getInfo<CL_DEVICE_NAME>() << endl;
  80.  
  81. auto kernel = cl::make_kernel<cl::Buffer, cl::Buffer, cl::Buffer, cl_int, cl_int, cl::Buffer>(program, "scan_scan_intervals_lev1");
  82.  
  83. cl_int i;
  84. cl::Buffer buffer;
  85. kernel(cl::EnqueueArgs(queue, cl::NDRange(16), cl::NDRange(16)),
  86. buffer, buffer, buffer, i, i, buffer);
  87.  
  88. queue.finish();
  89. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement