Advertisement
Guest User

Untitled

a guest
Mar 22nd, 2017
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.44 KB | None | 0 0
  1. #define arrA_GROUP_SIZE 1
  2. #define arrA_bitwidth 8
  3. #define arrA_bitwidthLog2 3
  4. #define arrA_valuesPerCell 4
  5. #define arrA_valuesPerCellLog2 2
  6. #define arrA_valueMask 0xff
  7. #define arrA_indexMask 0x3
  8. #define arrA_base_offset 0
  9. uint arrA_cell(const uint index) { return index >> arrA_valuesPerCellLog2; }
  10. uint arrA_subcell(const uint index) { return (index & arrA_indexMask) << arrA_bitwidthLog2; }
  11. int arrA_get2(__local const uint* const a, const uint cell, const uint subcell) { return (a[cell] >> subcell) & arrA_valueMask; }
  12. void arrA_set2(__local uint* const a, const uint cell, const uint subcell, const int v) { uint original = 0; uint current = 0; do { original = a[cell]; const uint cleared = original & ~(arrA_valueMask << subcell); const uint desired = cleared | v << subcell; current = atomic_cmpxchg(&a[cell], original, desired); } while (current != original);}
  13. void arrA_set(__local uint* const a, const uint index, const uint v) { arrA_set2(a, arrA_cell(index), arrA_subcell(index), v); }
  14. int arrA_get(__local const uint* const a, const uint index) { const uint x = index % 8; return arrA_get2(a, arrA_cell(x), arrA_subcell(x)); }
  15. #define LINEAR_LOCAL_ID_arrA (get_local_id(1) * get_local_size(0) + get_local_id(0))
  16. #define LINEAR_GROUP_ID_arrA (get_group_id(1) * get_num_groups(0) + get_group_id(0))
  17. #define LINEAR_GLOBAL_ID_arrA (LINEAR_GROUP_ID_arrA * 1 + LINEAR_LOCAL_ID_arrA)
  18. uint arrA_getSectionID() { return (get_group_id(0) * 1) / get_num_groups(0); }
  19. uint arrA_getSectionID_2D() { return LINEAR_GROUP_ID_arrA % 1;}
  20. #define INIT_VAR_arrA(global,local) __local uint (local)[2]; prefetch_arrA(arrA_getSectionID(), get_local_id(0), (global), (local));
  21. #define INIT_VAR_2D_arrA(global,local) __local uint (local)[2]; prefetch_arrA(arrA_getSectionID_2D(), LINEAR_LOCAL_ID_arrA, (global), (local));
  22. #define INIT_VAR_2D_row_major_arrA(global,local, tileID, M) __local uint (local)[1][1]; prefetch_2D_row_major_arrA(tileID, M, (global), (local));
  23. #define INIT_VAR_2D_col_major_arrA(global,local, tileID, K) __local uint (local)[1][1]; prefetch_2D_col_major_arrA(tileID, K, (global), (local));
  24. uint arrA_global_index(uint sectionID, uint local_index) {
  25. return (sectionID * 8 + local_index);
  26. }
  27. void prefetch_arrA(const uint sectionID, const uint threadID, __global const uint* const ga, __local uint* la) {
  28. const uint g_offset = sectionID * 2;
  29. #pragma unroll 2
  30. for (uint loop=0, cell=threadID; loop < 2; loop++, cell += 1) {
  31. const uint g_cell = g_offset + cell;
  32. la[cell] = ga[g_cell];
  33. }
  34. barrier(CLK_LOCAL_MEM_FENCE);
  35. }
  36. void prefetch_2D_row_major_arrA(const uint tileID, const uint M, __global const uint* const ga, __local uint** la) {
  37. const int row = get_local_id(0);
  38. const int col = get_local_id(1);
  39. const int globalRow = 1 * get_group_id(0) + row;
  40. const int tiledCol = 1 * tileID + col;
  41. la[col][row] = ga[tiledCol*M + globalRow];
  42. }
  43. void prefetch_2D_col_major_arrA(const uint tileID, const uint K, __global const uint* const ga, __local uint** la) {
  44. const int row = get_local_id(0);
  45. const int col = get_local_id(1);
  46. const int globalCol = 1 * get_group_id(1) + col;
  47. const int tiledRow = 1 * tileID + row;
  48. la[col][row] = ga[globalCol*K + tiledRow];
  49. }
  50. #define arrB_GROUP_SIZE 1
  51. #define arrB_bitwidth 8
  52. #define arrB_bitwidthLog2 3
  53. #define arrB_valuesPerCell 4
  54. #define arrB_valuesPerCellLog2 2
  55. #define arrB_valueMask 0xff
  56. #define arrB_indexMask 0x3
  57. #define arrB_base_offset 0
  58. uint arrB_cell(const uint index) { return index >> arrB_valuesPerCellLog2; }
  59. uint arrB_subcell(const uint index) { return (index & arrB_indexMask) << arrB_bitwidthLog2; }
  60. int arrB_get2(__local const uint* const a, const uint cell, const uint subcell) { return (a[cell] >> subcell) & arrB_valueMask; }
  61. void arrB_set2(__local uint* const a, const uint cell, const uint subcell, const int v) { uint original = 0; uint current = 0; do { original = a[cell]; const uint cleared = original & ~(arrB_valueMask << subcell); const uint desired = cleared | v << subcell; current = atomic_cmpxchg(&a[cell], original, desired); } while (current != original);}
  62. void arrB_set(__local uint* const a, const uint index, const uint v) { arrB_set2(a, arrB_cell(index), arrB_subcell(index), v); }
  63. int arrB_get(__local const uint* const a, const uint index) { const uint x = index % 8; return arrB_get2(a, arrB_cell(x), arrB_subcell(x)); }
  64. #define LINEAR_LOCAL_ID_arrB (get_local_id(1) * get_local_size(0) + get_local_id(0))
  65. #define LINEAR_GROUP_ID_arrB (get_group_id(1) * get_num_groups(0) + get_group_id(0))
  66. #define LINEAR_GLOBAL_ID_arrB (LINEAR_GROUP_ID_arrB * 1 + LINEAR_LOCAL_ID_arrB)
  67. uint arrB_getSectionID() { return (get_group_id(0) * 1) / get_num_groups(0); }
  68. uint arrB_getSectionID_2D() { return LINEAR_GROUP_ID_arrB % 1;}
  69. #define INIT_VAR_arrB(global,local) __local uint (local)[2]; prefetch_arrB(arrB_getSectionID(), get_local_id(0), (global), (local));
  70. #define INIT_VAR_2D_arrB(global,local) __local uint (local)[2]; prefetch_arrB(arrB_getSectionID_2D(), LINEAR_LOCAL_ID_arrB, (global), (local));
  71. #define INIT_VAR_2D_row_major_arrB(global,local, tileID, M) __local uint (local)[1][1]; prefetch_2D_row_major_arrB(tileID, M, (global), (local));
  72. #define INIT_VAR_2D_col_major_arrB(global,local, tileID, K) __local uint (local)[1][1]; prefetch_2D_col_major_arrB(tileID, K, (global), (local));
  73. uint arrB_global_index(uint sectionID, uint local_index) {
  74. return (sectionID * 8 + local_index);
  75. }
  76. void prefetch_arrB(const uint sectionID, const uint threadID, __global const uint* const ga, __local uint* la) {
  77. const uint g_offset = sectionID * 2;
  78. #pragma unroll 2
  79. for (uint loop=0, cell=threadID; loop < 2; loop++, cell += 1) {
  80. const uint g_cell = g_offset + cell;
  81. la[cell] = ga[g_cell];
  82. }
  83. barrier(CLK_LOCAL_MEM_FENCE);
  84. }
  85. void prefetch_2D_row_major_arrB(const uint tileID, const uint M, __global const uint* const ga, __local uint** la) {
  86. const int row = get_local_id(0);
  87. const int col = get_local_id(1);
  88. const int globalRow = 1 * get_group_id(0) + row;
  89. const int tiledCol = 1 * tileID + col;
  90. la[col][row] = ga[tiledCol*M + globalRow];
  91. }
  92. void prefetch_2D_col_major_arrB(const uint tileID, const uint K, __global const uint* const ga, __local uint** la) {
  93. const int row = get_local_id(0);
  94. const int col = get_local_id(1);
  95. const int globalCol = 1 * get_group_id(1) + col;
  96. const int tiledRow = 1 * tileID + row;
  97. la[col][row] = ga[globalCol*K + tiledRow];
  98. }
  99. __kernel void multi_tile(const int M, const int N, const int K,
  100. const __global int* A,
  101. const __global int* B,
  102. const __global int* Target,
  103. __global int* C) {
  104.  
  105. const int row = get_local_id(0); // Local row ID (max: arrA_GROUP_SIZE)
  106. const int col = get_local_id(1); // Local col ID (max: arrA_GROUP_SIZE)
  107. const int globalRow = arrA_GROUP_SIZE*get_group_id(0) + row; // Row ID of C (0..M)
  108. const int globalCol = arrA_GROUP_SIZE*get_group_id(1) + col; // Col ID of C (0..N)
  109.  
  110. int acc = 0;
  111.  
  112. const int numTiles = K/arrA_GROUP_SIZE;
  113. for (int t=0; t<numTiles; t++) {
  114. INIT_VAR_2D_col_major_arrA(A, A_sub, t, M)
  115. INIT_VAR_2D_row_major_arrB(B, B_sub, t, K)
  116. barrier(CLK_LOCAL_MEM_FENCE);
  117.  
  118. for (int k=0; k<arrA_GROUP_SIZE; k++) {
  119. acc += A_sub[k][row] * B_sub[col][k];
  120. }
  121.  
  122. barrier(CLK_LOCAL_MEM_FENCE);
  123. }
  124.  
  125. // Store the final result in C
  126. printf("%d, %d - %d\n", row, col, acc);
  127. C[globalCol*M + globalRow] = acc;
  128. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement