Advertisement
Guest User

Untitled

a guest
Mar 27th, 2017
49
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.34 KB | None | 0 0
  1. void tile() {
  2. uint32_t dim = 2;
  3. uint32_t M = dim;
  4. uint32_t N = dim;
  5. uint32_t K = 3;
  6. uint32_t group_size = 1;
  7.  
  8. PackedArrayImpl a("arrA", 32, M*K);
  9. PackedArrayImpl b("arrB", 32, K*N);
  10.  
  11. for (size_t i = 0; i < M; i++) {
  12. for (size_t j = 0; j < K; j++) {
  13. a.set(i*M + j, (i*M + j));
  14. }
  15. }
  16.  
  17. for (size_t i = 0; i < K; i++) {
  18. for (size_t j = 0; j < N; j++) {
  19. b.set(i*M + j, (20 + i*K + j));
  20. }
  21. }
  22.  
  23.  
  24. ifstream infile{ "../samples/fydp-tune/multi_tile.cl" };
  25. string kernel{ istreambuf_iterator<char>(infile), istreambuf_iterator<char>() };
  26. string headerA = a.getConfig().generateOpenCLCode(true, group_size);
  27. string headerB = b.getConfig().generateOpenCLCode(true, group_size);
  28. string full_kernel = headerA + headerB + kernel;
  29.  
  30. cout<<full_kernel<<endl;
  31.  
  32. int32_t *cellA = a.getCells();
  33. int32_t *cellB = b.getCells();
  34. std::vector<int32_t> arrA(cellA, cellA+a.physical_capacity());
  35. std::vector<int32_t> arrB(cellB, cellB+b.physical_capacity());
  36. std::vector<int32_t> target(M*N);
  37. std::vector<int32_t> arrC(M*N);
  38.  
  39. for (size_t i = 0; i < M; i++) {
  40. for (size_t j = 0; j < N; j++) {
  41. target[i*N + j] = 0;
  42. for (size_t k = 0; k < K; k++) {
  43. target[i*N+ j] += a.get(i*K+k) * b.get(k*N+j);
  44. //cout << "a[" << i << "][" << k << "] = " << a.get(i*K+k);
  45. //cout << " * b[" << k << "][" << j << "] = " << b.get(k*N+j) << endl;
  46. }
  47. }
  48. }
  49.  
  50. for (auto x : target) cout << x << " ";
  51. cout << endl;
  52. cout << endl;
  53. for (auto x : arrA) cout << x << " ";
  54. cout << endl;
  55. for (auto x : arrB) cout << x << " ";
  56. cout << endl;
  57.  
  58. cltune::Tuner tuner(size_t{0}, size_t{0});
  59. tuner.AddKernelFromString(full_kernel, "multi_tile", {M, N}, {group_size, group_size});
  60. tuner.SetReference({"../samples/fydp-tune/ref.cl"}, "ref", {M, N}, {group_size, group_size});
  61.  
  62. tuner.AddArgumentScalar((int)M);
  63. tuner.AddArgumentScalar((int)N);
  64. tuner.AddArgumentScalar((int)K);
  65. tuner.AddArgumentInput(arrA);
  66. tuner.AddArgumentInput(arrB);
  67. tuner.AddArgumentInput(target);
  68. tuner.AddArgumentOutput(arrC);
  69. tuner.SetNumRuns(1);
  70.  
  71.  
  72. // Starts the tuner
  73. tuner.Tune();
  74.  
  75. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement