Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- void tile() {
- uint32_t dim = 2;
- uint32_t M = dim;
- uint32_t N = dim;
- uint32_t K = 3;
- uint32_t group_size = 1;
- PackedArrayImpl a("arrA", 32, M*K);
- PackedArrayImpl b("arrB", 32, K*N);
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < K; j++) {
- a.set(i*M + j, (i*M + j));
- }
- }
- for (size_t i = 0; i < K; i++) {
- for (size_t j = 0; j < N; j++) {
- b.set(i*M + j, (20 + i*K + j));
- }
- }
- ifstream infile{ "../samples/fydp-tune/multi_tile.cl" };
- string kernel{ istreambuf_iterator<char>(infile), istreambuf_iterator<char>() };
- string headerA = a.getConfig().generateOpenCLCode(true, group_size);
- string headerB = b.getConfig().generateOpenCLCode(true, group_size);
- string full_kernel = headerA + headerB + kernel;
- cout<<full_kernel<<endl;
- int32_t *cellA = a.getCells();
- int32_t *cellB = b.getCells();
- std::vector<int32_t> arrA(cellA, cellA+a.physical_capacity());
- std::vector<int32_t> arrB(cellB, cellB+b.physical_capacity());
- std::vector<int32_t> target(M*N);
- std::vector<int32_t> arrC(M*N);
- for (size_t i = 0; i < M; i++) {
- for (size_t j = 0; j < N; j++) {
- target[i*N + j] = 0;
- for (size_t k = 0; k < K; k++) {
- target[i*N+ j] += a.get(i*K+k) * b.get(k*N+j);
- //cout << "a[" << i << "][" << k << "] = " << a.get(i*K+k);
- //cout << " * b[" << k << "][" << j << "] = " << b.get(k*N+j) << endl;
- }
- }
- }
- for (auto x : target) cout << x << " ";
- cout << endl;
- cout << endl;
- for (auto x : arrA) cout << x << " ";
- cout << endl;
- for (auto x : arrB) cout << x << " ";
- cout << endl;
- cltune::Tuner tuner(size_t{0}, size_t{0});
- tuner.AddKernelFromString(full_kernel, "multi_tile", {M, N}, {group_size, group_size});
- tuner.SetReference({"../samples/fydp-tune/ref.cl"}, "ref", {M, N}, {group_size, group_size});
- tuner.AddArgumentScalar((int)M);
- tuner.AddArgumentScalar((int)N);
- tuner.AddArgumentScalar((int)K);
- tuner.AddArgumentInput(arrA);
- tuner.AddArgumentInput(arrB);
- tuner.AddArgumentInput(target);
- tuner.AddArgumentOutput(arrC);
- tuner.SetNumRuns(1);
- // Starts the tuner
- tuner.Tune();
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement