Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
#include <chrono>
#include <iostream>
#include <random>
#include <vector>
/// Fills `neq` with nine values per cell for an N x N grid of cells.
///
/// For every cell `c` in [0, N*N) and every direction `d` in [0, 9) the
/// value written to `neq[c * 9 + d]` is
///
///     w[d] * rho[c] * (1 + eu + 0.5 * eu^2 - 1.5 * |u|^2)
///
/// where `eu = 3 * (ex[d] * ux[c] + ey[d] * uy[c])` and
/// `|u|^2 = ux[c]^2 + uy[c]^2`.
/// NOTE(review): this has the form of the second-order D2Q9
/// lattice-Boltzmann equilibrium distribution -- confirm against the caller.
///
/// @param neq  Output buffer; must hold at least N * N * 9 floats.
/// @param ux   Per-cell x velocity component, N * N entries.
/// @param uy   Per-cell y velocity component, N * N entries.
/// @param rho  Per-cell scalar multiplier (density), N * N entries.
/// @param ex   x component of each of the 9 direction vectors.
/// @param ey   y component of each of the 9 direction vectors.
/// @param w    Weight of each of the 9 directions.
/// @param N    Grid edge length; N * N cells are processed.
///
/// All pointers are declared `__restrict__`, so the caller must not pass
/// overlapping buffers.
static void compute_neq(float* __restrict__ neq,
                        const float* __restrict__ ux,
                        const float* __restrict__ uy,
                        const float* __restrict__ rho,
                        const float* __restrict__ ex,
                        const float* __restrict__ ey,
                        const float* __restrict__ w,
                        const size_t N)
{
    const size_t cellCount = N * N;

    for (size_t cell = 0; cell != cellCount; ++cell) {
        // The squared speed depends only on the cell, not on the direction.
        const float speedSq = ux[cell] * ux[cell] + uy[cell] * uy[cell];

        // Nine consecutive output slots belong to this cell.
        float* const cellOut = neq + cell * 9;

        for (size_t dir = 0; dir != 9; ++dir) {
            const float eu = 3.0f * (ex[dir] * ux[cell] + ey[dir] * uy[cell]);
            // Evaluation order is kept deliberately: reassociating the terms
            // would change the floating-point rounding.
            float value = 1.0f + eu + 0.5f * eu * eu - 1.5f * speedSq;
            value *= w[dir] * rho[cell];
            cellOut[dir] = value;
        }
    }
}
- int main() {
- std::random_device rd;
- std::mt19937 mt(rd());
- std::uniform_real_distribution<float> dist(0.0f, 1.0f);
- const size_t N = 1000;
- const size_t benchmarkCount = 1000;
- const auto length = N * N;
- const float ex[9] = {0., 1., 0., -1., 0., 1., -1., -1., 1.};
- const float ey[9] = {0., 0., 1., 0., -1., 1., 1., -1., -1.};
- const float w[9] = {4./9., 1./9., 1./9., 1./9., 1./9., 1./36., 1./36., 1./36., 1./36.};
- auto neq = new float[length * 9];
- auto ux = new float[length];
- auto uy = new float[length];
- auto rho = new float[length];
- for (size_t i = 0; i < length; ++i) {
- ux[i] = dist(mt);
- uy[i] = dist(mt);
- rho[i] = 1.0f;
- for (size_t q = 0; q < 9; ++q) neq[9 * i + q] = 0.0f;
- }
- auto start = std::chrono::steady_clock::now();
- for (size_t t = 0; t < benchmarkCount; ++t)
- compute_neq(neq, ux, uy, rho, ex, ey, w, N);
- auto end = std::chrono::steady_clock::now();
- auto diff = end - start;
- std::cout << std::chrono::duration<double, std::milli>(diff).count() / benchmarkCount << " ms" << std::endl;
- std::cout << neq[0] << std::endl;
- return 0;
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement