Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- User@User-PC ~/PyFR/euler_vortex_2d-1.3Test
- $ export PYFR_DEBUG_OMP_KEEP_LIBS=1
- (env4)
- User@User-PC ~/PyFR/euler_vortex_2d-1.3Test
- $ pyfr run -b openmp -p euler_vortex_2d.pyfrm euler_vortex_2d.ini
- <pyfr.util.memoize object at 0x6fffe85c278>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- res cache key bad
- [[[-9.93056816 -8.93056816 -7.93056816 ..., 7.06943184 8.06943184
- 9.06943184]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- [[-9.66999052 -8.66999052 -7.66999052 ..., 7.33000948 8.33000948
- 9.33000948]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- [[-9.33000948 -8.33000948 -7.33000948 ..., 7.66999052 8.66999052
- 9.66999052]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- ...,
- [[-9.66999052 -8.66999052 -7.66999052 ..., 7.33000948 8.33000948
- 9.33000948]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]
- [[-9.33000948 -8.33000948 -7.33000948 ..., 7.66999052 8.66999052
- 9.66999052]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]
- [[-9.06943184 -8.06943184 -7.06943184 ..., 7.93056816 8.93056816
- 9.93056816]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]]
- new res cache key
- <pyfr.util.memoize object at 0x6fffe85c278>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- [[[-9.93056816 -8.93056816 -7.93056816 ..., 7.06943184 8.06943184
- 9.06943184]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- [[-9.66999052 -8.66999052 -7.66999052 ..., 7.33000948 8.33000948
- 9.33000948]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- [[-9.33000948 -8.33000948 -7.33000948 ..., 7.66999052 8.66999052
- 9.66999052]
- [-9.93056816 -9.93056816 -9.93056816 ..., 9.06943184 9.06943184
- 9.06943184]]
- ...,
- [[-9.66999052 -8.66999052 -7.66999052 ..., 7.33000948 8.33000948
- 9.33000948]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]
- [[-9.33000948 -8.33000948 -7.33000948 ..., 7.66999052 8.66999052
- 9.66999052]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]
- [[-9.06943184 -8.06943184 -7.06943184 ..., 7.93056816 8.93056816
- 9.93056816]
- [-9.06943184 -9.06943184 -9.06943184 ..., 9.93056816 9.93056816
- 9.93056816]]]
- res cache key good
- <pyfr.util.memoize object at 0x6fffe85c278>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> mpts
- --args
- --kwargs
- res cache key bad
- [[[-10. -9. -8. ..., 7. 8. 9.]
- [-10. -10. -10. ..., 9. 9. 9.]]
- [[ -9. -8. -7. ..., 8. 9. 10.]
- [-10. -10. -10. ..., 9. 9. 9.]]
- [[-10. -9. -8. ..., 7. 8. 9.]
- [ -9. -9. -9. ..., 10. 10. 10.]]
- [[ -9. -8. -7. ..., 8. 9. 10.]
- [ -9. -9. -9. ..., 10. 10. 10.]]]
- new res cache key
- <pyfr.util.memoize object at 0x6fffe85c160>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> M3
- --args
- --kwargs
- res cache key bad
- <pyfr.backends.openmp.types.OpenMPConstMatrix object at 0x6ffffd5ed68>
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.cblas.OpenMPCBLASKernels object at 0x6fffe55f9e8> par_gemm
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- // CBLAS GEMM constants
- #define ROW_MAJOR 101
- #define NO_TRANS 111
- // CBLAS GEMM prototype
- typedef void (*cblas_gemm_t)(int, int, int,
- int, int, int,
- fpdtype_t, const fpdtype_t *, int,
- const fpdtype_t *, int,
- fpdtype_t, fpdtype_t *, int);
- void
- par_gemm(cblas_gemm_t gemm, int M, int N, int K,
- fpdtype_t alpha, const fpdtype_t *A, int lda,
- const fpdtype_t *B, int ldb,
- fpdtype_t beta, fpdtype_t *C, int ldc)
- {
- #pragma omp parallel
- {
- int begin, end;
- loop_sched_1d(N, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
- gemm(ROW_MAJOR, NO_TRANS, NO_TRANS, M, end - begin, K,
- alpha, A, lda, B + begin, ldb, beta, C + begin, ldc);
- }
- }
- [<class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>]
- --args
- --kwargs
- res cache key bad
- <_FuncPtr object at 0x6fffe5694f8>
- new res cache key
- <pyfr.util.memoize object at 0x6fffe85c240>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- <pyfr.util.memoize object at 0x6fffe85c208>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- res cache key bad
- [[ 4. 4. 4. ..., 4. 4. 4.]
- [ 4. 4. 4. ..., 4. 4. 4.]
- [ 4. 4. 4. ..., 4. 4. 4.]
- ...,
- [ 4. 4. 4. ..., 4. 4. 4.]
- [ 4. 4. 4. ..., 4. 4. 4.]
- [ 4. 4. 4. ..., 4. 4. 4.]]
- new res cache key
- res cache key bad
- <pyfr.backends.openmp.types.OpenMPConstMatrix object at 0x6ffffca9f98>
- new res cache key
- <pyfr.util.memoize object at 0x6fffebbf470>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> negdivconf pyfr.solvers.baseadvec.kernels.negdivconf {'ndims': 2, 'srcex': ['(0.)', '(0.)', '(0.)', '(0.)'], 'nvars': 4}
- --args
- --kwargs
- res cache key bad
- ('\n\n#include <omp.h>\n#include <stdlib.h>\n#include <tgmath.h>\n\n#define PYFR_ALIGN_BYTES 32\n#define PYFR_NOINLINE __attribute__ ((noinline))\n\n#define min(a, b) ((a) < (b) ? (a) : (b))\n#define max(a, b) ((a) > (b) ? (a) : (b))\n\n// Typedefs\ntypedef double fpdtype_t;\n\n// OpenMP static loop scheduling functions\n\nstatic inline int\ngcd(int a, int b)\n{\n return (a == 0) ? b : gcd(b % a, a);\n}\n\nstatic inline void\nloop_sched_1d(int n, int align, int *b, int *e)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Round up n to be a multiple of nth\n int rn = n + nth - 1 - (n - 1) % nth;\n\n // Nominal tile size\n int sz = rn / nth;\n\n // Handle alignment\n sz += align - 1 - (sz - 1) % align;\n\n // Assign the starting and ending index\n *b = sz * tid;\n *e = min(*b + sz, n);\n\n // Clamp\n if (*b >= n)\n *b = *e = 0;\n}\n\nstatic inline void\nloop_sched_2d(int nrow, int ncol, int colalign,\n int *rowb, int *rowe, int *colb, int *cole)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Distribute threads\n int nrowth = gcd(nrow, nth);\n int ncolth = nth / nrowth;\n\n // Row and column indices for our thread\n int rowix = tid / ncolth;\n int colix = tid % ncolth;\n\n // Round up ncol to be a multiple of ncolth\n int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;\n\n // Nominal tile size\n int ntilerow = nrow / nrowth;\n int ntilecol = rncol / ncolth;\n\n // Handle column alignment\n ntilecol += colalign - 1 - (ntilecol - 1) % colalign;\n\n // Assign the starting and ending row to each thread\n *rowb = ntilerow * rowix;\n *rowe = *rowb + ntilerow;\n\n // Assign the starting and ending column to each thread\n *colb = ntilecol * colix;\n *cole = min(*colb + ntilecol, ncol);\n\n // Clamp\n if (*colb >= ncol)\n *colb = *cole = 0;\n}\n\n\n\n\n\nstatic PYFR_NOINLINE void negdivconf_inner(int _nx, const fpdtype_t *__restrict__ rcpdjac_v, fpdtype_t *__restrict__ tdivtconf_v0, fpdtype_t *__restrict__ tdivtconf_v1, fpdtype_t *__restrict__ tdivtconf_v2, fpdtype_t *__restrict__ tdivtconf_v3)\n {\n for (int _x = 0; _x < _nx; _x++)\n {\n \n tdivtconf_v0[_x] = -rcpdjac_v[_x]*tdivtconf_v0[_x] + (0.);\n tdivtconf_v1[_x] = -rcpdjac_v[_x]*tdivtconf_v1[_x] + (0.);\n tdivtconf_v2[_x] = -rcpdjac_v[_x]*tdivtconf_v2[_x] + (0.);\n tdivtconf_v3[_x] = -rcpdjac_v[_x]*tdivtconf_v3[_x] + (0.);\n\n }\n }\n void negdivconf(int _ny, int _nx, const fpdtype_t* __restrict__ rcpdjac_v, int lsdrcpdjac, fpdtype_t* __restrict__ tdivtconf_v, int lsdtdivtconf)\n {\n #pragma omp parallel\n {\n int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);\n int rb, re, cb, ce;\n loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);\n for (int _y = rb; _y < re; _y++)\n {\n negdivconf_inner(ce - cb, rcpdjac_v + _y*lsdrcpdjac + cb, tdivtconf_v + (_y*4 + 0)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 1)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 2)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 3)*lsdtdivtconf + cb);\n }\n }\n }\n\n', 2, ['_ny', '_nx', 'rcpdjac', 'tdivtconf'], [[<class 'numpy.int32'>], [<class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int32'>]])
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> negdivconf
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- static PYFR_NOINLINE void negdivconf_inner(int _nx, const fpdtype_t *__restrict__ rcpdjac_v, fpdtype_t *__restrict__ tdivtconf_v0, fpdtype_t *__restrict__ tdivtconf_v1, fpdtype_t *__restrict__ tdivtconf_v2, fpdtype_t *__restrict__ tdivtconf_v3)
- {
- for (int _x = 0; _x < _nx; _x++)
- {
- tdivtconf_v0[_x] = -rcpdjac_v[_x]*tdivtconf_v0[_x] + (0.);
- tdivtconf_v1[_x] = -rcpdjac_v[_x]*tdivtconf_v1[_x] + (0.);
- tdivtconf_v2[_x] = -rcpdjac_v[_x]*tdivtconf_v2[_x] + (0.);
- tdivtconf_v3[_x] = -rcpdjac_v[_x]*tdivtconf_v3[_x] + (0.);
- }
- }
- void negdivconf(int _ny, int _nx, const fpdtype_t* __restrict__ rcpdjac_v, int lsdrcpdjac, fpdtype_t* __restrict__ tdivtconf_v, int lsdtdivtconf)
- {
- #pragma omp parallel
- {
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int rb, re, cb, ce;
- loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
- for (int _y = rb; _y < re; _y++)
- {
- negdivconf_inner(ce - cb, rcpdjac_v + _y*lsdrcpdjac + cb, tdivtconf_v + (_y*4 + 0)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 1)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 2)*lsdtdivtconf + cb, tdivtconf_v + (_y*4 + 3)*lsdtdivtconf + cb);
- }
- }
- }
- [<class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>]
- --args
- --kwargs
- res cache key bad
- <_FuncPtr object at 0x6fffe5695c0>
- new res cache key
- <pyfr.util.memoize object at 0x6fffe85c160>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> M1 - M3*M2
- --args
- --kwargs
- res cache key bad
- <pyfr.backends.openmp.types.OpenMPConstMatrix object at 0x6fffe108a20>
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.cblas.OpenMPCBLASKernels object at 0x6fffe55f9e8> par_gemm
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- // CBLAS GEMM constants
- #define ROW_MAJOR 101
- #define NO_TRANS 111
- // CBLAS GEMM prototype
- typedef void (*cblas_gemm_t)(int, int, int,
- int, int, int,
- fpdtype_t, const fpdtype_t *, int,
- const fpdtype_t *, int,
- fpdtype_t, fpdtype_t *, int);
- void
- par_gemm(cblas_gemm_t gemm, int M, int N, int K,
- fpdtype_t alpha, const fpdtype_t *A, int lda,
- const fpdtype_t *B, int ldb,
- fpdtype_t beta, fpdtype_t *C, int ldc)
- {
- #pragma omp parallel
- {
- int begin, end;
- loop_sched_1d(N, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
- gemm(ROW_MAJOR, NO_TRANS, NO_TRANS, M, end - begin, K,
- alpha, A, lda, B + begin, ldb, beta, C + begin, ldc);
- }
- }
- [<class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>]
- --args
- --kwargs
- <_FuncPtr object at 0x6fffe5694f8>
- res cache key good
- <pyfr.util.memoize object at 0x6fffe85c160>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> M0
- --args
- --kwargs
- res cache key bad
- <pyfr.backends.openmp.types.OpenMPConstMatrix object at 0x6fffe108978>
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.cblas.OpenMPCBLASKernels object at 0x6fffe55f9e8> par_gemm
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- // CBLAS GEMM constants
- #define ROW_MAJOR 101
- #define NO_TRANS 111
- // CBLAS GEMM prototype
- typedef void (*cblas_gemm_t)(int, int, int,
- int, int, int,
- fpdtype_t, const fpdtype_t *, int,
- const fpdtype_t *, int,
- fpdtype_t, fpdtype_t *, int);
- void
- par_gemm(cblas_gemm_t gemm, int M, int N, int K,
- fpdtype_t alpha, const fpdtype_t *A, int lda,
- const fpdtype_t *B, int ldb,
- fpdtype_t beta, fpdtype_t *C, int ldc)
- {
- #pragma omp parallel
- {
- int begin, end;
- loop_sched_1d(N, PYFR_ALIGN_BYTES / sizeof(fpdtype_t), &begin, &end);
- gemm(ROW_MAJOR, NO_TRANS, NO_TRANS, M, end - begin, K,
- alpha, A, lda, B + begin, ldb, beta, C + begin, ldc);
- }
- }
- [<class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.float64'>, <class 'numpy.int64'>, <class 'numpy.int32'>]
- --args
- --kwargs
- <_FuncPtr object at 0x6fffe5694f8>
- res cache key good
- <pyfr.util.memoize object at 0x6fffe85c1d0>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- <pyfr.util.memoize object at 0x6fffe85c198>
- --self
- <pyfr.solvers.euler.elements.EulerElements object at 0x6fffe567630> upts
- --args
- --kwargs
- res cache key bad
- [[[[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -6.16678657e-17 -8.26510554e-16 3.91590947e-15 ...,
- 5.16569096e-14 0.00000000e+00 0.00000000e+00]]
- [[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -2.93107297e-16 -5.95071123e-16 1.86123133e-14 ...,
- 3.71919452e-14 0.00000000e+00 0.00000000e+00]]
- [[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -5.95071123e-16 -2.93107297e-16 3.77870163e-14 ...,
- 1.83192061e-14 0.00000000e+00 0.00000000e+00]]
- ...,
- [[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -2.93107297e-16 -5.95071123e-16 1.86123133e-14 ...,
- 3.71919452e-14 0.00000000e+00 0.00000000e+00]]
- [[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -5.95071123e-16 -2.93107297e-16 3.77870163e-14 ...,
- 1.83192061e-14 0.00000000e+00 0.00000000e+00]]
- [[ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]
- [ -8.26510554e-16 -6.16678657e-17 5.24834202e-14 ...,
- 3.85424160e-15 0.00000000e+00 0.00000000e+00]]]
- [[[ -6.16678657e-17 6.16678657e-17 -1.23335731e-16 ...,
- 0.00000000e+00 -8.26510554e-16 8.26510554e-16]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]
- [[ -6.16678657e-17 6.16678657e-17 -1.23335731e-16 ...,
- 0.00000000e+00 -8.26510554e-16 8.26510554e-16]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]
- [[ -6.16678657e-17 6.16678657e-17 -1.23335731e-16 ...,
- 0.00000000e+00 -8.26510554e-16 8.26510554e-16]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]
- ...,
- [[ -8.26510554e-16 8.26510554e-16 -1.65302111e-15 ...,
- 0.00000000e+00 -6.16678657e-17 6.16678657e-17]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]
- [[ -8.26510554e-16 8.26510554e-16 -1.65302111e-15 ...,
- 0.00000000e+00 -6.16678657e-17 6.16678657e-17]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]
- [[ -8.26510554e-16 8.26510554e-16 -1.65302111e-15 ...,
- 0.00000000e+00 -6.16678657e-17 6.16678657e-17]
- [ 5.00000000e-01 5.00000000e-01 5.00000000e-01 ...,
- 5.00000000e-01 5.00000000e-01 5.00000000e-01]]]]
- new res cache key
- res cache key bad
- <pyfr.backends.openmp.types.OpenMPConstMatrix object at 0x6fffe159b70>
- new res cache key
- <pyfr.util.memoize object at 0x6fffebbf470>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> tflux pyfr.solvers.euler.kernels.tflux {'ndims': 2, 'c': OrderedDict([('gamma', 1.4), ('S', 13.5), ('M', 0.4), ('R', 1.5)]), 'nvars': 4}
- --args
- --kwargs
- res cache key bad
- ('\n\n#include <omp.h>\n#include <stdlib.h>\n#include <tgmath.h>\n\n#define PYFR_ALIGN_BYTES 32\n#define PYFR_NOINLINE __attribute__ ((noinline))\n\n#define min(a, b) ((a) < (b) ? (a) : (b))\n#define max(a, b) ((a) > (b) ? (a) : (b))\n\n// Typedefs\ntypedef double fpdtype_t;\n\n// OpenMP static loop scheduling functions\n\nstatic inline int\ngcd(int a, int b)\n{\n return (a == 0) ? b : gcd(b % a, a);\n}\n\nstatic inline void\nloop_sched_1d(int n, int align, int *b, int *e)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Round up n to be a multiple of nth\n int rn = n + nth - 1 - (n - 1) % nth;\n\n // Nominal tile size\n int sz = rn / nth;\n\n // Handle alignment\n sz += align - 1 - (sz - 1) % align;\n\n // Assign the starting and ending index\n *b = sz * tid;\n *e = min(*b + sz, n);\n\n // Clamp\n if (*b >= n)\n *b = *e = 0;\n}\n\nstatic inline void\nloop_sched_2d(int nrow, int ncol, int colalign,\n int *rowb, int *rowe, int *colb, int *cole)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Distribute threads\n int nrowth = gcd(nrow, nth);\n int ncolth = nth / nrowth;\n\n // Row and column indices for our thread\n int rowix = tid / ncolth;\n int colix = tid % ncolth;\n\n // Round up ncol to be a multiple of ncolth\n int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;\n\n // Nominal tile size\n int ntilerow = nrow / nrowth;\n int ntilecol = rncol / ncolth;\n\n // Handle column alignment\n ntilecol += colalign - 1 - (ntilecol - 1) % colalign;\n\n // Assign the starting and ending row to each thread\n *rowb = ntilerow * rowix;\n *rowe = *rowb + ntilerow;\n\n // Assign the starting and ending column to each thread\n *colb = ntilecol * colix;\n *cole = min(*colb + ntilecol, ncol);\n\n // Clamp\n if (*colb >= ncol)\n *colb = *cole = 0;\n}\n\n\n\n\n\n\n\n\n\nstatic PYFR_NOINLINE void tflux_inner(int _nx, fpdtype_t *__restrict__ f_v0v0, fpdtype_t *__restrict__ f_v0v1, fpdtype_t *__restrict__ f_v0v2, fpdtype_t *__restrict__ f_v0v3, fpdtype_t *__restrict__ f_v1v0, fpdtype_t *__restrict__ f_v1v1, fpdtype_t *__restrict__ f_v1v2, fpdtype_t *__restrict__ f_v1v3, const fpdtype_t *__restrict__ smats_v0v0, const fpdtype_t *__restrict__ smats_v0v1, const fpdtype_t *__restrict__ smats_v1v0, const fpdtype_t *__restrict__ smats_v1v1, const fpdtype_t *__restrict__ u_v0, const fpdtype_t *__restrict__ u_v1, const fpdtype_t *__restrict__ u_v2, const fpdtype_t *__restrict__ u_v3)\n {\n for (int _x = 0; _x < _nx; _x++)\n {\n \n // Compute the flux\n fpdtype_t ftemp[2][4];\n fpdtype_t p, v[2];\n {\n\n fpdtype_t invrho_ = 1.0/u_v0[_x], E_ = u_v3[_x];\n\n // Compute the velocities\n fpdtype_t rhov_[2];\n rhov_[0] = u_v1[_x];\n v[0] = invrho_*rhov_[0];\n rhov_[1] = u_v2[_x];\n v[1] = invrho_*rhov_[1];\n\n // Compute the pressure\n p = 0.3999999999999999*(E_ - 0.5*invrho_*((rhov_[0])*(rhov_[0]) + (rhov_[1])*(rhov_[1])));\n\n // Density and energy fluxes\n ftemp[0][0] = rhov_[0];\n ftemp[0][3] = (E_ + p)*v[0];\n ftemp[1][0] = rhov_[1];\n ftemp[1][3] = (E_ + p)*v[1];\n\n // Momentum fluxes\n ftemp[0][1] = rhov_[0]*v[0] + p;\n ftemp[0][2] = rhov_[0]*v[1];\n ftemp[1][1] = rhov_[1]*v[0];\n ftemp[1][2] = rhov_[1]*v[1] + p;\n\n};\n\n // Transform the fluxes\n f_v0v0[_x] = smats_v0v0[_x]*ftemp[0][0] + smats_v0v1[_x]*ftemp[1][0];\n f_v0v1[_x] = smats_v0v0[_x]*ftemp[0][1] + smats_v0v1[_x]*ftemp[1][1];\n f_v0v2[_x] = smats_v0v0[_x]*ftemp[0][2] + smats_v0v1[_x]*ftemp[1][2];\n f_v0v3[_x] = smats_v0v0[_x]*ftemp[0][3] + smats_v0v1[_x]*ftemp[1][3];\n f_v1v0[_x] = smats_v1v0[_x]*ftemp[0][0] + smats_v1v1[_x]*ftemp[1][0];\n f_v1v1[_x] = smats_v1v0[_x]*ftemp[0][1] + smats_v1v1[_x]*ftemp[1][1];\n f_v1v2[_x] = smats_v1v0[_x]*ftemp[0][2] + smats_v1v1[_x]*ftemp[1][2];\n f_v1v3[_x] = smats_v1v0[_x]*ftemp[0][3] + smats_v1v1[_x]*ftemp[1][3];\n\n }\n }\n void tflux(int _ny, int _nx, fpdtype_t* __restrict__ f_v, int lsdf, const fpdtype_t* __restrict__ smats_v, int lsdsmats, const fpdtype_t* __restrict__ u_v, int lsdu)\n {\n #pragma omp parallel\n {\n int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);\n int rb, re, cb, ce;\n loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);\n for (int _y = rb; _y < re; _y++)\n {\n tflux_inner(ce - cb, f_v + ((0*_ny + _y)*4 + 0)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 1)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 2)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 3)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 0)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 1)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 2)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 3)*lsdf + cb, smats_v + ((0*_ny + _y)*2 + 0)*lsdsmats + cb, smats_v + ((0*_ny + _y)*2 + 1)*lsdsmats + cb, smats_v + ((1*_ny + _y)*2 + 0)*lsdsmats + cb, smats_v + ((1*_ny + _y)*2 + 1)*lsdsmats + cb, u_v + (_y*4 + 0)*lsdu + cb, u_v + (_y*4 + 1)*lsdu + cb, u_v + (_y*4 + 2)*lsdu + cb, u_v + (_y*4 + 3)*lsdu + cb);\n }\n }\n }\n\n', 2, ['_ny', '_nx', 'f', 'smats', 'u'], [[<class 'numpy.int32'>], [<class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int32'>]])
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> tflux
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- static PYFR_NOINLINE void tflux_inner(int _nx, fpdtype_t *__restrict__ f_v0v0, fpdtype_t *__restrict__ f_v0v1, fpdtype_t *__restrict__ f_v0v2, fpdtype_t *__restrict__ f_v0v3, fpdtype_t *__restrict__ f_v1v0, fpdtype_t *__restrict__ f_v1v1, fpdtype_t *__restrict__ f_v1v2, fpdtype_t *__restrict__ f_v1v3, const fpdtype_t *__restrict__ smats_v0v0, const fpdtype_t *__restrict__ smats_v0v1, const fpdtype_t *__restrict__ smats_v1v0, const fpdtype_t *__restrict__ smats_v1v1, const fpdtype_t *__restrict__ u_v0, const fpdtype_t *__restrict__ u_v1, const fpdtype_t *__restrict__ u_v2, const fpdtype_t *__restrict__ u_v3)
- {
- for (int _x = 0; _x < _nx; _x++)
- {
- // Compute the flux
- fpdtype_t ftemp[2][4];
- fpdtype_t p, v[2];
- {
- fpdtype_t invrho_ = 1.0/u_v0[_x], E_ = u_v3[_x];
- // Compute the velocities
- fpdtype_t rhov_[2];
- rhov_[0] = u_v1[_x];
- v[0] = invrho_*rhov_[0];
- rhov_[1] = u_v2[_x];
- v[1] = invrho_*rhov_[1];
- // Compute the pressure
- p = 0.3999999999999999*(E_ - 0.5*invrho_*((rhov_[0])*(rhov_[0]) + (rhov_[1])*(rhov_[1])));
- // Density and energy fluxes
- ftemp[0][0] = rhov_[0];
- ftemp[0][3] = (E_ + p)*v[0];
- ftemp[1][0] = rhov_[1];
- ftemp[1][3] = (E_ + p)*v[1];
- // Momentum fluxes
- ftemp[0][1] = rhov_[0]*v[0] + p;
- ftemp[0][2] = rhov_[0]*v[1];
- ftemp[1][1] = rhov_[1]*v[0];
- ftemp[1][2] = rhov_[1]*v[1] + p;
- };
- // Transform the fluxes
- f_v0v0[_x] = smats_v0v0[_x]*ftemp[0][0] + smats_v0v1[_x]*ftemp[1][0];
- f_v0v1[_x] = smats_v0v0[_x]*ftemp[0][1] + smats_v0v1[_x]*ftemp[1][1];
- f_v0v2[_x] = smats_v0v0[_x]*ftemp[0][2] + smats_v0v1[_x]*ftemp[1][2];
- f_v0v3[_x] = smats_v0v0[_x]*ftemp[0][3] + smats_v0v1[_x]*ftemp[1][3];
- f_v1v0[_x] = smats_v1v0[_x]*ftemp[0][0] + smats_v1v1[_x]*ftemp[1][0];
- f_v1v1[_x] = smats_v1v0[_x]*ftemp[0][1] + smats_v1v1[_x]*ftemp[1][1];
- f_v1v2[_x] = smats_v1v0[_x]*ftemp[0][2] + smats_v1v1[_x]*ftemp[1][2];
- f_v1v3[_x] = smats_v1v0[_x]*ftemp[0][3] + smats_v1v1[_x]*ftemp[1][3];
- }
- }
- void tflux(int _ny, int _nx, fpdtype_t* __restrict__ f_v, int lsdf, const fpdtype_t* __restrict__ smats_v, int lsdsmats, const fpdtype_t* __restrict__ u_v, int lsdu)
- {
- #pragma omp parallel
- {
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int rb, re, cb, ce;
- loop_sched_2d(_ny, _nx, align, &rb, &re, &cb, &ce);
- for (int _y = rb; _y < re; _y++)
- {
- tflux_inner(ce - cb, f_v + ((0*_ny + _y)*4 + 0)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 1)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 2)*lsdf + cb, f_v + ((0*_ny + _y)*4 + 3)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 0)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 1)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 2)*lsdf + cb, f_v + ((1*_ny + _y)*4 + 3)*lsdf + cb, smats_v + ((0*_ny + _y)*2 + 0)*lsdsmats + cb, smats_v + ((0*_ny + _y)*2 + 1)*lsdsmats + cb, smats_v + ((1*_ny + _y)*2 + 0)*lsdsmats + cb, smats_v + ((1*_ny + _y)*2 + 1)*lsdsmats + cb, u_v + (_y*4 + 0)*lsdu + cb, u_v + (_y*4 + 1)*lsdu + cb, u_v + (_y*4 + 2)*lsdu + cb, u_v + (_y*4 + 3)*lsdu + cb);
- }
- }
- }
- [<class 'numpy.int32'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int32'>]
- --args
- --kwargs
- res cache key bad
- <_FuncPtr object at 0x6fffe569688>
- new res cache key
- <pyfr.util.memoize object at 0x6fffebbf470>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> intcflux pyfr.solvers.euler.kernels.intcflux {'ndims': 2, 'rsolver': 'rusanov', 'c': OrderedDict([('gamma', 1.4), ('S', 13.5), ('M', 0.4), ('R', 1.5)]), 'nvars': 4}
- --args
- --kwargs
- res cache key bad
- ('\n\n#include <omp.h>\n#include <stdlib.h>\n#include <tgmath.h>\n\n#define PYFR_ALIGN_BYTES 32\n#define PYFR_NOINLINE __attribute__ ((noinline))\n\n#define min(a, b) ((a) < (b) ? (a) : (b))\n#define max(a, b) ((a) > (b) ? (a) : (b))\n\n// Typedefs\ntypedef double fpdtype_t;\n\n// OpenMP static loop scheduling functions\n\nstatic inline int\ngcd(int a, int b)\n{\n return (a == 0) ? b : gcd(b % a, a);\n}\n\nstatic inline void\nloop_sched_1d(int n, int align, int *b, int *e)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Round up n to be a multiple of nth\n int rn = n + nth - 1 - (n - 1) % nth;\n\n // Nominal tile size\n int sz = rn / nth;\n\n // Handle alignment\n sz += align - 1 - (sz - 1) % align;\n\n // Assign the starting and ending index\n *b = sz * tid;\n *e = min(*b + sz, n);\n\n // Clamp\n if (*b >= n)\n *b = *e = 0;\n}\n\nstatic inline void\nloop_sched_2d(int nrow, int ncol, int colalign,\n int *rowb, int *rowe, int *colb, int *cole)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Distribute threads\n int nrowth = gcd(nrow, nth);\n int ncolth = nth / nrowth;\n\n // Row and column indices for our thread\n int rowix = tid / ncolth;\n int colix = tid % ncolth;\n\n // Round up ncol to be a multiple of ncolth\n int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;\n\n // Nominal tile size\n int ntilerow = nrow / nrowth;\n int ntilecol = rncol / ncolth;\n\n // Handle column alignment\n ntilecol += colalign - 1 - (ntilecol - 1) % colalign;\n\n // Assign the starting and ending row to each thread\n *rowb = ntilerow * rowix;\n *rowe = *rowb + ntilerow;\n\n // Assign the starting and ending column to each thread\n *colb = ntilecol * colix;\n *cole = min(*colb + ntilecol, ncol);\n\n // Clamp\n if (*colb >= ncol)\n *colb = *cole = 0;\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n void intcflux(int _nx, const fpdtype_t* __restrict__ magnl_v, const fpdtype_t* __restrict__ nl_v, int lsdnl, fpdtype_t* __restrict__ ul_v, const int* __restrict__ ul_vix, const int* __restrict__ ul_vcstri, fpdtype_t* __restrict__ ur_v, const int* __restrict__ ur_vix, const int* __restrict__ ur_vcstri)\n {\n #pragma omp parallel\n {\n int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);\n int cb, ce;\n loop_sched_1d(_nx, align, &cb, &ce);\n for (int _x = cb; _x < ce; _x++)\n {\n \n // Perform the Riemann solve\n fpdtype_t fn[4];\n {\n\n // Compute the left and right fluxes + velocities and pressures\n fpdtype_t fl_[2][4], fr_[2][4];\n fpdtype_t vl_[2], vr_[2];\n fpdtype_t pl_, pr_;\n\n {\n\n fpdtype_t invrho__ = 1.0/ul_v[ul_vix[_x] + ul_vcstri[_x]*0], E__ = ul_v[ul_vix[_x] + ul_vcstri[_x]*3];\n\n // Compute the velocities\n fpdtype_t rhov__[2];\n rhov__[0] = ul_v[ul_vix[_x] + ul_vcstri[_x]*1];\n vl_[0] = invrho__*rhov__[0];\n rhov__[1] = ul_v[ul_vix[_x] + ul_vcstri[_x]*2];\n vl_[1] = invrho__*rhov__[1];\n\n // Compute the pressure\n pl_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));\n\n // Density and energy fluxes\n fl_[0][0] = rhov__[0];\n fl_[0][3] = (E__ + pl_)*vl_[0];\n fl_[1][0] = rhov__[1];\n fl_[1][3] = (E__ + pl_)*vl_[1];\n\n // Momentum fluxes\n fl_[0][1] = rhov__[0]*vl_[0] + pl_;\n fl_[0][2] = rhov__[0]*vl_[1];\n fl_[1][1] = rhov__[1]*vl_[0];\n fl_[1][2] = rhov__[1]*vl_[1] + pl_;\n\n};\n {\n\n fpdtype_t invrho__ = 1.0/ur_v[ur_vix[_x] + ur_vcstri[_x]*0], E__ = ur_v[ur_vix[_x] + ur_vcstri[_x]*3];\n\n // Compute the velocities\n fpdtype_t rhov__[2];\n rhov__[0] = ur_v[ur_vix[_x] + ur_vcstri[_x]*1];\n vr_[0] = invrho__*rhov__[0];\n rhov__[1] = ur_v[ur_vix[_x] + ur_vcstri[_x]*2];\n vr_[1] = invrho__*rhov__[1];\n\n // Compute the pressure\n pr_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));\n\n // Density and energy fluxes\n fr_[0][0] = rhov__[0];\n fr_[0][3] = (E__ + pr_)*vr_[0];\n fr_[1][0] = rhov__[1];\n fr_[1][3] = (E__ + pr_)*vr_[1];\n\n // Momentum fluxes\n fr_[0][1] = rhov__[0]*vr_[0] + pr_;\n fr_[0][2] = rhov__[0]*vr_[1];\n fr_[1][1] = rhov__[1]*vr_[0];\n fr_[1][2] = rhov__[1]*vr_[1] + pr_;\n\n};\n\n // Sum the left and right velocities and take the normal\n fpdtype_t nv_ = ((nl_v[lsdnl*0 + _x])*(vl_[0] + vr_[0]) + (nl_v[lsdnl*1 + _x])*(vl_[1] + vr_[1]));\n\n // Estimate the maximum wave speed / 2\n fpdtype_t a_ = sqrt(0.35*(pl_ + pr_)/(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] + ur_v[ur_vix[_x] + ur_vcstri[_x]*0]))\n + 0.25*fabs(nv_);\n\n // Output\n fn[0] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][0] + fr_[0][0]) + nl_v[lsdnl*1 + _x]*(fl_[1][0] + fr_[1][0]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] - ur_v[ur_vix[_x] + ur_vcstri[_x]*0]);\n fn[1] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][1] + fr_[0][1]) + nl_v[lsdnl*1 + _x]*(fl_[1][1] + fr_[1][1]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*1] - ur_v[ur_vix[_x] + ur_vcstri[_x]*1]);\n fn[2] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][2] + fr_[0][2]) + nl_v[lsdnl*1 + _x]*(fl_[1][2] + fr_[1][2]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*2] - ur_v[ur_vix[_x] + ur_vcstri[_x]*2]);\n fn[3] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][3] + fr_[0][3]) + nl_v[lsdnl*1 + _x]*(fl_[1][3] + fr_[1][3]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*3] - ur_v[ur_vix[_x] + ur_vcstri[_x]*3]);\n\n};\n\n // Scale and write out the common normal fluxes\n ul_v[ul_vix[_x] + ul_vcstri[_x]*0] = magnl_v[_x]*fn[0];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*0] = -magnl_v[_x]*fn[0];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*1] = magnl_v[_x]*fn[1];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*1] = -magnl_v[_x]*fn[1];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*2] = magnl_v[_x]*fn[2];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*2] = -magnl_v[_x]*fn[2];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*3] = magnl_v[_x]*fn[3];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*3] = -magnl_v[_x]*fn[3];\n\n }\n }\n }\n\n', 1, ['_nx', 'magnl', 'nl', 'ul', 'ur'], [[<class 'numpy.int32'>], [<class 'numpy.int64'>], [<class 'numpy.int64'>, <class 'numpy.int32'>], [<class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>], [<class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>]])
- new res cache key
- <pyfr.util.memoize object at 0x6fffe7267b8>
- --self
- <pyfr.backends.openmp.provider.OpenMPPointwiseKernelProvider object at 0x6fffe826f60> intcflux
- #include <omp.h>
- #include <stdlib.h>
- #include <tgmath.h>
- #define PYFR_ALIGN_BYTES 32
- #define PYFR_NOINLINE __attribute__ ((noinline))
- #define min(a, b) ((a) < (b) ? (a) : (b))
- #define max(a, b) ((a) > (b) ? (a) : (b))
- // Typedefs
- typedef double fpdtype_t;
- // OpenMP static loop scheduling functions
- static inline int
- gcd(int a, int b)
- {
- return (a == 0) ? b : gcd(b % a, a);
- }
- static inline void
- loop_sched_1d(int n, int align, int *b, int *e)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Round up n to be a multiple of nth
- int rn = n + nth - 1 - (n - 1) % nth;
- // Nominal tile size
- int sz = rn / nth;
- // Handle alignment
- sz += align - 1 - (sz - 1) % align;
- // Assign the starting and ending index
- *b = sz * tid;
- *e = min(*b + sz, n);
- // Clamp
- if (*b >= n)
- *b = *e = 0;
- }
- static inline void
- loop_sched_2d(int nrow, int ncol, int colalign,
- int *rowb, int *rowe, int *colb, int *cole)
- {
- int tid = omp_get_thread_num();
- int nth = omp_get_num_threads();
- // Distribute threads
- int nrowth = gcd(nrow, nth);
- int ncolth = nth / nrowth;
- // Row and column indices for our thread
- int rowix = tid / ncolth;
- int colix = tid % ncolth;
- // Round up ncol to be a multiple of ncolth
- int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;
- // Nominal tile size
- int ntilerow = nrow / nrowth;
- int ntilecol = rncol / ncolth;
- // Handle column alignment
- ntilecol += colalign - 1 - (ntilecol - 1) % colalign;
- // Assign the starting and ending row to each thread
- *rowb = ntilerow * rowix;
- *rowe = *rowb + ntilerow;
- // Assign the starting and ending column to each thread
- *colb = ntilecol * colix;
- *cole = min(*colb + ntilecol, ncol);
- // Clamp
- if (*colb >= ncol)
- *colb = *cole = 0;
- }
- void intcflux(int _nx, const fpdtype_t* __restrict__ magnl_v, const fpdtype_t* __restrict__ nl_v, int lsdnl, fpdtype_t* __restrict__ ul_v, const int* __restrict__ ul_vix, const int* __restrict__ ul_vcstri, fpdtype_t* __restrict__ ur_v, const int* __restrict__ ur_vix, const int* __restrict__ ur_vcstri)
- {
- #pragma omp parallel
- {
- int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);
- int cb, ce;
- loop_sched_1d(_nx, align, &cb, &ce);
- for (int _x = cb; _x < ce; _x++)
- {
- // Perform the Riemann solve
- fpdtype_t fn[4];
- {
- // Compute the left and right fluxes + velocities and pressures
- fpdtype_t fl_[2][4], fr_[2][4];
- fpdtype_t vl_[2], vr_[2];
- fpdtype_t pl_, pr_;
- {
- fpdtype_t invrho__ = 1.0/ul_v[ul_vix[_x] + ul_vcstri[_x]*0], E__ = ul_v[ul_vix[_x] + ul_vcstri[_x]*3];
- // Compute the velocities
- fpdtype_t rhov__[2];
- rhov__[0] = ul_v[ul_vix[_x] + ul_vcstri[_x]*1];
- vl_[0] = invrho__*rhov__[0];
- rhov__[1] = ul_v[ul_vix[_x] + ul_vcstri[_x]*2];
- vl_[1] = invrho__*rhov__[1];
- // Compute the pressure
- pl_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));
- // Density and energy fluxes
- fl_[0][0] = rhov__[0];
- fl_[0][3] = (E__ + pl_)*vl_[0];
- fl_[1][0] = rhov__[1];
- fl_[1][3] = (E__ + pl_)*vl_[1];
- // Momentum fluxes
- fl_[0][1] = rhov__[0]*vl_[0] + pl_;
- fl_[0][2] = rhov__[0]*vl_[1];
- fl_[1][1] = rhov__[1]*vl_[0];
- fl_[1][2] = rhov__[1]*vl_[1] + pl_;
- };
- {
- fpdtype_t invrho__ = 1.0/ur_v[ur_vix[_x] + ur_vcstri[_x]*0], E__ = ur_v[ur_vix[_x] + ur_vcstri[_x]*3];
- // Compute the velocities
- fpdtype_t rhov__[2];
- rhov__[0] = ur_v[ur_vix[_x] + ur_vcstri[_x]*1];
- vr_[0] = invrho__*rhov__[0];
- rhov__[1] = ur_v[ur_vix[_x] + ur_vcstri[_x]*2];
- vr_[1] = invrho__*rhov__[1];
- // Compute the pressure
- pr_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));
- // Density and energy fluxes
- fr_[0][0] = rhov__[0];
- fr_[0][3] = (E__ + pr_)*vr_[0];
- fr_[1][0] = rhov__[1];
- fr_[1][3] = (E__ + pr_)*vr_[1];
- // Momentum fluxes
- fr_[0][1] = rhov__[0]*vr_[0] + pr_;
- fr_[0][2] = rhov__[0]*vr_[1];
- fr_[1][1] = rhov__[1]*vr_[0];
- fr_[1][2] = rhov__[1]*vr_[1] + pr_;
- };
- // Sum the left and right velocities and take the normal
- fpdtype_t nv_ = ((nl_v[lsdnl*0 + _x])*(vl_[0] + vr_[0]) + (nl_v[lsdnl*1 + _x])*(vl_[1] + vr_[1]));
- // Estimate the maximum wave speed / 2
- fpdtype_t a_ = sqrt(0.35*(pl_ + pr_)/(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] + ur_v[ur_vix[_x] + ur_vcstri[_x]*0]))
- + 0.25*fabs(nv_);
- // Output
- fn[0] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][0] + fr_[0][0]) + nl_v[lsdnl*1 + _x]*(fl_[1][0] + fr_[1][0]))
- + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] - ur_v[ur_vix[_x] + ur_vcstri[_x]*0]);
- fn[1] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][1] + fr_[0][1]) + nl_v[lsdnl*1 + _x]*(fl_[1][1] + fr_[1][1]))
- + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*1] - ur_v[ur_vix[_x] + ur_vcstri[_x]*1]);
- fn[2] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][2] + fr_[0][2]) + nl_v[lsdnl*1 + _x]*(fl_[1][2] + fr_[1][2]))
- + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*2] - ur_v[ur_vix[_x] + ur_vcstri[_x]*2]);
- fn[3] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][3] + fr_[0][3]) + nl_v[lsdnl*1 + _x]*(fl_[1][3] + fr_[1][3]))
- + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*3] - ur_v[ur_vix[_x] + ur_vcstri[_x]*3]);
- };
- // Scale and write out the common normal fluxes
- ul_v[ul_vix[_x] + ul_vcstri[_x]*0] = magnl_v[_x]*fn[0];
- ur_v[ur_vix[_x] + ur_vcstri[_x]*0] = -magnl_v[_x]*fn[0];
- ul_v[ul_vix[_x] + ul_vcstri[_x]*1] = magnl_v[_x]*fn[1];
- ur_v[ur_vix[_x] + ur_vcstri[_x]*1] = -magnl_v[_x]*fn[1];
- ul_v[ul_vix[_x] + ul_vcstri[_x]*2] = magnl_v[_x]*fn[2];
- ur_v[ur_vix[_x] + ur_vcstri[_x]*2] = -magnl_v[_x]*fn[2];
- ul_v[ul_vix[_x] + ul_vcstri[_x]*3] = magnl_v[_x]*fn[3];
- ur_v[ur_vix[_x] + ur_vcstri[_x]*3] = -magnl_v[_x]*fn[3];
- }
- }
- }
- [<class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int32'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>, <class 'numpy.int64'>]
- --args
- --kwargs
- 0 [main] python3 4796 child_info_fork::abort: unable to remap libtmp.so to same address as parent (0x1F0000) - try running rebaseall
- Traceback (most recent call last):
- File "/home/User/PyFR/env4/src/pyfr/pyfr/util.py", line 39, in __call__
- res = cache[key]
- KeyError: (<function OpenMPKernelProvider._build_kernel at 0x6fffe54a6a8>, b'\x80\x03X\x08\x00\x00\x00intcfluxq\x00XZ\x18\x00\x00\n\n#include <omp.h>\n#include <stdlib.h>\n#include <tgmath.h>\n\n#define PYFR_ALIGN_BYTES 32\n#define PYFR_NOINLINE __attribute__ ((noinline))\n\n#define min(a, b) ((a) < (b) ? (a) : (b))\n#define max(a, b) ((a) > (b) ? (a) : (b))\n\n// Typedefs\ntypedef double fpdtype_t;\n\n// OpenMP static loop scheduling functions\n\nstatic inline int\ngcd(int a, int b)\n{\n return (a == 0) ? b : gcd(b % a, a);\n}\n\nstatic inline void\nloop_sched_1d(int n, int align, int *b, int *e)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Round up n to be a multiple of nth\n int rn = n + nth - 1 - (n - 1) % nth;\n\n // Nominal tile size\n int sz = rn / nth;\n\n // Handle alignment\n sz += align - 1 - (sz - 1) % align;\n\n // Assign the starting and ending index\n *b = sz * tid;\n *e = min(*b + sz, n);\n\n // Clamp\n if (*b >= n)\n *b = *e = 0;\n}\n\nstatic inline void\nloop_sched_2d(int nrow, int ncol, int colalign,\n int *rowb, int *rowe, int *colb, int *cole)\n{\n int tid = omp_get_thread_num();\n int nth = omp_get_num_threads();\n\n // Distribute threads\n int nrowth = gcd(nrow, nth);\n int ncolth = nth / nrowth;\n\n // Row and column indices for our thread\n int rowix = tid / ncolth;\n int colix = tid % ncolth;\n\n // Round up ncol to be a multiple of ncolth\n int rncol = ncol + ncolth - 1 - (ncol - 1) % ncolth;\n\n // Nominal tile size\n int ntilerow = nrow / nrowth;\n int ntilecol = rncol / ncolth;\n\n // Handle column alignment\n ntilecol += colalign - 1 - (ntilecol - 1) % colalign;\n\n // Assign the starting and ending row to each thread\n *rowb = ntilerow * rowix;\n *rowe = *rowb + ntilerow;\n\n // Assign the starting and ending column to each thread\n *colb = ntilecol * colix;\n *cole = min(*colb + ntilecol, ncol);\n\n // Clamp\n if (*colb >= ncol)\n *colb = *cole = 0;\n}\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n void intcflux(int _nx, const fpdtype_t* __restrict__ magnl_v, const fpdtype_t* __restrict__ nl_v, int lsdnl, fpdtype_t* __restrict__ ul_v, const int* __restrict__ ul_vix, const int* __restrict__ ul_vcstri, fpdtype_t* __restrict__ ur_v, const int* __restrict__ ur_vix, const int* __restrict__ ur_vcstri)\n {\n #pragma omp parallel\n {\n int align = PYFR_ALIGN_BYTES / sizeof(fpdtype_t);\n int cb, ce;\n loop_sched_1d(_nx, align, &cb, &ce);\n for (int _x = cb; _x < ce; _x++)\n {\n \n // Perform the Riemann solve\n fpdtype_t fn[4];\n {\n\n // Compute the left and right fluxes + velocities and pressures\n fpdtype_t fl_[2][4], fr_[2][4];\n fpdtype_t vl_[2], vr_[2];\n fpdtype_t pl_, pr_;\n\n {\n\n fpdtype_t invrho__ = 1.0/ul_v[ul_vix[_x] + ul_vcstri[_x]*0], E__ = ul_v[ul_vix[_x] + ul_vcstri[_x]*3];\n\n // Compute the velocities\n fpdtype_t rhov__[2];\n rhov__[0] = ul_v[ul_vix[_x] + ul_vcstri[_x]*1];\n vl_[0] = invrho__*rhov__[0];\n rhov__[1] = ul_v[ul_vix[_x] + ul_vcstri[_x]*2];\n vl_[1] = invrho__*rhov__[1];\n\n // Compute the pressure\n pl_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));\n\n // Density and energy fluxes\n fl_[0][0] = rhov__[0];\n fl_[0][3] = (E__ + pl_)*vl_[0];\n fl_[1][0] = rhov__[1];\n fl_[1][3] = (E__ + pl_)*vl_[1];\n\n // Momentum fluxes\n fl_[0][1] = rhov__[0]*vl_[0] + pl_;\n fl_[0][2] = rhov__[0]*vl_[1];\n fl_[1][1] = rhov__[1]*vl_[0];\n fl_[1][2] = rhov__[1]*vl_[1] + pl_;\n\n};\n {\n\n fpdtype_t invrho__ = 1.0/ur_v[ur_vix[_x] + ur_vcstri[_x]*0], E__ = ur_v[ur_vix[_x] + ur_vcstri[_x]*3];\n\n // Compute the velocities\n fpdtype_t rhov__[2];\n rhov__[0] = ur_v[ur_vix[_x] + ur_vcstri[_x]*1];\n vr_[0] = invrho__*rhov__[0];\n rhov__[1] = ur_v[ur_vix[_x] + ur_vcstri[_x]*2];\n vr_[1] = invrho__*rhov__[1];\n\n // Compute the pressure\n pr_ = 0.3999999999999999*(E__ - 0.5*invrho__*((rhov__[0])*(rhov__[0]) + (rhov__[1])*(rhov__[1])));\n\n // Density and energy fluxes\n fr_[0][0] = rhov__[0];\n fr_[0][3] = (E__ + pr_)*vr_[0];\n fr_[1][0] = rhov__[1];\n fr_[1][3] = (E__ + pr_)*vr_[1];\n\n // Momentum fluxes\n fr_[0][1] = rhov__[0]*vr_[0] + pr_;\n fr_[0][2] = rhov__[0]*vr_[1];\n fr_[1][1] = rhov__[1]*vr_[0];\n fr_[1][2] = rhov__[1]*vr_[1] + pr_;\n\n};\n\n // Sum the left and right velocities and take the normal\n fpdtype_t nv_ = ((nl_v[lsdnl*0 + _x])*(vl_[0] + vr_[0]) + (nl_v[lsdnl*1 + _x])*(vl_[1] + vr_[1]));\n\n // Estimate the maximum wave speed / 2\n fpdtype_t a_ = sqrt(0.35*(pl_ + pr_)/(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] + ur_v[ur_vix[_x] + ur_vcstri[_x]*0]))\n + 0.25*fabs(nv_);\n\n // Output\n fn[0] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][0] + fr_[0][0]) + nl_v[lsdnl*1 + _x]*(fl_[1][0] + fr_[1][0]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*0] - ur_v[ur_vix[_x] + ur_vcstri[_x]*0]);\n fn[1] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][1] + fr_[0][1]) + nl_v[lsdnl*1 + _x]*(fl_[1][1] + fr_[1][1]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*1] - ur_v[ur_vix[_x] + ur_vcstri[_x]*1]);\n fn[2] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][2] + fr_[0][2]) + nl_v[lsdnl*1 + _x]*(fl_[1][2] + fr_[1][2]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*2] - ur_v[ur_vix[_x] + ur_vcstri[_x]*2]);\n fn[3] = 0.5*(nl_v[lsdnl*0 + _x]*(fl_[0][3] + fr_[0][3]) + nl_v[lsdnl*1 + _x]*(fl_[1][3] + fr_[1][3]))\n + a_*(ul_v[ul_vix[_x] + ul_vcstri[_x]*3] - ur_v[ur_vix[_x] + ur_vcstri[_x]*3]);\n\n};\n\n // Scale and write out the common normal fluxes\n ul_v[ul_vix[_x] + ul_vcstri[_x]*0] = magnl_v[_x]*fn[0];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*0] = -magnl_v[_x]*fn[0];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*1] = magnl_v[_x]*fn[1];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*1] = -magnl_v[_x]*fn[1];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*2] = magnl_v[_x]*fn[2];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*2] = -magnl_v[_x]*fn[2];\n ul_v[ul_vix[_x] + ul_vcstri[_x]*3] = magnl_v[_x]*fn[3];\n ur_v[ur_vix[_x] + ur_vcstri[_x]*3] = -magnl_v[_x]*fn[3];\n\n }\n }\n }\n\nq\x01]q\x02(cnumpy\nint32\nq\x03cnumpy\nint64\nq\x04h\x04h\x03h\x04h\x04h\x04h\x04h\x04h\x04e\x87q\x05.', b'\x80\x03}q\x00.')
- During handling of the above exception, another exception occurred:
- Traceback (most recent call last):
- File "/home/User/PyFR/env4/lib/python3.4/site-packages/pytools/prefork.py", line 46, in call_capture_output
- popen = Popen(cmdline, cwd=cwd, stdin=PIPE, stdout=PIPE, stderr=PIPE)
- File "/usr/lib/python3.4/subprocess.py", line 859, in __init__
- restore_signals, start_new_session)
- File "/usr/lib/python3.4/subprocess.py", line 1395, in _execute_child
- restore_signals, start_new_session, preexec_fn)
- BlockingIOError: [Errno 11] Resource temporarily unavailable
- During handling of the above exception, another exception occurred:
- Traceback (most recent call last):
- File "/home/User/PyFR/env4/bin/pyfr", line 9, in <module>
- load_entry_point('pyfr==1.3.0', 'console_scripts', 'pyfr')()
- File "/home/User/PyFR/env4/src/pyfr/pyfr/scripts/main.py", line 109, in main
- args.process(args)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/scripts/main.py", line 230, in process_run
- args, NativeReader(args.mesh), None, Inifile.load(args.cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/scripts/main.py", line 214, in _process_common
- solver = get_solver(backend, rallocs, mesh, soln, cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/solvers/__init__.py", line 14, in get_solver
- return get_integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/integrators/__init__.py", line 26, in get_integrator
- return integrator(backend, systemcls, rallocs, mesh, initsoln, cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/integrators/controllers.py", line 14, in __init__
- super().__init__(*args, **kwargs)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/integrators/steppers.py", line 9, in __init__
- super().__init__(*args, **kwargs)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/integrators/base.py", line 41, in __init__
- self.system = systemcls(backend, rallocs, mesh, initsoln, nreg, cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/solvers/base/system.py", line 59, in __init__
- self._gen_kernels(eles, int_inters, mpi_inters, bc_inters)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/solvers/base/system.py", line 167, in _gen_kernels
- kernels[pn, kn].append(kgetter())
- File "/home/User/PyFR/env4/src/pyfr/pyfr/solvers/euler/inters.py", line 21, in <lambda>
- magnl=self._mag_pnorm_lhs, nl=self._norm_pnorm_lhs
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/base/backend.py", line 154, in kernel
- return kern(*args, **kwargs)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/base/kernels.py", line 162, in kernel_meth
- fun = self._build_kernel(name, src, list(it.chain(*argt)))
- File "/home/User/PyFR/env4/src/pyfr/pyfr/util.py", line 43, in __call__
- res = cache[key] = self.func(*args, **kwargs)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/openmp/provider.py", line 13, in _build_kernel
- mod = GccSourceModule(src, self.backend.cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/openmp/compiler.py", line 61, in __init__
- super().__init__(src, cfg)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/openmp/compiler.py", line 30, in __init__
- lname = self._build(tmpdir)
- File "/home/User/PyFR/env4/src/pyfr/pyfr/backends/openmp/compiler.py", line 80, in _build
- call_capture_output(cmd + self._cflags, cwd=tmpdir)
- File "/home/User/PyFR/env4/lib/python3.4/site-packages/pytools/prefork.py", line 197, in call_capture_output
- return forker[0].call_capture_output(cmdline, cwd, error_on_nonzero)
- File "/home/User/PyFR/env4/lib/python3.4/site-packages/pytools/prefork.py", line 54, in call_capture_output
- % ( " ".join(cmdline), e))
- pytools.prefork.ExecError: error invoking 'gcc -shared -std=c99 -Ofast -march=native -fopenmp -fPIC -o libtmp.so tmp.c': [Errno 11] Resource temporarily unavailable
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement