Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- Gauss-Siedel Laplacian solver for LuaJIT with FFI data structures.
- -- Based on: https://www.scipy.org/PerformancePython
- -- Written by Mike Pall. Public domain.
- local ffi = require("ffi")
- ffi.cdef[[
- typedef struct {
- double dx, dy;
- int nx, ny;
- double *u[?];
- } Grid;
- void *malloc(size_t size);
- void free(void *ptr);
- ]]
- local function BC(x, y)
- return x*x - y*y
- end
- local Grid = ffi.metatype("Grid", {
- __index = {
- setBCFunc = function(grid, f)
- local nx, ny = grid.nx, grid.ny
- local dx, dy = grid.dx, grid.dy
- for j=0,ny-1 do
- grid.u[0][j] = f(0, j*dy)
- grid.u[nx-1][j] = f(1, j*dy)
- end
- for i=0,nx-1 do
- grid.u[i][0] = f(i*dx, 0)
- grid.u[i][ny-1] = f(i*dx, 1)
- end
- end,
- },
- __gc = function(grid)
- ffi.C.free(grid.u[0])
- end
- })
- local function newgrid(nx, ny)
- local grid = ffi.new("Grid", nx)
- grid.nx, grid.ny = nx, ny
- grid.dx, grid.dy = 1/(nx-1), 1/(ny-1)
- local p = ffi.cast("double *", ffi.C.malloc(ffi.sizeof("double")*nx*ny))
- assert(p ~= nil)
- ffi.fill(p, ffi.sizeof("double")*nx*ny)
- for i=0,nx-1 do grid.u[i] = p+ny*i end
- return grid
- end
- local function laplace_timestep(grid)
- local nx2, ny2 = grid.nx-2, grid.ny-2
- local dx2, dy2 = grid.dx^2, grid.dy^2
- local dnr_inv = 0.5/(dx2 + dy2)
- local err = 0
- for i=1,nx2 do
- for j=1,ny2 do
- local tmp = grid.u[i][j]
- grid.u[i][j] = ((grid.u[i-1][j] + grid.u[i+1][j]) * dy2 +
- (grid.u[i][j-1] + grid.u[i][j+1]) * dx2) * dnr_inv
- err = err + (grid.u[i][j] - tmp)^2
- end
- end
- return math.sqrt(err)
- end
- local function laplace_solve(grid, n_iter, eps)
- local err
- for count=1,n_iter do
- err = laplace_timestep(grid)
- if err <= eps then return count end
- end
- return err
- end
- local nx = tonumber(arg and arg[1]) or 500
- local n_iter = tonumber(arg and arg[2]) or 100
- local eps = tonumber(arg and arg[3]) or 1e-16
- local grid = newgrid(nx, nx)
- grid:setBCFunc(BC)
- local start = os.clock()
- print(string.format("nx = %d, ny = %d, n_iter = %d, eps = %f",
- nx, nx, n_iter, eps))
- print(laplace_solve(grid, n_iter, eps))
- print(string.format("Iterations took %.2f seconds.", os.clock() - start))
- -----------------------------------------------------------------------------
- -- Timings on an Intel Core2 E8400 3.0 GHz:
- -- C++ code from above site compiled with: GCC 4.4.3 -O2 -m64 (-O3 is slower)
- --
- -- $ ./laplace
- -- Enter nx n_iter eps --> 400 2000 1e-16
- -- nx = 500, ny = 500, n_iter = 2000, eps = 1e-16
- -- 0.023096
- -- Iterations took 3.69 seconds.
- --
- -- $ luajit-2.0.0-beta7 500 2000 1e-16
- -- nx = 500, ny = 500, n_iter = 2000, eps = 0.000000
- -- 0.023095969317126
- -- Iterations took 2.72 seconds.
- --
- -- Yes, LuaJIT is faster than C++ on this algorithm, because it can reduce
- -- the 5 loads in the innermost loop to only 3 loads plus load/store
- -- forwarding. This is mainly attributable to better alias analysis.
- -----------------------------------------------------------------------------
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement