Advertisement
Guest User

Laplace solver for LuaJIT + FFI

a guest
May 22nd, 2011
666
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 2.99 KB | None | 0 0
  1. -- Gauss-Siedel Laplacian solver for LuaJIT with FFI data structures.
  2. -- Based on: https://www.scipy.org/PerformancePython
  3. -- Written by Mike Pall. Public domain.
  4.  
  5. local ffi = require("ffi")
  6.  
  7. ffi.cdef[[
  8. typedef struct {
  9.   double dx, dy;
  10.   int nx, ny;
  11.   double *u[?];
  12. } Grid;
  13. void *malloc(size_t size);
  14. void free(void *ptr);
  15. ]]
  16.  
  17. local function BC(x, y)
  18.   return x*x - y*y
  19. end
  20.  
  21. local Grid = ffi.metatype("Grid", {
  22.   __index = {
  23.     setBCFunc = function(grid, f)
  24.       local nx, ny = grid.nx, grid.ny
  25.       local dx, dy = grid.dx, grid.dy
  26.       for j=0,ny-1 do
  27.     grid.u[0][j] = f(0, j*dy)
  28.     grid.u[nx-1][j] = f(1, j*dy)
  29.       end
  30.       for i=0,nx-1 do
  31.     grid.u[i][0] = f(i*dx, 0)
  32.     grid.u[i][ny-1] = f(i*dx, 1)
  33.       end
  34.     end,
  35.   },
  36.   __gc = function(grid)
  37.     ffi.C.free(grid.u[0])
  38.   end
  39. })
  40.  
  41. local function newgrid(nx, ny)
  42.   local grid = ffi.new("Grid", nx)
  43.   grid.nx, grid.ny = nx, ny
  44.   grid.dx, grid.dy = 1/(nx-1), 1/(ny-1)
  45.   local p = ffi.cast("double *", ffi.C.malloc(ffi.sizeof("double")*nx*ny))
  46.   assert(p ~= nil)
  47.   ffi.fill(p, ffi.sizeof("double")*nx*ny)
  48.   for i=0,nx-1 do grid.u[i] = p+ny*i end
  49.   return grid
  50. end
  51.  
  52. local function laplace_timestep(grid)
  53.   local nx2, ny2 = grid.nx-2, grid.ny-2
  54.   local dx2, dy2 = grid.dx^2, grid.dy^2
  55.   local dnr_inv = 0.5/(dx2 + dy2)
  56.   local err = 0
  57.   for i=1,nx2 do
  58.     for j=1,ny2 do
  59.       local tmp = grid.u[i][j]
  60.       grid.u[i][j] = ((grid.u[i-1][j] + grid.u[i+1][j]) * dy2 +
  61.               (grid.u[i][j-1] + grid.u[i][j+1]) * dx2) * dnr_inv
  62.       err = err + (grid.u[i][j] - tmp)^2
  63.     end
  64.   end
  65.   return math.sqrt(err)
  66. end
  67.  
  68. local function laplace_solve(grid, n_iter, eps)
  69.   local err
  70.   for count=1,n_iter do
  71.     err = laplace_timestep(grid)
  72.     if err <= eps then return count end
  73.   end
  74.   return err
  75. end
  76.  
  77. local nx = tonumber(arg and arg[1]) or 500
  78. local n_iter = tonumber(arg and arg[2]) or 100
  79. local eps = tonumber(arg and arg[3]) or 1e-16
  80.  
  81. local grid = newgrid(nx, nx)
  82. grid:setBCFunc(BC)
  83.  
  84. local start = os.clock()
  85. print(string.format("nx = %d, ny = %d, n_iter = %d, eps = %f",
  86.       nx, nx, n_iter, eps))
  87. print(laplace_solve(grid, n_iter, eps))
  88. print(string.format("Iterations took %.2f seconds.", os.clock() - start))
  89.  
  90. -----------------------------------------------------------------------------
  91. -- Timings on an Intel Core2 E8400 3.0 GHz:
  92. -- C++ code from above site compiled with: GCC 4.4.3 -O2 -m64 (-O3 is slower)
  93. --
  94. -- $ ./laplace
  95. -- Enter nx n_iter eps --> 400 2000 1e-16
  96. -- nx = 500, ny = 500, n_iter = 2000, eps = 1e-16
  97. -- 0.023096
  98. -- Iterations took 3.69 seconds.
  99. --
  100. -- $ luajit-2.0.0-beta7 500 2000 1e-16
  101. -- nx = 500, ny = 500, n_iter = 2000, eps = 0.000000
  102. -- 0.023095969317126
  103. -- Iterations took 2.72 seconds.
  104. --
  105. -- Yes, LuaJIT is faster than C++ on this algorithm, because it can reduce
  106. -- the 5 loads in the innermost loop to only 3 loads plus load/store
  107. -- forwarding. This is mainly attributable to better alias analysis.
  108. -----------------------------------------------------------------------------
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement