Reactor_Games

sha2

Jul 29th, 2021
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 150.63 KB | None | 0 0
  1. --------------------------------------------------------------------------------------------------------------------------
  2. -- sha2.lua
  3. --------------------------------------------------------------------------------------------------------------------------
  4. -- VERSION: 9 (2020-05-10)
  5. -- AUTHOR:  Egor Skriptunoff
  6. -- LICENSE: MIT (the same license as Lua itself)
  7. --
  8. --
  9. -- DESCRIPTION:
  10. --    This module contains functions to calculate SHA digest:
  11. --       MD5, SHA-1,
  12. --       SHA-224, SHA-256, SHA-512/224, SHA-512/256, SHA-384, SHA-512,
  13. --       SHA3-224, SHA3-256, SHA3-384, SHA3-512, SHAKE128, SHAKE256,
  14. --       HMAC
  15. --    Written in pure Lua.
  16. --    Compatible with:
  17. --       Lua 5.1, Lua 5.2, Lua 5.3, Lua 5.4, Fengari, LuaJIT 2.0/2.1 (any CPU endianness).
  18. --    Main feature of this module: it was heavily optimized for speed.
  19. --    For every Lua version the module contains particular implementation branch to get benefits from version-specific features.
  20. --       - branch for Lua 5.1 (emulating bitwise operators using look-up table)
  21. --       - branch for Lua 5.2 (using bit32/bit library), suitable for both Lua 5.2 with native "bit32" and Lua 5.1 with external library "bit"
  22. --       - branch for Lua 5.3/5.4 (using native 64-bit bitwise operators)
  23. --       - branch for Lua 5.3/5.4 (using native 32-bit bitwise operators) for Lua built with LUA_INT_TYPE=LUA_INT_INT
  24. --       - branch for LuaJIT without FFI library (useful in a sandboxed environment)
  25. --       - branch for LuaJIT x86 without FFI library (LuaJIT x86 has oddity because of lack of CPU registers)
  26. --       - branch for LuaJIT 2.0 with FFI library (bit.* functions work only with Lua numbers)
  27. --       - branch for LuaJIT 2.1 with FFI library (bit.* functions can work with "int64_t" arguments)
  28. --
  29. --
  30. -- USAGE:
  31. --    Input data should be provided as a binary string: either as a whole string or as a sequence of substrings (chunk-by-chunk loading, total length < 9*10^15 bytes).
  32. --    Result (SHA digest) is returned in hexadecimal representation as a string of lowercase hex digits.
  33. --    Simplest usage example:
  34. --       local sha = require("sha2")
  35. --       local your_hash = sha.sha256("your string")
  36. --    See file "sha2_test.lua" for more examples.
  37. --
  38. --
  39. -- CHANGELOG:
  40. --  version     date      description
  41. --  -------  ----------   -----------
  42. --     9     2020-05-10   Now works in OpenWrt's Lua (dialect of Lua 5.1 with "double" + "invisible int32")
  43. --     8     2019-09-03   SHA3 functions added
  44. --     7     2019-03-17   Added functions to convert to/from base64
  45. --     6     2018-11-12   HMAC added
  46. --     5     2018-11-10   SHA-1 added
  47. --     4     2018-11-03   MD5 added
  48. --     3     2018-11-02   Bug fixed: incorrect hashing of long (2 GByte) data streams on Lua 5.3/5.4 built with "int32" integers
  49. --     2     2018-10-07   Decreased module loading time in Lua 5.1 implementation branch (thanks to Peter Melnichenko for giving a hint)
  50. --     1     2018-10-06   First release (only SHA-2 functions)
  51. -----------------------------------------------------------------------------
  52.  
  53. local print_debug_messages = false  -- set to true to view some messages about your system's abilities and implementation branch chosen for your system
  54.  
  55. local unpack, table_concat, byte, char, string_rep, sub, gsub, gmatch, string_format, floor, ceil, math_min, math_max, tonumber, type =
  56.    table.unpack or unpack, table.concat, string.byte, string.char, string.rep, string.sub, string.gsub, string.gmatch, string.format, math.floor, math.ceil, math.min, math.max, tonumber, type
  57.  
  58.  
  59. --------------------------------------------------------------------------------
  60. -- EXAMINING YOUR SYSTEM
  61. --------------------------------------------------------------------------------
  62.  
  63. local function get_precision(one)
  64.    -- "one" must be either float 1.0 or integer 1
  65.    -- returns bits_precision, is_integer
  66.    -- This function works correctly with all floating point datatypes (including non-IEEE-754)
  67.    local k, n, m, prev_n = 0, one, one
  68.    while true do
  69.       k, prev_n, n, m = k + 1, n, n + n + 1, m + m + k % 2
  70.       if k > 256 or n - (n - 1) ~= 1 or m - (m - 1) ~= 1 or n == m then
  71.          return k, false   -- floating point datatype
  72.       elseif n == prev_n then
  73.          return k, true    -- integer datatype
  74.       end
  75.    end
  76. end
  77.  
  78. -- Make sure Lua has "double" numbers
  79. local x = 2/3
  80. local Lua_has_double = x * 5 > 3 and x * 4 < 3 and get_precision(1.0) >= 53
  81. assert(Lua_has_double, "at least 53-bit floating point numbers are required")
  82.  
  83. -- Q:
  84. --    SHA2 was designed for FPU-less machines.
  85. --    So, why floating point numbers are needed for this module?
  86. -- A:
  87. --    53-bit "double" numbers are useful to calculate "magic numbers" used in SHA.
  88. --    I prefer to write 50 LOC "magic numbers calculator" instead of storing more than 200 constants explicitly in this source file.
  89.  
  90. local int_prec, Lua_has_integers = get_precision(1)
  91. local Lua_has_int64 = Lua_has_integers and int_prec == 64
  92. local Lua_has_int32 = Lua_has_integers and int_prec == 32
  93. assert(Lua_has_int64 or Lua_has_int32 or not Lua_has_integers, "Lua integers must be either 32-bit or 64-bit")
  94.  
  95. -- Q:
  96. --    Does it mean that almost all non-standard configurations are not supported?
  97. -- A:
  98. --    Yes.  Sorry, too many problems to support all possible Lua numbers configurations.
  99. --       Lua 5.1/5.2    with "int32"               will not work.
  100. --       Lua 5.1/5.2    with "int64"               will not work.
  101. --       Lua 5.1/5.2    with "int128"              will not work.
  102. --       Lua 5.1/5.2    with "float"               will not work.
  103. --       Lua 5.1/5.2    with "double"              is OK.          (default config for Lua 5.1, Lua 5.2, LuaJIT)
  104. --       Lua 5.3/5.4    with "int32"  + "float"    will not work.
  105. --       Lua 5.3/5.4    with "int64"  + "float"    will not work.
  106. --       Lua 5.3/5.4    with "int128" + "float"    will not work.
  107. --       Lua 5.3/5.4    with "int32"  + "double"   is OK.          (config used by Fengari)
  108. --       Lua 5.3/5.4    with "int64"  + "double"   is OK.          (default config for Lua 5.3, Lua 5.4)
  109. --       Lua 5.3/5.4    with "int128" + "double"   will not work.
--   Using floating point numbers better than "double" instead of "double" is OK (non-IEEE-754 floating point implementations are allowed).
  111. --   Using "int128" instead of "int64" is not OK: "int128" would require different branch of implementation for optimized SHA512.
  112.  
  113. -- Check for LuaJIT and 32-bit bitwise libraries
  114. local is_LuaJIT = ({false, [1] = true})[1] and (type(jit) ~= "table" or jit.version_num >= 20000)  -- LuaJIT 1.x.x is treated as vanilla Lua 5.1
  115. local is_LuaJIT_21  -- LuaJIT 2.1+
  116. local LuaJIT_arch
  117. local ffi           -- LuaJIT FFI library (as a table)
  118. local b             -- 32-bit bitwise library (as a table)
  119. local library_name
  120.  
  121. if is_LuaJIT then
  122.    -- Assuming "bit" library is always available on LuaJIT
  123.    b = require"bit"
  124.    library_name = "bit"
  125.    -- "ffi" is intentionally disabled on some systems for safety reason
  126.    local LuaJIT_has_FFI, result = pcall(require, "ffi")
  127.    if LuaJIT_has_FFI then
  128.       ffi = result
  129.    end
  130.    is_LuaJIT_21 = not not loadstring"b=0b0"
  131.    LuaJIT_arch = type(jit) == "table" and jit.arch or ffi and ffi.arch or nil
  132. else
  133.    -- For vanilla Lua, "bit"/"bit32" libraries are searched in global namespace only.  No attempt is made to load a library if it's not loaded yet.
  134.    for _, libname in ipairs(_VERSION == "Lua 5.2" and {"bit32", "bit"} or {"bit", "bit32"}) do
  135.       if type(_G[libname]) == "table" and _G[libname].bxor then
  136.          b = _G[libname]
  137.          library_name = libname
  138.          break
  139.       end
  140.    end
  141. end
  142.  
  143. --------------------------------------------------------------------------------
  144. -- You can disable here some of your system's abilities (for testing purposes)
  145. --------------------------------------------------------------------------------
  146. -- is_LuaJIT = nil
  147. -- is_LuaJIT_21 = nil
  148. -- ffi = nil
  149. -- Lua_has_int32 = nil
  150. -- Lua_has_int64 = nil
  151. -- b, library_name = nil
  152. --------------------------------------------------------------------------------
  153.  
  154. if print_debug_messages then
  155.    -- Printing list of abilities of your system
  156.    print("Abilities:")
  157.    print("   Lua version:               "..(is_LuaJIT and "LuaJIT "..(is_LuaJIT_21 and "2.1 " or "2.0 ")..(LuaJIT_arch or "")..(ffi and " with FFI" or " without FFI") or _VERSION))
  158.    print("   Integer bitwise operators: "..(Lua_has_int64 and "int64" or Lua_has_int32 and "int32" or "no"))
  159.    print("   32-bit bitwise library:    "..(library_name or "not found"))
  160. end
  161.  
  162. -- Selecting the most suitable implementation for given set of abilities
  163. local method, branch
  164. if is_LuaJIT and ffi then
  165.    method = "Using 'ffi' library of LuaJIT"
  166.    branch = "FFI"
  167. elseif is_LuaJIT then
  168.    method = "Using special code for FFI-less LuaJIT"
  169.    branch = "LJ"
  170. elseif Lua_has_int64 then
  171.    method = "Using native int64 bitwise operators"
  172.    branch = "INT64"
  173. elseif Lua_has_int32 then
  174.    method = "Using native int32 bitwise operators"
  175.    branch = "INT32"
  176. elseif library_name then   -- when bitwise library is available (Lua 5.2 with native library "bit32" or Lua 5.1 with external library "bit")
  177.    method = "Using '"..library_name.."' library"
  178.    branch = "LIB32"
  179. else
  180.    method = "Emulating bitwise operators using look-up table"
  181.    branch = "EMUL"
  182. end
  183.  
  184. if print_debug_messages then
  185.    -- Printing the implementation selected to be used on your system
  186.    print("Implementation selected:")
  187.    print("   "..method)
  188. end
  189.  
  190.  
  191. --------------------------------------------------------------------------------
  192. -- BASIC 32-BIT BITWISE FUNCTIONS
  193. --------------------------------------------------------------------------------
  194.  
  195. local AND, OR, XOR, SHL, SHR, ROL, ROR, NOT, NORM, HEX, XOR_BYTE
  196. -- Only low 32 bits of function arguments matter, high bits are ignored
  197. -- The result of all functions (except HEX) is an integer inside "correct range":
  198. --    for "bit" library:    (-2^31)..(2^31-1)
  199. --    for "bit32" library:        0..(2^32-1)
  200.  
  201. if branch == "FFI" or branch == "LJ" or branch == "LIB32" then
  202.  
  203.    -- Your system has 32-bit bitwise library (either "bit" or "bit32")
  204.  
  205.    AND  = b.band                -- 2 arguments
  206.    OR   = b.bor                 -- 2 arguments
  207.    XOR  = b.bxor                -- 2..5 arguments
  208.    SHL  = b.lshift              -- second argument is integer 0..31
  209.    SHR  = b.rshift              -- second argument is integer 0..31
  210.    ROL  = b.rol or b.lrotate    -- second argument is integer 0..31
  211.    ROR  = b.ror or b.rrotate    -- second argument is integer 0..31
  212.    NOT  = b.bnot                -- only for LuaJIT
  213.    NORM = b.tobit               -- only for LuaJIT
  214.    HEX  = b.tohex               -- returns string of 8 lowercase hexadecimal digits
  215.    assert(AND and OR and XOR and SHL and SHR and ROL and ROR and NOT, "Library '"..library_name.."' is incomplete")
  216.    XOR_BYTE = XOR               -- XOR of two bytes (0..255)
  217.  
  218. elseif branch == "EMUL" then
  219.  
  220.    -- Emulating 32-bit bitwise operations using 53-bit floating point arithmetic
  221.  
  222.    function SHL(x, n)
  223.       return (x * 2^n) % 2^32
  224.    end
  225.  
  226.    function SHR(x, n)
  227.       -- return (x % 2^32 - x % 2^n) / 2^n
  228.       x = x % 2^32 / 2^n
  229.       return x - x % 1
  230.    end
  231.  
  232.    function ROL(x, n)
  233.       x = x % 2^32 * 2^n
  234.       local r = x % 2^32
  235.       return r + (x - r) / 2^32
  236.    end
  237.  
  238.    function ROR(x, n)
  239.       x = x % 2^32 / 2^n
  240.       local r = x % 1
  241.       return r * 2^32 + (x - r)
  242.    end
  243.  
  244.    local AND_of_two_bytes = {[0] = 0}  -- look-up table (256*256 entries)
  245.    local idx = 0
  246.    for y = 0, 127 * 256, 256 do
  247.       for x = y, y + 127 do
  248.          x = AND_of_two_bytes[x] * 2
  249.          AND_of_two_bytes[idx] = x
  250.          AND_of_two_bytes[idx + 1] = x
  251.          AND_of_two_bytes[idx + 256] = x
  252.          AND_of_two_bytes[idx + 257] = x + 1
  253.          idx = idx + 2
  254.       end
  255.       idx = idx + 256
  256.    end
  257.  
  258.    local function and_or_xor(x, y, operation)
  259.       -- operation: nil = AND, 1 = OR, 2 = XOR
  260.       local x0 = x % 2^32
  261.       local y0 = y % 2^32
  262.       local rx = x0 % 256
  263.       local ry = y0 % 256
  264.       local res = AND_of_two_bytes[rx + ry * 256]
  265.       x = x0 - rx
  266.       y = (y0 - ry) / 256
  267.       rx = x % 65536
  268.       ry = y % 256
  269.       res = res + AND_of_two_bytes[rx + ry] * 256
  270.       x = (x - rx) / 256
  271.       y = (y - ry) / 256
  272.       rx = x % 65536 + y % 256
  273.       res = res + AND_of_two_bytes[rx] * 65536
  274.       res = res + AND_of_two_bytes[(x + y - rx) / 256] * 16777216
  275.       if operation then
  276.          res = x0 + y0 - operation * res
  277.       end
  278.       return res
  279.    end
  280.  
  281.    function AND(x, y)
  282.       return and_or_xor(x, y)
  283.    end
  284.  
  285.    function OR(x, y)
  286.       return and_or_xor(x, y, 1)
  287.    end
  288.  
  289.    function XOR(x, y, z, t, u)          -- 2..5 arguments
  290.       if z then
  291.          if t then
  292.             if u then
  293.                t = and_or_xor(t, u, 2)
  294.             end
  295.             z = and_or_xor(z, t, 2)
  296.          end
  297.          y = and_or_xor(y, z, 2)
  298.       end
  299.       return and_or_xor(x, y, 2)
  300.    end
  301.  
  302.    function XOR_BYTE(x, y)
  303.       return x + y - 2 * AND_of_two_bytes[x + y * 256]
  304.    end
  305.  
  306. end
  307.  
  308. HEX = HEX
  309.    or
  310.       pcall(string_format, "%x", 2^31) and
  311.       function (x)  -- returns string of 8 lowercase hexadecimal digits
  312.          return string_format("%08x", x % 4294967296)
  313.       end
  314.    or
  315.       function (x)  -- for OpenWrt's dialect of Lua
  316.          return string_format("%08x", (x + 2^31) % 2^32 - 2^31)
  317.       end
  318.  
  319. local function XOR32A5(x)
  320.    return XOR(x, 0xA5A5A5A5) % 4294967296
  321. end
  322.  
  323. local function create_array_of_lanes()
  324.    return {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}
  325. end
  326.  
  327.  
  328. --------------------------------------------------------------------------------
  329. -- CREATING OPTIMIZED INNER LOOP
  330. --------------------------------------------------------------------------------
  331.  
  332. -- Inner loop functions
  333. local sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed
  334.  
  335. -- Arrays of SHA2 "magic numbers" (in "INT64" and "FFI" branches "*_lo" arrays contain 64-bit values)
  336. local sha2_K_lo, sha2_K_hi, sha2_H_lo, sha2_H_hi, sha3_RC_lo, sha3_RC_hi = {}, {}, {}, {}, {}, {}
  337. local sha2_H_ext256 = {[224] = {}, [256] = sha2_H_hi}
  338. local sha2_H_ext512_lo, sha2_H_ext512_hi = {[384] = {}, [512] = sha2_H_lo}, {[384] = {}, [512] = sha2_H_hi}
  339. local md5_K, md5_sha1_H = {}, {0x67452301, 0xEFCDAB89, 0x98BADCFE, 0x10325476, 0xC3D2E1F0}
  340. local md5_next_shift = {0, 0, 0, 0, 0, 0, 0, 0, 28, 25, 26, 27, 0, 0, 10, 9, 11, 12, 0, 15, 16, 17, 18, 0, 20, 22, 23, 21}
  341. local HEX64, XOR64A5, lanes_index_base  -- defined only for branches that internally use 64-bit integers: "INT64" and "FFI"
  342. local common_W = {}    -- temporary table shared between all calculations (to avoid creating new temporary table every time)
  343. local K_lo_modulo, hi_factor, hi_factor_keccak = 4294967296, 0, 0
  344.  
  345. local function build_keccak_format(elem)
  346.    local keccak_format = {}
  347.    for _, size in ipairs{1, 9, 13, 17, 18, 21} do
  348.       keccak_format[size] = "<"..string_rep(elem, size)
  349.    end
  350.    return keccak_format
  351. end
  352.  
  353.  
  354. if branch == "FFI" then
  355.  
  356.  
  357.    -- SHA256 implementation for "LuaJIT with FFI" branch
  358.  
  359.    local common_W_FFI_int32 = ffi.new"int32_t[80]"   -- 64 is enough for SHA256, but 80 is needed for SHA-1
  360.  
   -- Feeds "size" bytes of "str", starting right after byte offset "offs", into the SHA-256 state H, one 64-byte block at a time.
   --    H     table with the eight 32-bit state words in H[1]..H[8]; updated in place
   --    str   string with the input data
   --    offs  number of bytes of "str" to skip
   --    size  number of bytes to process
   -- Returns nothing.
   function sha256_feed_64(H, str, offs, size)
      -- offs >= 0, size >= 0, size is multiple of 64
      -- W is the shared FFI int32 array (indexed from 0), K is indexed from 1 (hence K[j+1] below)
      local W, K = common_W_FFI_int32, sha2_K_hi
      for pos = offs, offs + size - 1, 64 do
         -- Load the block as 16 big-endian 32-bit words
         for j = 0, 15 do
            -- relies on "pos" being a per-iteration copy: advancing it here must not disturb the stepping of the outer loop
            pos = pos + 4
            local a, b, c, d = byte(str, pos - 3, pos)   -- slow, but doesn't depend on endianness
            W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
         end
         -- Expand the 16 loaded words to 64 words
         for j = 16, 63 do
            local a, b = W[j-15], W[j-2]
            -- a left rotation by n is the same as a right rotation by 32-n: ROL(a, 14) = ROR(a, 18), ROL(b, 15) = ROR(b, 17), ROL(b, 13) = ROR(b, 19)
            W[j] = NORM( XOR(ROR(a, 7), ROL(a, 14), SHR(a, 3)) + XOR(ROL(b, 15), ROL(b, 13), SHR(b, 10)) + W[j-7] + W[j-16] )
         end
         -- Working variables start from the current state
         local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
         -- 64 rounds, written out as 8 iterations of 8 identical rounds; each round computes z, then shifts the eight variables by one position
         for j = 0, 63, 8 do  -- Thanks to Peter Cawley for this workaround (unroll the loop to avoid "PHI shuffling too complex" due to PHIs overlap)
            local z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j] + K[j+1] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+1] + K[j+2] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+2] + K[j+3] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+3] + K[j+4] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+4] + K[j+5] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+5] + K[j+6] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+6] + K[j+7] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
            z = NORM( XOR(g, AND(e, XOR(f, g))) + XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + (W[j+7] + K[j+8] + h) )
            h, g, f, e = g, f, e, NORM( d + z )
            d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
         end
         -- Add the block's result to the state; NORM is applied to every sum
         H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])
         H[5], H[6], H[7], H[8] = NORM(e + H[5]), NORM(f + H[6]), NORM(g + H[7]), NORM(h + H[8])
      end
   end
  405.  
  406.    local common_W_FFI_int64 = ffi.new"int64_t[80]"
  407.    local int64 = ffi.typeof"int64_t"
  408.    local int32 = ffi.typeof"int32_t"
  409.    local uint32 = ffi.typeof"uint32_t"
  410.  
  411.    hi_factor = int64(2^32)
  412.  
  413.    if is_LuaJIT_21 then   -- LuaJIT 2.1 supports bitwise 64-bit operations
  414.  
  415.       local AND64, OR64, XOR64, NOT64, SHL64, SHR64, ROL64, ROR64  -- introducing synonyms for better code readability
  416.           = AND,   OR,   XOR,   NOT,   SHL,   SHR,   ROL,   ROR
  417.       HEX64 = HEX
  418.  
  419.  
  420.       -- SHA3 implementation for "LuaJIT 2.1 + FFI" branch
  421.  
  422.       local lanes_arr64 = ffi.typeof"int64_t[30]"  -- 25 + 5 for temporary usage
  423.       -- lanes array is indexed from 0
  424.       lanes_index_base = 0
  425.       hi_factor_keccak = int64(2^32)
  426.  
  427.       function create_array_of_lanes()
  428.          return lanes_arr64()
  429.       end
  430.  
      -- Absorbs "size" bytes of "str", starting right after byte offset "offs", into the Keccak state, one block at a time.
      --    lanes                array of 64-bit lanes indexed from 0: 25 state lanes in [0..24] plus 5 scratch lanes in [25..29]; updated in place
      --    _                    unused in this branch
      --    str                  string with the input data
      --    offs                 number of bytes of "str" to skip
      --    size                 number of bytes to process
      --    block_size_in_bytes  number of bytes XORed into the state before each run of 24 rounds
      -- Returns nothing.
      function keccak_feed(lanes, _, str, offs, size, block_size_in_bytes)
         -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
         local RC = sha3_RC_lo
         local qwords_qty = SHR(block_size_in_bytes, 3)   -- number of 8-byte words per block
         for pos = offs, offs + size - 1, block_size_in_bytes do
            -- XOR the block into the first qwords_qty lanes
            for j = 0, qwords_qty - 1 do
               -- relies on "pos" being a per-iteration copy: advancing it here must not disturb the stepping of the outer loop
               pos = pos + 8
               -- the names are assigned in reverse (h = first byte, a = last byte), so the 8 bytes form a little-endian 64-bit word
               local h, g, f, e, d, c, b, a = byte(str, pos - 7, pos)   -- slow, but doesn't depend on endianness
               -- upper half is multiplied into the high 32 bits; the lower half goes through uint32(int32(..)), presumably to avoid sign extension — TODO confirm
               lanes[j] = XOR64(lanes[j], OR64(OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d) * int64(2^32), uint32(int32(OR(SHL(e, 24), SHL(f, 16), SHL(g, 8), h)))))
            end
            for round_idx = 1, 24 do
               -- scratch lanes 25..29 receive the XOR of the five state lanes j, j+5, j+10, j+15, j+20
               for j = 0, 4 do
                  lanes[25 + j] = XOR64(lanes[j], lanes[j+5], lanes[j+10], lanes[j+15], lanes[j+20])
               end
               -- Each D below is XORed into five state lanes, which are rotated and moved to other slots in the same statement.
               -- All right-hand sides of a multiple assignment are evaluated before any lane is overwritten, which is what makes the in-place shuffle safe.
               local D = XOR64(lanes[25], ROL64(lanes[27], 1))
               lanes[1], lanes[6], lanes[11], lanes[16] = ROL64(XOR64(D, lanes[6]), 44), ROL64(XOR64(D, lanes[16]), 45), ROL64(XOR64(D, lanes[1]), 1), ROL64(XOR64(D, lanes[11]), 10)
               lanes[21] = ROL64(XOR64(D, lanes[21]), 2)
               D = XOR64(lanes[26], ROL64(lanes[28], 1))
               lanes[2], lanes[7], lanes[12], lanes[22] = ROL64(XOR64(D, lanes[12]), 43), ROL64(XOR64(D, lanes[22]), 61), ROL64(XOR64(D, lanes[7]), 6), ROL64(XOR64(D, lanes[2]), 62)
               lanes[17] = ROL64(XOR64(D, lanes[17]), 15)
               D = XOR64(lanes[27], ROL64(lanes[29], 1))
               lanes[3], lanes[8], lanes[18], lanes[23] = ROL64(XOR64(D, lanes[18]), 21), ROL64(XOR64(D, lanes[3]), 28), ROL64(XOR64(D, lanes[23]), 56), ROL64(XOR64(D, lanes[8]), 55)
               lanes[13] = ROL64(XOR64(D, lanes[13]), 25)
               D = XOR64(lanes[28], ROL64(lanes[25], 1))
               lanes[4], lanes[14], lanes[19], lanes[24] = ROL64(XOR64(D, lanes[24]), 14), ROL64(XOR64(D, lanes[19]), 8), ROL64(XOR64(D, lanes[4]), 27), ROL64(XOR64(D, lanes[14]), 39)
               lanes[9] = ROL64(XOR64(D, lanes[9]), 20)
               D = XOR64(lanes[29], ROL64(lanes[26], 1))
               lanes[5], lanes[10], lanes[15], lanes[20] = ROL64(XOR64(D, lanes[10]), 3), ROL64(XOR64(D, lanes[20]), 18), ROL64(XOR64(D, lanes[5]), 36), ROL64(XOR64(D, lanes[15]), 41)
               lanes[0] = XOR64(D, lanes[0])
               -- Combine every group of five lanes with the x XOR (NOT y AND z) pattern; the round constant RC[round_idx] is folded into lanes[0] only
               lanes[0], lanes[1], lanes[2], lanes[3], lanes[4] = XOR64(lanes[0], AND64(NOT64(lanes[1]), lanes[2]), RC[round_idx]), XOR64(lanes[1], AND64(NOT64(lanes[2]), lanes[3])), XOR64(lanes[2], AND64(NOT64(lanes[3]), lanes[4])), XOR64(lanes[3], AND64(NOT64(lanes[4]), lanes[0])), XOR64(lanes[4], AND64(NOT64(lanes[0]), lanes[1]))
               lanes[5], lanes[6], lanes[7], lanes[8], lanes[9] = XOR64(lanes[8], AND64(NOT64(lanes[9]), lanes[5])), XOR64(lanes[9], AND64(NOT64(lanes[5]), lanes[6])), XOR64(lanes[5], AND64(NOT64(lanes[6]), lanes[7])), XOR64(lanes[6], AND64(NOT64(lanes[7]), lanes[8])), XOR64(lanes[7], AND64(NOT64(lanes[8]), lanes[9]))
               lanes[10], lanes[11], lanes[12], lanes[13], lanes[14] = XOR64(lanes[11], AND64(NOT64(lanes[12]), lanes[13])), XOR64(lanes[12], AND64(NOT64(lanes[13]), lanes[14])), XOR64(lanes[13], AND64(NOT64(lanes[14]), lanes[10])), XOR64(lanes[14], AND64(NOT64(lanes[10]), lanes[11])), XOR64(lanes[10], AND64(NOT64(lanes[11]), lanes[12]))
               lanes[15], lanes[16], lanes[17], lanes[18], lanes[19] = XOR64(lanes[19], AND64(NOT64(lanes[15]), lanes[16])), XOR64(lanes[15], AND64(NOT64(lanes[16]), lanes[17])), XOR64(lanes[16], AND64(NOT64(lanes[17]), lanes[18])), XOR64(lanes[17], AND64(NOT64(lanes[18]), lanes[19])), XOR64(lanes[18], AND64(NOT64(lanes[19]), lanes[15]))
               lanes[20], lanes[21], lanes[22], lanes[23], lanes[24] = XOR64(lanes[22], AND64(NOT64(lanes[23]), lanes[24])), XOR64(lanes[23], AND64(NOT64(lanes[24]), lanes[20])), XOR64(lanes[24], AND64(NOT64(lanes[20]), lanes[21])), XOR64(lanes[20], AND64(NOT64(lanes[21]), lanes[22])), XOR64(lanes[21], AND64(NOT64(lanes[22]), lanes[23]))
            end
         end
      end
  468.  
  469.  
  470.       -- SHA512 implementation for "LuaJIT 2.1 + FFI" branch
  471.  
  472.       local A5_long = 0xA5A5A5A5 * int64(2^32 + 1)  -- It's impossible to use constant 0xA5A5A5A5A5A5A5A5LL because it will raise syntax error on other Lua versions
  473.  
  474.       function XOR64A5(long)
  475.          return XOR64(long, A5_long)
  476.       end
  477.  
      -- Feeds "size" bytes of "str", starting right after byte offset "offs", into the SHA-512 state H, one 128-byte block at a time.
      --    H     table with the eight 64-bit state words in H[1]..H[8]; updated in place
      --    _     unused in this branch
      --    str   string with the input data
      --    offs  number of bytes of "str" to skip
      --    size  number of bytes to process
      -- Returns nothing.
      function sha512_feed_128(H, _, str, offs, size)
         -- offs >= 0, size >= 0, size is multiple of 128
         -- W is the shared FFI int64 array (indexed from 0), K is indexed from 1 (hence K[j+1] below)
         local W, K = common_W_FFI_int64, sha2_K_lo
         for pos = offs, offs + size - 1, 128 do
            -- Load the block as 16 big-endian 64-bit words
            for j = 0, 15 do
               -- relies on "pos" being a per-iteration copy: advancing it here must not disturb the stepping of the outer loop
               pos = pos + 8
               local a, b, c, d, e, f, g, h = byte(str, pos - 7, pos)   -- slow, but doesn't depend on endianness
               -- upper half is multiplied into the high 32 bits; the lower half goes through uint32(int32(..)), presumably to avoid sign extension — TODO confirm
               W[j] = OR64(OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d) * int64(2^32), uint32(int32(OR(SHL(e, 24), SHL(f, 16), SHL(g, 8), h))))
            end
            -- Expand the 16 loaded words to 80 words
            for j = 16, 79 do
               local a, b = W[j-15], W[j-2]
               -- a 64-bit left rotation by n is the same as a right rotation by 64-n: ROL64(b, 3) = ROR64(b, 61)
               W[j] = XOR64(ROR64(a, 1), ROR64(a, 8), SHR64(a, 7)) + XOR64(ROR64(b, 19), ROL64(b, 3), SHR64(b, 6)) + W[j-7] + W[j-16]
            end
            -- Working variables start from the current state
            local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
            -- 80 rounds, written out as 10 iterations of 8 identical rounds; each round computes z, then shifts the eight variables by one position
            for j = 0, 79, 8 do
               local z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+1] + W[j]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+2] + W[j+1]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+3] + W[j+2]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+4] + W[j+3]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+5] + W[j+4]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+6] + W[j+5]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+7] + W[j+6]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
               z = XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23)) + XOR64(g, AND64(e, XOR64(f, g))) + h + K[j+8] + W[j+7]
               h, g, f, e = g, f, e, z + d
               d, c, b, a = c, b, a, XOR64(AND64(XOR64(a, b), c), AND64(a, b)) + XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30)) + z
            end
            -- Add the block's result to the state (plain additions, no normalization step; presumably 64-bit arithmetic wraps on its own — TODO confirm)
            H[1] = a + H[1]
            H[2] = b + H[2]
            H[3] = c + H[3]
            H[4] = d + H[4]
            H[5] = e + H[5]
            H[6] = f + H[6]
            H[7] = g + H[7]
            H[8] = h + H[8]
         end
      end
  528.  
  529.    else  -- LuaJIT 2.0 doesn't support 64-bit bitwise operations
  530.  
  531.  
  532.       -- SHA512 implementation for "LuaJIT 2.0 + FFI" branch
  533.  
      -- A 64-bit integer overlaid with a struct of two 32-bit integers, so that storing i64 allows reading its halves as i32.lo / i32.hi
      local union64 = ffi.typeof"union{int64_t i64; struct{int32_t lo, hi;} i32;}"
      do  -- make sure the struct is endianness-compatible
         -- assumes the initializer 1 is stored into the first member (i64) — TODO confirm against the FFI initialization rules
         local u = union64(1)
         -- with the value 1 stored, the true low half reads 1 and the true high half reads 0;
         -- "lo" reading less than "hi" therefore means the two fields are laid out the other way round in memory
         if u.i32.lo < u.i32.hi then
            -- redeclare the union with the two 32-bit fields swapped
            union64 = ffi.typeof"union{int64_t i64; struct{int32_t hi, lo;} i32;}"
         end
      end
      local unions64 = ffi.typeof("$[?]", union64)   -- variable-length array type whose elements are union64
      local U = unions64(3)   -- this array of unions is used for fast splitting int64 into int32_high and int32_low
  543.  
  544.       -- "xorrific" 64-bit functions :-)
      -- int64 input is split into two int32 parts, some bitwise 32-bit operations are performed, finally the result is converted to int64
  546.       -- these functions are needed because bit.* functions in LuaJIT 2.0 don't work with int64_t
  547.  
  548.       local function XORROR64_1(a)   -- equivalent to XOR64(ROR64(a, 1), ROR64(a, 8), SHR64(a, 7))
  549.          local cell = U[0]   -- scratch union: written as int64, read back as two int32 halves
  550.          cell.i64 = a
  551.          local lo, hi = cell.i32.lo, cell.i32.hi
  552.          local res_lo = XOR(OR(SHR(lo, 1), SHL(hi, 31)), OR(SHR(lo, 8), SHL(hi, 24)), OR(SHR(lo, 7), SHL(hi, 25)))
  553.          local res_hi = XOR(OR(SHR(hi, 1), SHL(lo, 31)), OR(SHR(hi, 8), SHL(lo, 24)), SHR(hi, 7))   -- the plain shift (SHR64) takes nothing from lo into the high half
  554.          return res_hi * int64(2^32) + uint32(int32(res_lo))   -- recombine both halves into one 64-bit value
  555.       end
  556.  
  557.       local function XORROR64_2(b)   -- equivalent to XOR64(ROR64(b, 19), ROL64(b, 3), SHR64(b, 6))
  558.          local cell = U[0]   -- scratch union: written as int64, read back as two int32 halves
  559.          cell.i64 = b
  560.          local lo, hi = cell.i32.lo, cell.i32.hi
  561.          local res_lo = XOR(OR(SHR(lo, 19), SHL(hi, 13)), OR(SHL(lo, 3), SHR(hi, 29)), OR(SHR(lo, 6), SHL(hi, 26)))
  562.          local res_hi = XOR(OR(SHR(hi, 19), SHL(lo, 13)), OR(SHL(hi, 3), SHR(lo, 29)), SHR(hi, 6))   -- the plain shift (SHR64) takes nothing from lo into the high half
  563.          return res_hi * int64(2^32) + uint32(int32(res_lo))   -- recombine both halves into one 64-bit value
  564.       end
  565.  
  566.       local function XORROR64_3(e)   -- equivalent to XOR64(ROR64(e, 14), ROR64(e, 18), ROL64(e, 23))
  567.          local cell = U[0]   -- scratch union: written as int64, read back as two int32 halves
  568.          cell.i64 = e
  569.          local lo, hi = cell.i32.lo, cell.i32.hi
  570.          local res_lo = XOR(OR(SHR(lo, 14), SHL(hi, 18)), OR(SHR(lo, 18), SHL(hi, 14)), OR(SHL(lo, 23), SHR(hi, 9)))
  571.          local res_hi = XOR(OR(SHR(hi, 14), SHL(lo, 18)), OR(SHR(hi, 18), SHL(lo, 14)), OR(SHL(hi, 23), SHR(lo, 9)))
  572.          return res_hi * int64(2^32) + uint32(int32(res_lo))   -- recombine both halves into one 64-bit value
  573.       end
  574.  
  575.       local function XORROR64_6(a)   -- equivalent to XOR64(ROR64(a, 28), ROL64(a, 25), ROL64(a, 30))
  576.          local cell = U[0]   -- scratch union: written as int64, read back as two int32 halves
  577.          cell.i64 = a
  578.          local lo, hi = cell.i32.lo, cell.i32.hi
  579.          local res_lo = XOR(OR(SHR(lo, 28), SHL(hi, 4)), OR(SHL(lo, 30), SHR(hi, 2)), OR(SHL(lo, 25), SHR(hi, 7)))
  580.          local res_hi = XOR(OR(SHR(hi, 28), SHL(lo, 4)), OR(SHL(hi, 30), SHR(lo, 2)), OR(SHL(hi, 25), SHR(lo, 7)))
  581.          return res_hi * int64(2^32) + uint32(int32(res_lo))   -- recombine both halves into one 64-bit value
  582.       end
  583.  
  584.       local function XORROR64_4(e, f, g)
  585.          -- equivalent to XOR64(g, AND64(e, XOR64(f, g))), evaluated separately on the low and the high 32-bit halves
  586.          local slot_f, slot_g, slot_e = U[0], U[1], U[2]
  587.          slot_f.i64, slot_g.i64, slot_e.i64 = f, g, e
  588.          local lo_f, hi_f = slot_f.i32.lo, slot_f.i32.hi
  589.          local lo_g, hi_g = slot_g.i32.lo, slot_g.i32.hi
  590.          local lo_e, hi_e = slot_e.i32.lo, slot_e.i32.hi
  591.          -- where a bit of e is 1 the result takes the bit of f, otherwise the bit of g
  592.          local lo = XOR(lo_g, AND(lo_e, XOR(lo_f, lo_g)))
  593.          local hi = XOR(hi_g, AND(hi_e, XOR(hi_f, hi_g)))
  594.          return hi * int64(2^32) + uint32(int32(lo))   -- recombine both halves into one 64-bit value
  595.       end
  596.  
  597.       local function XORROR64_5(a, b, c)
  598.          -- equivalent to XOR64(AND64(XOR64(a, b), c), AND64(a, b)), evaluated separately on the low and the high 32-bit halves
  599.          local slot_a, slot_b, slot_c = U[0], U[1], U[2]
  600.          slot_a.i64, slot_b.i64, slot_c.i64 = a, b, c
  601.          local lo_a, hi_a = slot_a.i32.lo, slot_a.i32.hi
  602.          local lo_b, hi_b = slot_b.i32.lo, slot_b.i32.hi
  603.          local lo_c, hi_c = slot_c.i32.lo, slot_c.i32.hi
  604.          -- each result bit is the majority of the corresponding bits of a, b and c
  605.          local lo = XOR(AND(XOR(lo_a, lo_b), lo_c), AND(lo_a, lo_b))
  606.          local hi = XOR(AND(XOR(hi_a, hi_b), hi_c), AND(hi_a, hi_b))
  607.          return hi * int64(2^32) + uint32(int32(lo))   -- recombine both halves into one 64-bit value
  608.       end
  609.  
  610.       function XOR64A5(long)
  611.          -- equivalent to XOR64(long, 0xA5A5A5A5A5A5A5A5): the same 32-bit mask is applied to each half
  612.          local cell = U[0]
  613.          cell.i64 = long
  614.          local mask = 0xA5A5A5A5
  615.          local lo, hi = XOR(cell.i32.lo, mask), XOR(cell.i32.hi, mask)
  616.          return hi * int64(2^32) + uint32(int32(lo))   -- recombine both halves into one 64-bit value
  617.       end
  618.  
  619.       function HEX64(long)   -- concatenates HEX(high 32-bit half) and HEX(low 32-bit half) of a 64-bit value
  620.          local cell = U[0];  cell.i64 = long
  621.          return HEX(cell.i32.hi)..HEX(cell.i32.lo)
  622.       end
  623.  
  624.       function sha512_feed_128(H, _, str, offs, size)   -- SHA-512 block function of the "LuaJIT 2.0 + FFI" branch: folds 128-byte blocks of str into the eight state words H[1..8]
  625.          -- offs >= 0, size >= 0, size is multiple of 128; the second parameter is ignored here; H is updated in place and nothing is returned
  626.          local W, K = common_W_FFI_int64, sha2_K_lo   -- W: message-schedule buffer used at indices 0..79, K: per-round constants used at indices 1..80 (both defined elsewhere in the file)
  627.          for pos = offs, offs + size - 1, 128 do   -- one iteration per 128-byte block
  628.             for j = 0, 15 do   -- load 16 words of 8 bytes each
  629.                pos = pos + 8
  630.                local a, b, c, d, e, f, g, h = byte(str, pos - 7, pos)   -- slow, but doesn't depend on endianness
  631.                W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d) * int64(2^32) + uint32(int32(OR(SHL(e, 24), SHL(f, 16), SHL(g, 8), h)))   -- big-endian: bytes a..d form the high 32 bits, e..h the low 32 bits
  632.             end
  633.             for j = 16, 79 do   -- expand the 16 loaded words to 80
  634.                W[j] = XORROR64_1(W[j-15]) + XORROR64_2(W[j-2]) + W[j-7] + W[j-16]
  635.             end
  636.             local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]   -- working copies of the state
  637.             for j = 0, 79, 8 do   -- 80 rounds, eight per iteration (manually unrolled)
  638.                local z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+1] + W[j]   -- z is reused by the next two statements of every round
  639.                h, g, f, e = g, f, e, z + d
  640.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  641.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+2] + W[j+1]
  642.                h, g, f, e = g, f, e, z + d
  643.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  644.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+3] + W[j+2]
  645.                h, g, f, e = g, f, e, z + d
  646.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  647.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+4] + W[j+3]
  648.                h, g, f, e = g, f, e, z + d
  649.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  650.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+5] + W[j+4]
  651.                h, g, f, e = g, f, e, z + d
  652.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  653.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+6] + W[j+5]
  654.                h, g, f, e = g, f, e, z + d
  655.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  656.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+7] + W[j+6]
  657.                h, g, f, e = g, f, e, z + d
  658.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  659.                z = XORROR64_3(e) + XORROR64_4(e, f, g) + h + K[j+8] + W[j+7]
  660.                h, g, f, e = g, f, e, z + d
  661.                d, c, b, a = c, b, a, XORROR64_5(a, b, c) + XORROR64_6(a) + z
  662.             end
  663.             H[1] = a + H[1]   -- add the working variables back into the state
  664.             H[2] = b + H[2]
  665.             H[3] = c + H[3]
  666.             H[4] = d + H[4]
  667.             H[5] = e + H[5]
  668.             H[6] = f + H[6]
  669.             H[7] = g + H[7]
  670.             H[8] = h + H[8]
  671.          end
  672.       end
  673.  
  674.    end
  675.  
  676.  
  677.    -- MD5 implementation for "LuaJIT with FFI" branch
  678.  
  679.    function md5_feed_64(H, str, offs, size)   -- MD5 block function of the "LuaJIT with FFI" branch: folds 64-byte blocks of str into the four state words H[1..4]
  680.       -- offs >= 0, size >= 0, size is multiple of 64; H is updated in place and nothing is returned
  681.       local W, K = common_W_FFI_int32, md5_K   -- W: message words used at indices 0..15, K: per-step constants used at indices 1..64 (both defined elsewhere in the file)
  682.       for pos = offs, offs + size - 1, 64 do   -- one iteration per 64-byte block
  683.          for j = 0, 15 do   -- load 16 words of 4 bytes each
  684.             pos = pos + 4
  685.             local a, b, c, d = byte(str, pos - 3, pos)   -- slow, but doesn't depend on endianness
  686.             W[j] = OR(SHL(d, 24), SHL(c, 16), SHL(b, 8), a)   -- little-endian: the first byte is the least significant
  687.          end
  688.          local a, b, c, d = H[1], H[2], H[3], H[4]   -- working copies of the state
  689.          for j = 0, 15, 4 do   -- steps 0..15, four per iteration; message words are taken in order
  690.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+1] + W[j  ] + a),  7) + b)
  691.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+2] + W[j+1] + a), 12) + b)
  692.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+3] + W[j+2] + a), 17) + b)
  693.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+4] + W[j+3] + a), 22) + b)
  694.          end
  695.          for j = 16, 31, 4 do   -- steps 16..31; AND(..., 15) reduces the message-word index modulo 16
  696.             local g = 5*j
  697.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+1] + W[AND(g + 1, 15)] + a),  5) + b)
  698.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+2] + W[AND(g + 6, 15)] + a),  9) + b)
  699.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+3] + W[AND(g - 5, 15)] + a), 14) + b)
  700.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+4] + W[AND(g    , 15)] + a), 20) + b)
  701.          end
  702.          for j = 32, 47, 4 do   -- steps 32..47; the mixing function is the XOR of b, c and d
  703.             local g = 3*j
  704.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+1] + W[AND(g + 5, 15)] + a),  4) + b)
  705.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+2] + W[AND(g + 8, 15)] + a), 11) + b)
  706.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+3] + W[AND(g - 5, 15)] + a), 16) + b)
  707.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+4] + W[AND(g - 2, 15)] + a), 23) + b)
  708.          end
  709.          for j = 48, 63, 4 do   -- steps 48..63; the mixing function is XOR(c, OR(b, NOT(d)))
  710.             local g = 7*j
  711.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+1] + W[AND(g    , 15)] + a),  6) + b)
  712.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+2] + W[AND(g + 7, 15)] + a), 10) + b)
  713.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+3] + W[AND(g - 2, 15)] + a), 15) + b)
  714.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+4] + W[AND(g + 5, 15)] + a), 21) + b)
  715.          end
  716.          H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])   -- add the working variables back into the state
  717.       end
  718.    end
  719.  
  720.  
  721.    -- SHA-1 implementation for "LuaJIT with FFI" branch
  722.  
  723.    function sha1_feed_64(H, str, offs, size)   -- SHA-1 block function of the "LuaJIT with FFI" branch: folds 64-byte blocks of str into the five state words H[1..5]
  724.       -- offs >= 0, size >= 0, size is multiple of 64; H is updated in place and nothing is returned
  725.       local W = common_W_FFI_int32   -- message-schedule buffer used at indices 0..79 (defined elsewhere in the file)
  726.       for pos = offs, offs + size - 1, 64 do   -- one iteration per 64-byte block
  727.          for j = 0, 15 do   -- load 16 words of 4 bytes each
  728.             pos = pos + 4
  729.             local a, b, c, d = byte(str, pos - 3, pos)   -- slow, but doesn't depend on endianness
  730.             W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)   -- big-endian: the first byte is the most significant
  731.          end
  732.          for j = 16, 79 do   -- expand the 16 loaded words to 80
  733.             W[j] = ROL(XOR(W[j-3], W[j-8], W[j-14], W[j-16]), 1)
  734.          end
  735.          local a, b, c, d, e = H[1], H[2], H[3], H[4], H[5]   -- working copies of the state
  736.          for j = 0, 19, 5 do   -- steps 0..19, five per iteration (manually unrolled)
  737.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j]   + 0x5A827999 + e))          -- constant = floor(2^30 * sqrt(2))
  738.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+1] + 0x5A827999 + e))
  739.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+2] + 0x5A827999 + e))
  740.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+3] + 0x5A827999 + e))
  741.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+4] + 0x5A827999 + e))
  742.          end
  743.          for j = 20, 39, 5 do   -- steps 20..39
  744.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j]   + 0x6ED9EBA1 + e))                       -- 2^30 * sqrt(3)
  745.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0x6ED9EBA1 + e))
  746.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0x6ED9EBA1 + e))
  747.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0x6ED9EBA1 + e))
  748.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0x6ED9EBA1 + e))
  749.          end
  750.          for j = 40, 59, 5 do   -- steps 40..59
  751.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j]   + 0x8F1BBCDC + e))  -- 2^30 * sqrt(5)
  752.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+1] + 0x8F1BBCDC + e))
  753.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+2] + 0x8F1BBCDC + e))
  754.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+3] + 0x8F1BBCDC + e))
  755.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+4] + 0x8F1BBCDC + e))
  756.          end
  757.          for j = 60, 79, 5 do   -- steps 60..79
  758.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j]   + 0xCA62C1D6 + e))                       -- 2^30 * sqrt(10)
  759.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0xCA62C1D6 + e))
  760.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0xCA62C1D6 + e))
  761.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0xCA62C1D6 + e))
  762.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0xCA62C1D6 + e))
  763.          end
  764.          H[1], H[2], H[3], H[4], H[5] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4]), NORM(e + H[5])   -- add the working variables back into the state
  765.       end
  766.    end
  767.  
  768. end
  769.  
  770.  
  771. -- SHA3 implementation for "LuaJIT 2.0 + FFI" and "LuaJIT without FFI" branches
  772.  
  773. if branch == "FFI" and not is_LuaJIT_21 or branch == "LJ" then
  774.  
  775.    if branch == "FFI" then
  776.       local lanes_arr32 = ffi.typeof("int32_t[31]")  -- 31 = 25 + 5 + 1 slots (the extra one is due to 1-based indexing)
  777.       -- returns a newly allocated int32 array of that type on every call
  778.       function create_array_of_lanes()
  779.          return ffi.new(lanes_arr32)
  780.       end
  781.  
  782.    end
  783.  
  784.    function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)   -- XORs each block of str into the state (kept as separate low/high 32-bit halves per lane) and then runs 24 rounds on it
  785.       -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8; lanes_lo and lanes_hi are updated in place and nothing is returned
  786.       local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi   -- low/high halves of the per-round constants, used at indices 1..24 (defined elsewhere in the file)
  787.       local qwords_qty = SHR(block_size_in_bytes, 3)   -- number of 8-byte lanes in one block
  788.       for pos = offs, offs + size - 1, block_size_in_bytes do   -- one iteration per block
  789.          for j = 1, qwords_qty do   -- XOR the block into lanes 1..qwords_qty
  790.             local a, b, c, d = byte(str, pos + 1, pos + 4)   -- first 4 bytes of the lane go to the low half
  791.             lanes_lo[j] = XOR(lanes_lo[j], OR(SHL(d, 24), SHL(c, 16), SHL(b, 8), a))   -- little-endian: the first byte is the least significant
  792.             pos = pos + 8
  793.             a, b, c, d = byte(str, pos - 3, pos)   -- last 4 bytes of the lane go to the high half
  794.             lanes_hi[j] = XOR(lanes_hi[j], OR(SHL(d, 24), SHL(c, 16), SHL(b, 8), a))
  795.          end
  796.          for round_idx = 1, 24 do
  797.             for j = 1, 5 do   -- slots 26..30 receive the XOR of lanes j, j+5, j+10, j+15, j+20 (low halves)
  798.                lanes_lo[25 + j] = XOR(lanes_lo[j], lanes_lo[j + 5], lanes_lo[j + 10], lanes_lo[j + 15], lanes_lo[j + 20])
  799.             end
  800.             for j = 1, 5 do   -- same for the high halves
  801.                lanes_hi[25 + j] = XOR(lanes_hi[j], lanes_hi[j + 5], lanes_hi[j + 10], lanes_hi[j + 15], lanes_hi[j + 20])
  802.             end
  803.             local D_lo = XOR(lanes_lo[26], SHL(lanes_lo[28], 1), SHR(lanes_hi[28], 31))   -- D = slot 26 XOR (slot 28 rotated left by 1 bit as a 64-bit lane)
  804.             local D_hi = XOR(lanes_hi[26], SHL(lanes_hi[28], 1), SHR(lanes_lo[28], 31))
  805.             lanes_lo[2], lanes_hi[2], lanes_lo[7], lanes_hi[7], lanes_lo[12], lanes_hi[12], lanes_lo[17], lanes_hi[17] = XOR(SHR(XOR(D_lo, lanes_lo[7]), 20), SHL(XOR(D_hi, lanes_hi[7]), 12)), XOR(SHR(XOR(D_hi, lanes_hi[7]), 20), SHL(XOR(D_lo, lanes_lo[7]), 12)), XOR(SHR(XOR(D_lo, lanes_lo[17]), 19), SHL(XOR(D_hi, lanes_hi[17]), 13)), XOR(SHR(XOR(D_hi, lanes_hi[17]), 19), SHL(XOR(D_lo, lanes_lo[17]), 13)), XOR(SHL(XOR(D_lo, lanes_lo[2]), 1), SHR(XOR(D_hi, lanes_hi[2]), 31)), XOR(SHL(XOR(D_hi, lanes_hi[2]), 1), SHR(XOR(D_lo, lanes_lo[2]), 31)), XOR(SHL(XOR(D_lo, lanes_lo[12]), 10), SHR(XOR(D_hi, lanes_hi[12]), 22)), XOR(SHL(XOR(D_hi, lanes_hi[12]), 10), SHR(XOR(D_lo, lanes_lo[12]), 22))
  806.             local L, H = XOR(D_lo, lanes_lo[22]), XOR(D_hi, lanes_hi[22])   -- lane 22 XORed with D ...
  807.             lanes_lo[22], lanes_hi[22] = XOR(SHL(L, 2), SHR(H, 30)), XOR(SHL(H, 2), SHR(L, 30))   -- ... and rotated left by 2 bits as a 64-bit lane
  808.             D_lo = XOR(lanes_lo[27], SHL(lanes_lo[29], 1), SHR(lanes_hi[29], 31))   -- next D: slot 27 XOR (slot 29 rotated left by 1 bit)
  809.             D_hi = XOR(lanes_hi[27], SHL(lanes_hi[29], 1), SHR(lanes_lo[29], 31))
  810.             lanes_lo[3], lanes_hi[3], lanes_lo[8], lanes_hi[8], lanes_lo[13], lanes_hi[13], lanes_lo[23], lanes_hi[23] = XOR(SHR(XOR(D_lo, lanes_lo[13]), 21), SHL(XOR(D_hi, lanes_hi[13]), 11)), XOR(SHR(XOR(D_hi, lanes_hi[13]), 21), SHL(XOR(D_lo, lanes_lo[13]), 11)), XOR(SHR(XOR(D_lo, lanes_lo[23]), 3), SHL(XOR(D_hi, lanes_hi[23]), 29)), XOR(SHR(XOR(D_hi, lanes_hi[23]), 3), SHL(XOR(D_lo, lanes_lo[23]), 29)), XOR(SHL(XOR(D_lo, lanes_lo[8]), 6), SHR(XOR(D_hi, lanes_hi[8]), 26)), XOR(SHL(XOR(D_hi, lanes_hi[8]), 6), SHR(XOR(D_lo, lanes_lo[8]), 26)), XOR(SHR(XOR(D_lo, lanes_lo[3]), 2), SHL(XOR(D_hi, lanes_hi[3]), 30)), XOR(SHR(XOR(D_hi, lanes_hi[3]), 2), SHL(XOR(D_lo, lanes_lo[3]), 30))
  811.             L, H = XOR(D_lo, lanes_lo[18]), XOR(D_hi, lanes_hi[18])
  812.             lanes_lo[18], lanes_hi[18] = XOR(SHL(L, 15), SHR(H, 17)), XOR(SHL(H, 15), SHR(L, 17))   -- lane 18 rotated left by 15 bits
  813.             D_lo = XOR(lanes_lo[28], SHL(lanes_lo[30], 1), SHR(lanes_hi[30], 31))   -- next D: slot 28 XOR (slot 30 rotated left by 1 bit)
  814.             D_hi = XOR(lanes_hi[28], SHL(lanes_hi[30], 1), SHR(lanes_lo[30], 31))
  815.             lanes_lo[4], lanes_hi[4], lanes_lo[9], lanes_hi[9], lanes_lo[19], lanes_hi[19], lanes_lo[24], lanes_hi[24] = XOR(SHL(XOR(D_lo, lanes_lo[19]), 21), SHR(XOR(D_hi, lanes_hi[19]), 11)), XOR(SHL(XOR(D_hi, lanes_hi[19]), 21), SHR(XOR(D_lo, lanes_lo[19]), 11)), XOR(SHL(XOR(D_lo, lanes_lo[4]), 28), SHR(XOR(D_hi, lanes_hi[4]), 4)), XOR(SHL(XOR(D_hi, lanes_hi[4]), 28), SHR(XOR(D_lo, lanes_lo[4]), 4)), XOR(SHR(XOR(D_lo, lanes_lo[24]), 8), SHL(XOR(D_hi, lanes_hi[24]), 24)), XOR(SHR(XOR(D_hi, lanes_hi[24]), 8), SHL(XOR(D_lo, lanes_lo[24]), 24)), XOR(SHR(XOR(D_lo, lanes_lo[9]), 9), SHL(XOR(D_hi, lanes_hi[9]), 23)), XOR(SHR(XOR(D_hi, lanes_hi[9]), 9), SHL(XOR(D_lo, lanes_lo[9]), 23))
  816.             L, H = XOR(D_lo, lanes_lo[14]), XOR(D_hi, lanes_hi[14])
  817.             lanes_lo[14], lanes_hi[14] = XOR(SHL(L, 25), SHR(H, 7)), XOR(SHL(H, 25), SHR(L, 7))   -- lane 14 rotated left by 25 bits
  818.             D_lo = XOR(lanes_lo[29], SHL(lanes_lo[26], 1), SHR(lanes_hi[26], 31))   -- next D: slot 29 XOR (slot 26 rotated left by 1 bit)
  819.             D_hi = XOR(lanes_hi[29], SHL(lanes_hi[26], 1), SHR(lanes_lo[26], 31))
  820.             lanes_lo[5], lanes_hi[5], lanes_lo[15], lanes_hi[15], lanes_lo[20], lanes_hi[20], lanes_lo[25], lanes_hi[25] = XOR(SHL(XOR(D_lo, lanes_lo[25]), 14), SHR(XOR(D_hi, lanes_hi[25]), 18)), XOR(SHL(XOR(D_hi, lanes_hi[25]), 14), SHR(XOR(D_lo, lanes_lo[25]), 18)), XOR(SHL(XOR(D_lo, lanes_lo[20]), 8), SHR(XOR(D_hi, lanes_hi[20]), 24)), XOR(SHL(XOR(D_hi, lanes_hi[20]), 8), SHR(XOR(D_lo, lanes_lo[20]), 24)), XOR(SHL(XOR(D_lo, lanes_lo[5]), 27), SHR(XOR(D_hi, lanes_hi[5]), 5)), XOR(SHL(XOR(D_hi, lanes_hi[5]), 27), SHR(XOR(D_lo, lanes_lo[5]), 5)), XOR(SHR(XOR(D_lo, lanes_lo[15]), 25), SHL(XOR(D_hi, lanes_hi[15]), 7)), XOR(SHR(XOR(D_hi, lanes_hi[15]), 25), SHL(XOR(D_lo, lanes_lo[15]), 7))
  821.             L, H = XOR(D_lo, lanes_lo[10]), XOR(D_hi, lanes_hi[10])
  822.             lanes_lo[10], lanes_hi[10] = XOR(SHL(L, 20), SHR(H, 12)), XOR(SHL(H, 20), SHR(L, 12))   -- lane 10 rotated left by 20 bits
  823.             D_lo = XOR(lanes_lo[30], SHL(lanes_lo[27], 1), SHR(lanes_hi[27], 31))   -- next D: slot 30 XOR (slot 27 rotated left by 1 bit)
  824.             D_hi = XOR(lanes_hi[30], SHL(lanes_hi[27], 1), SHR(lanes_lo[27], 31))
  825.             lanes_lo[6], lanes_hi[6], lanes_lo[11], lanes_hi[11], lanes_lo[16], lanes_hi[16], lanes_lo[21], lanes_hi[21] = XOR(SHL(XOR(D_lo, lanes_lo[11]), 3), SHR(XOR(D_hi, lanes_hi[11]), 29)), XOR(SHL(XOR(D_hi, lanes_hi[11]), 3), SHR(XOR(D_lo, lanes_lo[11]), 29)), XOR(SHL(XOR(D_lo, lanes_lo[21]), 18), SHR(XOR(D_hi, lanes_hi[21]), 14)), XOR(SHL(XOR(D_hi, lanes_hi[21]), 18), SHR(XOR(D_lo, lanes_lo[21]), 14)), XOR(SHR(XOR(D_lo, lanes_lo[6]), 28), SHL(XOR(D_hi, lanes_hi[6]), 4)), XOR(SHR(XOR(D_hi, lanes_hi[6]), 28), SHL(XOR(D_lo, lanes_lo[6]), 4)), XOR(SHR(XOR(D_lo, lanes_lo[16]), 23), SHL(XOR(D_hi, lanes_hi[16]), 9)), XOR(SHR(XOR(D_hi, lanes_hi[16]), 23), SHL(XOR(D_lo, lanes_lo[16]), 9))
  826.             lanes_lo[1], lanes_hi[1] = XOR(D_lo, lanes_lo[1]), XOR(D_hi, lanes_hi[1])   -- lane 1 is only XORed with D, without rotation
  827.             lanes_lo[1], lanes_lo[2], lanes_lo[3], lanes_lo[4], lanes_lo[5] = XOR(lanes_lo[1], AND(NOT(lanes_lo[2]), lanes_lo[3]), RC_lo[round_idx]), XOR(lanes_lo[2], AND(NOT(lanes_lo[3]), lanes_lo[4])), XOR(lanes_lo[3], AND(NOT(lanes_lo[4]), lanes_lo[5])), XOR(lanes_lo[4], AND(NOT(lanes_lo[5]), lanes_lo[1])), XOR(lanes_lo[5], AND(NOT(lanes_lo[1]), lanes_lo[2]))
  828.             lanes_lo[6], lanes_lo[7], lanes_lo[8], lanes_lo[9], lanes_lo[10] = XOR(lanes_lo[9], AND(NOT(lanes_lo[10]), lanes_lo[6])), XOR(lanes_lo[10], AND(NOT(lanes_lo[6]), lanes_lo[7])), XOR(lanes_lo[6], AND(NOT(lanes_lo[7]), lanes_lo[8])), XOR(lanes_lo[7], AND(NOT(lanes_lo[8]), lanes_lo[9])), XOR(lanes_lo[8], AND(NOT(lanes_lo[9]), lanes_lo[10]))
  829.             lanes_lo[11], lanes_lo[12], lanes_lo[13], lanes_lo[14], lanes_lo[15] = XOR(lanes_lo[12], AND(NOT(lanes_lo[13]), lanes_lo[14])), XOR(lanes_lo[13], AND(NOT(lanes_lo[14]), lanes_lo[15])), XOR(lanes_lo[14], AND(NOT(lanes_lo[15]), lanes_lo[11])), XOR(lanes_lo[15], AND(NOT(lanes_lo[11]), lanes_lo[12])), XOR(lanes_lo[11], AND(NOT(lanes_lo[12]), lanes_lo[13]))
  830.             lanes_lo[16], lanes_lo[17], lanes_lo[18], lanes_lo[19], lanes_lo[20] = XOR(lanes_lo[20], AND(NOT(lanes_lo[16]), lanes_lo[17])), XOR(lanes_lo[16], AND(NOT(lanes_lo[17]), lanes_lo[18])), XOR(lanes_lo[17], AND(NOT(lanes_lo[18]), lanes_lo[19])), XOR(lanes_lo[18], AND(NOT(lanes_lo[19]), lanes_lo[20])), XOR(lanes_lo[19], AND(NOT(lanes_lo[20]), lanes_lo[16]))
  831.             lanes_lo[21], lanes_lo[22], lanes_lo[23], lanes_lo[24], lanes_lo[25] = XOR(lanes_lo[23], AND(NOT(lanes_lo[24]), lanes_lo[25])), XOR(lanes_lo[24], AND(NOT(lanes_lo[25]), lanes_lo[21])), XOR(lanes_lo[25], AND(NOT(lanes_lo[21]), lanes_lo[22])), XOR(lanes_lo[21], AND(NOT(lanes_lo[22]), lanes_lo[23])), XOR(lanes_lo[22], AND(NOT(lanes_lo[23]), lanes_lo[24]))
  832.             lanes_hi[1], lanes_hi[2], lanes_hi[3], lanes_hi[4], lanes_hi[5] = XOR(lanes_hi[1], AND(NOT(lanes_hi[2]), lanes_hi[3]), RC_hi[round_idx]), XOR(lanes_hi[2], AND(NOT(lanes_hi[3]), lanes_hi[4])), XOR(lanes_hi[3], AND(NOT(lanes_hi[4]), lanes_hi[5])), XOR(lanes_hi[4], AND(NOT(lanes_hi[5]), lanes_hi[1])), XOR(lanes_hi[5], AND(NOT(lanes_hi[1]), lanes_hi[2]))
  833.             lanes_hi[6], lanes_hi[7], lanes_hi[8], lanes_hi[9], lanes_hi[10] = XOR(lanes_hi[9], AND(NOT(lanes_hi[10]), lanes_hi[6])), XOR(lanes_hi[10], AND(NOT(lanes_hi[6]), lanes_hi[7])), XOR(lanes_hi[6], AND(NOT(lanes_hi[7]), lanes_hi[8])), XOR(lanes_hi[7], AND(NOT(lanes_hi[8]), lanes_hi[9])), XOR(lanes_hi[8], AND(NOT(lanes_hi[9]), lanes_hi[10]))
  834.             lanes_hi[11], lanes_hi[12], lanes_hi[13], lanes_hi[14], lanes_hi[15] = XOR(lanes_hi[12], AND(NOT(lanes_hi[13]), lanes_hi[14])), XOR(lanes_hi[13], AND(NOT(lanes_hi[14]), lanes_hi[15])), XOR(lanes_hi[14], AND(NOT(lanes_hi[15]), lanes_hi[11])), XOR(lanes_hi[15], AND(NOT(lanes_hi[11]), lanes_hi[12])), XOR(lanes_hi[11], AND(NOT(lanes_hi[12]), lanes_hi[13]))
  835.             lanes_hi[16], lanes_hi[17], lanes_hi[18], lanes_hi[19], lanes_hi[20] = XOR(lanes_hi[20], AND(NOT(lanes_hi[16]), lanes_hi[17])), XOR(lanes_hi[16], AND(NOT(lanes_hi[17]), lanes_hi[18])), XOR(lanes_hi[17], AND(NOT(lanes_hi[18]), lanes_hi[19])), XOR(lanes_hi[18], AND(NOT(lanes_hi[19]), lanes_hi[20])), XOR(lanes_hi[19], AND(NOT(lanes_hi[20]), lanes_hi[16]))
  836.             lanes_hi[21], lanes_hi[22], lanes_hi[23], lanes_hi[24], lanes_hi[25] = XOR(lanes_hi[23], AND(NOT(lanes_hi[24]), lanes_hi[25])), XOR(lanes_hi[24], AND(NOT(lanes_hi[25]), lanes_hi[21])), XOR(lanes_hi[25], AND(NOT(lanes_hi[21]), lanes_hi[22])), XOR(lanes_hi[21], AND(NOT(lanes_hi[22]), lanes_hi[23])), XOR(lanes_hi[22], AND(NOT(lanes_hi[23]), lanes_hi[24]))
  837.          end
  838.       end
  839.    end
  840.  
  841. end
  842.  
  843.  
  844. if branch == "LJ" then
  845.  
  846.  
  847.    -- SHA256 implementation for "LuaJIT without FFI" branch
  848.  
  849.    function sha256_feed_64(H, str, offs, size)   -- SHA-256 block function of the "LuaJIT without FFI" branch: folds 64-byte blocks of str into the eight state words H[1..8]
  850.       -- offs >= 0, size >= 0, size is multiple of 64; H is updated in place and nothing is returned
  851.       local W, K = common_W, sha2_K_hi   -- W: message-schedule table used at indices 1..64, K: per-round constants used at indices 1..64 (both defined elsewhere in the file)
  852.       for pos = offs, offs + size - 1, 64 do   -- one iteration per 64-byte block
  853.          for j = 1, 16 do   -- load 16 words of 4 bytes each
  854.             pos = pos + 4
  855.             local a, b, c, d = byte(str, pos - 3, pos)
  856.             W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)   -- big-endian: the first byte is the most significant
  857.          end
  858.          for j = 17, 64 do   -- expand the 16 loaded words to 64
  859.             local a, b = W[j-15], W[j-2]
  860.             W[j] = NORM( NORM( XOR(ROR(a, 7), ROL(a, 14), SHR(a, 3)) + XOR(ROL(b, 15), ROL(b, 13), SHR(b, 10)) ) + NORM( W[j-7] + W[j-16] ) )
  861.          end
  862.          local a, b, c, d, e, f, g, h = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]   -- working copies of the state
  863.          for j = 1, 64, 8 do  -- Thanks to Peter Cawley for this workaround (unroll the loop to avoid "PHI shuffling too complex" due to PHIs overlap)
  864.             local z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j] + W[j] + h) )   -- z is reused by the next two statements of every round
  865.             h, g, f, e = g, f, e, NORM(d + z)
  866.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  867.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+1] + W[j+1] + h) )
  868.             h, g, f, e = g, f, e, NORM(d + z)
  869.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  870.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+2] + W[j+2] + h) )
  871.             h, g, f, e = g, f, e, NORM(d + z)
  872.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  873.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+3] + W[j+3] + h) )
  874.             h, g, f, e = g, f, e, NORM(d + z)
  875.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  876.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+4] + W[j+4] + h) )
  877.             h, g, f, e = g, f, e, NORM(d + z)
  878.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  879.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+5] + W[j+5] + h) )
  880.             h, g, f, e = g, f, e, NORM(d + z)
  881.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  882.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+6] + W[j+6] + h) )
  883.             h, g, f, e = g, f, e, NORM(d + z)
  884.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  885.             z = NORM( XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7)) + XOR(g, AND(e, XOR(f, g))) + (K[j+7] + W[j+7] + h) )
  886.             h, g, f, e = g, f, e, NORM(d + z)
  887.             d, c, b, a = c, b, a, NORM( XOR(AND(a, XOR(b, c)), AND(b, c)) + XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10)) + z )
  888.          end
  889.          H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])   -- add the working variables back into the state
  890.          H[5], H[6], H[7], H[8] = NORM(e + H[5]), NORM(f + H[6]), NORM(g + H[7]), NORM(h + H[8])
  891.       end
  892.    end
  893.  
  894.    local function ADD64_4(a_lo, a_hi, b_lo, b_hi, c_lo, c_hi, d_lo, d_hi)
  895.       -- adds four 64-bit values given as (lo, hi) pairs of 32-bit words; returns the low word and the high word of the sum, each passed through NORM
  896.       local two32 = 2^32
  897.       local low = a_lo % two32 + b_lo % two32 + c_lo % two32 + d_lo % two32   -- low words made non-negative before adding; the sum may exceed 32 bits
  898.       local carry = floor(low / two32)   -- overflow of the low-word sum, carried into the high word
  899.       return NORM(low), NORM(a_hi + b_hi + c_hi + d_hi + carry)
  900.    end
  901.  
  902.    if LuaJIT_arch == "x86" then  -- Special trick is required to avoid "PHI shuffling too complex" on x86 platform
  903.  
  904.  
  905.       -- SHA512 implementation for "LuaJIT x86 without FFI" branch
  906.  
  907.       function sha512_feed_128(H_lo, H_hi, str, offs, size)   -- SHA-512 block function of the "LuaJIT x86 without FFI" branch: the state is kept as 32-bit halves in H_lo[1..8] and H_hi[1..8], both updated in place
  908.          -- offs >= 0, size >= 0, size is multiple of 128
  909.          -- W1_hi, W1_lo, W2_hi, W2_lo, ...   Wk_hi = W[2*k-1], Wk_lo = W[2*k]
  910.          local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi   -- K_lo/K_hi: halves of the per-round constants, used at indices 1..80 (all three tables are defined elsewhere in the file)
  911.          for pos = offs, offs + size - 1, 128 do   -- one iteration per 128-byte block
  912.             for j = 1, 16*2 do   -- load 32 big-endian 32-bit words, i.e. 16 (hi, lo) pairs
  913.                pos = pos + 4
  914.                local a, b, c, d = byte(str, pos - 3, pos)
  915.                W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
  916.             end
  917.             for jj = 17*2, 80*2, 2 do   -- expand to 80 pairs; jj is the index of the low half of the word being computed
  918.                local a_lo, a_hi = W[jj-30], W[jj-31]   -- word k-15
  919.                local t_lo = XOR(OR(SHR(a_lo, 1), SHL(a_hi, 31)), OR(SHR(a_lo, 8), SHL(a_hi, 24)), OR(SHR(a_lo, 7), SHL(a_hi, 25)))   -- halves of ROR64(a, 1) xor ROR64(a, 8) xor SHR64(a, 7)
  920.                local t_hi = XOR(OR(SHR(a_hi, 1), SHL(a_lo, 31)), OR(SHR(a_hi, 8), SHL(a_lo, 24)), SHR(a_hi, 7))
  921.                local b_lo, b_hi = W[jj-4], W[jj-5]   -- word k-2
  922.                local u_lo = XOR(OR(SHR(b_lo, 19), SHL(b_hi, 13)), OR(SHL(b_lo, 3), SHR(b_hi, 29)), OR(SHR(b_lo, 6), SHL(b_hi, 26)))   -- halves of ROR64(b, 19) xor ROL64(b, 3) xor SHR64(b, 6)
  923.                local u_hi = XOR(OR(SHR(b_hi, 19), SHL(b_lo, 13)), OR(SHL(b_hi, 3), SHR(b_lo, 29)), SHR(b_hi, 6))
  924.                W[jj], W[jj-1] = ADD64_4(t_lo, t_hi, u_lo, u_hi, W[jj-14], W[jj-15], W[jj-32], W[jj-33])   -- plus words k-7 and k-16
  925.             end
  926.             local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]   -- working copies of the state (low halves)
  927.             local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]   -- working copies of the state (high halves)
  928.             local zero = 0
  929.             for j = 1, 80 do   -- 80 rounds, one per iteration
  930.                local t_lo = XOR(g_lo, AND(e_lo, XOR(f_lo, g_lo)))
  931.                local t_hi = XOR(g_hi, AND(e_hi, XOR(f_hi, g_hi)))
  932.                local u_lo = XOR(OR(SHR(e_lo, 14), SHL(e_hi, 18)), OR(SHR(e_lo, 18), SHL(e_hi, 14)), OR(SHL(e_lo, 23), SHR(e_hi, 9)))   -- halves of ROR64(e, 14) xor ROR64(e, 18) xor ROL64(e, 23)
  933.                local u_hi = XOR(OR(SHR(e_hi, 14), SHL(e_lo, 18)), OR(SHR(e_hi, 18), SHL(e_lo, 14)), OR(SHL(e_hi, 23), SHR(e_lo, 9)))
  934.                local sum_lo = u_lo % 2^32 + t_lo % 2^32 + h_lo % 2^32 + K_lo[j] + W[2*j] % 2^32   -- low-word sum; may exceed 32 bits, the excess is carried into the high word below
  935.                local z_lo, z_hi = NORM( sum_lo ), NORM( u_hi + t_hi + h_hi + K_hi[j] + W[2*j-1] + floor(sum_lo / 2^32) )
  936.                zero = zero + zero  -- this trick is needed to avoid "PHI shuffling too complex" due to PHIs overlap
  937.                h_lo, h_hi, g_lo, g_hi, f_lo, f_hi = OR(zero, g_lo), OR(zero, g_hi), OR(zero, f_lo), OR(zero, f_hi), OR(zero, e_lo), OR(zero, e_hi)   -- zero stays 0, so OR(zero, x) does not change any bit of x
  938.                local sum_lo = z_lo % 2^32 + d_lo % 2^32
  939.                e_lo, e_hi = NORM( sum_lo ), NORM( z_hi + d_hi + floor(sum_lo / 2^32) )   -- e = z + d
  940.                d_lo, d_hi, c_lo, c_hi, b_lo, b_hi = OR(zero, c_lo), OR(zero, c_hi), OR(zero, b_lo), OR(zero, b_hi), OR(zero, a_lo), OR(zero, a_hi)   -- from here on b holds the previous a, c the previous b, d the previous c
  941.                u_lo = XOR(OR(SHR(b_lo, 28), SHL(b_hi, 4)), OR(SHL(b_lo, 30), SHR(b_hi, 2)), OR(SHL(b_lo, 25), SHR(b_hi, 7)))   -- halves of ROR64(b, 28) xor ROL64(b, 30) xor ROL64(b, 25)
  942.                u_hi = XOR(OR(SHR(b_hi, 28), SHL(b_lo, 4)), OR(SHL(b_hi, 30), SHR(b_lo, 2)), OR(SHL(b_hi, 25), SHR(b_lo, 7)))
  943.                t_lo = OR(AND(d_lo, c_lo), AND(b_lo, XOR(d_lo, c_lo)))
  944.                t_hi = OR(AND(d_hi, c_hi), AND(b_hi, XOR(d_hi, c_hi)))
  945.                local sum_lo = z_lo % 2^32 + t_lo % 2^32 + u_lo % 2^32
  946.                a_lo, a_hi = NORM( sum_lo ), NORM( z_hi + t_hi + u_hi + floor(sum_lo / 2^32) )   -- a = z + t + u
  947.             end
  948.             H_lo[1], H_hi[1] = ADD64_4(H_lo[1], H_hi[1], a_lo, a_hi, 0, 0, 0, 0)   -- add the working variables back into the state (the two unused operands are zero)
  949.             H_lo[2], H_hi[2] = ADD64_4(H_lo[2], H_hi[2], b_lo, b_hi, 0, 0, 0, 0)
  950.             H_lo[3], H_hi[3] = ADD64_4(H_lo[3], H_hi[3], c_lo, c_hi, 0, 0, 0, 0)
  951.             H_lo[4], H_hi[4] = ADD64_4(H_lo[4], H_hi[4], d_lo, d_hi, 0, 0, 0, 0)
  952.             H_lo[5], H_hi[5] = ADD64_4(H_lo[5], H_hi[5], e_lo, e_hi, 0, 0, 0, 0)
  953.             H_lo[6], H_hi[6] = ADD64_4(H_lo[6], H_hi[6], f_lo, f_hi, 0, 0, 0, 0)
  954.             H_lo[7], H_hi[7] = ADD64_4(H_lo[7], H_hi[7], g_lo, g_hi, 0, 0, 0, 0)
  955.             H_lo[8], H_hi[8] = ADD64_4(H_lo[8], H_hi[8], h_lo, h_hi, 0, 0, 0, 0)
  956.          end
  957.       end
  958.  
  959.    else  -- all platforms except x86
  960.  
  961.  
  962.       -- SHA512 implementation for "LuaJIT non-x86 without FFI" branch
  963.  
  964.       function sha512_feed_128(H_lo, H_hi, str, offs, size)
  965.          -- offs >= 0, size >= 0, size is multiple of 128
  966.          -- W1_hi, W1_lo, W2_hi, W2_lo, ...   Wk_hi = W[2*k-1], Wk_lo = W[2*k]
  967.          local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi
  968.          for pos = offs, offs + size - 1, 128 do
  969.             for j = 1, 16*2 do
  970.                pos = pos + 4
  971.                local a, b, c, d = byte(str, pos - 3, pos)
  972.                W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
  973.             end
  974.             for jj = 17*2, 80*2, 2 do
  975.                local a_lo, a_hi = W[jj-30], W[jj-31]
  976.                local t_lo = XOR(OR(SHR(a_lo, 1), SHL(a_hi, 31)), OR(SHR(a_lo, 8), SHL(a_hi, 24)), OR(SHR(a_lo, 7), SHL(a_hi, 25)))
  977.                local t_hi = XOR(OR(SHR(a_hi, 1), SHL(a_lo, 31)), OR(SHR(a_hi, 8), SHL(a_lo, 24)), SHR(a_hi, 7))
  978.                local b_lo, b_hi = W[jj-4], W[jj-5]
  979.                local u_lo = XOR(OR(SHR(b_lo, 19), SHL(b_hi, 13)), OR(SHL(b_lo, 3), SHR(b_hi, 29)), OR(SHR(b_lo, 6), SHL(b_hi, 26)))
  980.                local u_hi = XOR(OR(SHR(b_hi, 19), SHL(b_lo, 13)), OR(SHL(b_hi, 3), SHR(b_lo, 29)), SHR(b_hi, 6))
  981.                W[jj], W[jj-1] = ADD64_4(t_lo, t_hi, u_lo, u_hi, W[jj-14], W[jj-15], W[jj-32], W[jj-33])
  982.             end
  983.             local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
  984.             local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
  985.             for j = 1, 80 do
  986.                local t_lo = XOR(g_lo, AND(e_lo, XOR(f_lo, g_lo)))
  987.                local t_hi = XOR(g_hi, AND(e_hi, XOR(f_hi, g_hi)))
  988.                local u_lo = XOR(OR(SHR(e_lo, 14), SHL(e_hi, 18)), OR(SHR(e_lo, 18), SHL(e_hi, 14)), OR(SHL(e_lo, 23), SHR(e_hi, 9)))
  989.                local u_hi = XOR(OR(SHR(e_hi, 14), SHL(e_lo, 18)), OR(SHR(e_hi, 18), SHL(e_lo, 14)), OR(SHL(e_hi, 23), SHR(e_lo, 9)))
  990.                local sum_lo = u_lo % 2^32 + t_lo % 2^32 + h_lo % 2^32 + K_lo[j] + W[2*j] % 2^32
  991.                local z_lo, z_hi = NORM( sum_lo ), NORM( u_hi + t_hi + h_hi + K_hi[j] + W[2*j-1] + floor(sum_lo / 2^32) )
  992.                h_lo, h_hi, g_lo, g_hi, f_lo, f_hi = g_lo, g_hi, f_lo, f_hi, e_lo, e_hi
  993.                local sum_lo = z_lo % 2^32 + d_lo % 2^32
  994.                e_lo, e_hi = NORM( sum_lo ), NORM( z_hi + d_hi + floor(sum_lo / 2^32) )
  995.                d_lo, d_hi, c_lo, c_hi, b_lo, b_hi = c_lo, c_hi, b_lo, b_hi, a_lo, a_hi
  996.                u_lo = XOR(OR(SHR(b_lo, 28), SHL(b_hi, 4)), OR(SHL(b_lo, 30), SHR(b_hi, 2)), OR(SHL(b_lo, 25), SHR(b_hi, 7)))
  997.                u_hi = XOR(OR(SHR(b_hi, 28), SHL(b_lo, 4)), OR(SHL(b_hi, 30), SHR(b_lo, 2)), OR(SHL(b_hi, 25), SHR(b_lo, 7)))
  998.                t_lo = OR(AND(d_lo, c_lo), AND(b_lo, XOR(d_lo, c_lo)))
  999.                t_hi = OR(AND(d_hi, c_hi), AND(b_hi, XOR(d_hi, c_hi)))
  1000.                local sum_lo = z_lo % 2^32 + u_lo % 2^32 + t_lo % 2^32
  1001.                a_lo, a_hi = NORM( sum_lo ), NORM( z_hi + u_hi + t_hi + floor(sum_lo / 2^32) )
  1002.             end
  1003.             H_lo[1], H_hi[1] = ADD64_4(H_lo[1], H_hi[1], a_lo, a_hi, 0, 0, 0, 0)
  1004.             H_lo[2], H_hi[2] = ADD64_4(H_lo[2], H_hi[2], b_lo, b_hi, 0, 0, 0, 0)
  1005.             H_lo[3], H_hi[3] = ADD64_4(H_lo[3], H_hi[3], c_lo, c_hi, 0, 0, 0, 0)
  1006.             H_lo[4], H_hi[4] = ADD64_4(H_lo[4], H_hi[4], d_lo, d_hi, 0, 0, 0, 0)
  1007.             H_lo[5], H_hi[5] = ADD64_4(H_lo[5], H_hi[5], e_lo, e_hi, 0, 0, 0, 0)
  1008.             H_lo[6], H_hi[6] = ADD64_4(H_lo[6], H_hi[6], f_lo, f_hi, 0, 0, 0, 0)
  1009.             H_lo[7], H_hi[7] = ADD64_4(H_lo[7], H_hi[7], g_lo, g_hi, 0, 0, 0, 0)
  1010.             H_lo[8], H_hi[8] = ADD64_4(H_lo[8], H_hi[8], h_lo, h_hi, 0, 0, 0, 0)
  1011.          end
  1012.       end
  1013.  
  1014.    end
  1015.  
  1016.  
  1017.    -- MD5 implementation for "LuaJIT without FFI" branch
  1018.  
  1019.    function md5_feed_64(H, str, offs, size)
  1020.       -- offs >= 0, size >= 0, size is multiple of 64
  1021.       local W, K = common_W, md5_K
  1022.       for pos = offs, offs + size - 1, 64 do
  1023.          for j = 1, 16 do
  1024.             pos = pos + 4
  1025.             local a, b, c, d = byte(str, pos - 3, pos)
  1026.             W[j] = OR(SHL(d, 24), SHL(c, 16), SHL(b, 8), a)
  1027.          end
  1028.          local a, b, c, d = H[1], H[2], H[3], H[4]
  1029.          for j = 1, 16, 4 do
  1030.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j  ] + W[j  ] + a),  7) + b)
  1031.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+1] + W[j+1] + a), 12) + b)
  1032.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+2] + W[j+2] + a), 17) + b)
  1033.             a, d, c, b = d, c, b, NORM(ROL(XOR(d, AND(b, XOR(c, d))) + (K[j+3] + W[j+3] + a), 22) + b)
  1034.          end
  1035.          for j = 17, 32, 4 do
  1036.             local g = 5*j-4
  1037.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j  ] + W[AND(g     , 15) + 1] + a),  5) + b)
  1038.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+1] + W[AND(g +  5, 15) + 1] + a),  9) + b)
  1039.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+2] + W[AND(g + 10, 15) + 1] + a), 14) + b)
  1040.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, AND(d, XOR(b, c))) + (K[j+3] + W[AND(g -  1, 15) + 1] + a), 20) + b)
  1041.          end
  1042.          for j = 33, 48, 4 do
  1043.             local g = 3*j+2
  1044.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j  ] + W[AND(g    , 15) + 1] + a),  4) + b)
  1045.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+1] + W[AND(g + 3, 15) + 1] + a), 11) + b)
  1046.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+2] + W[AND(g + 6, 15) + 1] + a), 16) + b)
  1047.             a, d, c, b = d, c, b, NORM(ROL(XOR(b, c, d) + (K[j+3] + W[AND(g - 7, 15) + 1] + a), 23) + b)
  1048.          end
  1049.          for j = 49, 64, 4 do
  1050.             local g = j*7
  1051.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j  ] + W[AND(g - 7, 15) + 1] + a),  6) + b)
  1052.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+1] + W[AND(g    , 15) + 1] + a), 10) + b)
  1053.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+2] + W[AND(g + 7, 15) + 1] + a), 15) + b)
  1054.             a, d, c, b = d, c, b, NORM(ROL(XOR(c, OR(b, NOT(d))) + (K[j+3] + W[AND(g - 2, 15) + 1] + a), 21) + b)
  1055.          end
  1056.          H[1], H[2], H[3], H[4] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4])
  1057.       end
  1058.    end
  1059.  
  1060.  
  1061.    -- SHA-1 implementation for "LuaJIT without FFI" branch
  1062.  
  1063.    function sha1_feed_64(H, str, offs, size)
  1064.       -- offs >= 0, size >= 0, size is multiple of 64
  1065.       local W = common_W
  1066.       for pos = offs, offs + size - 1, 64 do
  1067.          for j = 1, 16 do
  1068.             pos = pos + 4
  1069.             local a, b, c, d = byte(str, pos - 3, pos)
  1070.             W[j] = OR(SHL(a, 24), SHL(b, 16), SHL(c, 8), d)
  1071.          end
  1072.          for j = 17, 80 do
  1073.             W[j] = ROL(XOR(W[j-3], W[j-8], W[j-14], W[j-16]), 1)
  1074.          end
  1075.          local a, b, c, d, e = H[1], H[2], H[3], H[4], H[5]
  1076.          for j = 1, 20, 5 do
  1077.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j]   + 0x5A827999 + e))          -- constant = floor(2^30 * sqrt(2))
  1078.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+1] + 0x5A827999 + e))
  1079.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+2] + 0x5A827999 + e))
  1080.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+3] + 0x5A827999 + e))
  1081.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(d, AND(b, XOR(d, c))) + (W[j+4] + 0x5A827999 + e))
  1082.          end
  1083.          for j = 21, 40, 5 do
  1084.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j]   + 0x6ED9EBA1 + e))                       -- 2^30 * sqrt(3)
  1085.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0x6ED9EBA1 + e))
  1086.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0x6ED9EBA1 + e))
  1087.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0x6ED9EBA1 + e))
  1088.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0x6ED9EBA1 + e))
  1089.          end
  1090.          for j = 41, 60, 5 do
  1091.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j]   + 0x8F1BBCDC + e))  -- 2^30 * sqrt(5)
  1092.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+1] + 0x8F1BBCDC + e))
  1093.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+2] + 0x8F1BBCDC + e))
  1094.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+3] + 0x8F1BBCDC + e))
  1095.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(AND(d, XOR(b, c)), AND(b, c)) + (W[j+4] + 0x8F1BBCDC + e))
  1096.          end
  1097.          for j = 61, 80, 5 do
  1098.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j]   + 0xCA62C1D6 + e))                       -- 2^30 * sqrt(10)
  1099.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+1] + 0xCA62C1D6 + e))
  1100.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+2] + 0xCA62C1D6 + e))
  1101.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+3] + 0xCA62C1D6 + e))
  1102.             e, d, c, b, a = d, c, ROR(b, 2), a, NORM(ROL(a, 5) + XOR(b, c, d) + (W[j+4] + 0xCA62C1D6 + e))
  1103.          end
  1104.          H[1], H[2], H[3], H[4], H[5] = NORM(a + H[1]), NORM(b + H[2]), NORM(c + H[3]), NORM(d + H[4]), NORM(e + H[5])
  1105.       end
  1106.    end
  1107.  
  1108. end
  1109.  
  1110.  
  1111. if branch == "INT64" then
  1112.  
  1113.  
  1114.    -- implementation for Lua 5.3/5.4
  1115.  
  1116.    hi_factor = 4294967296
  1117.    hi_factor_keccak = 4294967296
  1118.    lanes_index_base = 1
  1119.  
  1120.    HEX64, XOR64A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed = load[[
  1121.       local md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo = ...
  1122.       local string_format, string_unpack = string.format, string.unpack
  1123.  
  1124.       local function HEX64(x)
  1125.          return string_format("%016x", x)
  1126.       end
  1127.  
  1128.       local function XOR64A5(x)
  1129.          return x ~ 0xa5a5a5a5a5a5a5a5
  1130.       end
  1131.  
  1132.       local function XOR_BYTE(x, y)
  1133.          return x ~ y
  1134.       end
  1135.  
  1136.       local common_W = {}
  1137.  
  1138.       local function sha256_feed_64(H, str, offs, size)
  1139.          -- offs >= 0, size >= 0, size is multiple of 64
  1140.          local W, K = common_W, sha2_K_hi
  1141.          local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
  1142.          for pos = offs + 1, offs + size, 64 do
  1143.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1144.                string_unpack(">I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
  1145.             for j = 17, 64 do
  1146.                local a = W[j-15]
  1147.                a = a<<32 | a
  1148.                local b = W[j-2]
  1149.                b = b<<32 | b
  1150.                W[j] = (a>>7 ~ a>>18 ~ a>>35) + (b>>17 ~ b>>19 ~ b>>42) + W[j-7] + W[j-16] & (1<<32)-1
  1151.             end
  1152.             local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
  1153.             for j = 1, 64 do
  1154.                e = e<<32 | e & (1<<32)-1
  1155.                local z = (e>>6 ~ e>>11 ~ e>>25) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
  1156.                h = g
  1157.                g = f
  1158.                f = e
  1159.                e = z + d
  1160.                d = c
  1161.                c = b
  1162.                b = a
  1163.                a = a<<32 | a & (1<<32)-1
  1164.                a = z + ((a ~ c) & d ~ a & c) + (a>>2 ~ a>>13 ~ a>>22)
  1165.             end
  1166.             h1 = a + h1
  1167.             h2 = b + h2
  1168.             h3 = c + h3
  1169.             h4 = d + h4
  1170.             h5 = e + h5
  1171.             h6 = f + h6
  1172.             h7 = g + h7
  1173.             h8 = h + h8
  1174.          end
  1175.          H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  1176.       end
  1177.  
  1178.       local function sha512_feed_128(H, _, str, offs, size)
  1179.          -- offs >= 0, size >= 0, size is multiple of 128
  1180.          local W, K = common_W, sha2_K_lo
  1181.          local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
  1182.          for pos = offs + 1, offs + size, 128 do
  1183.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1184.                string_unpack(">i8i8i8i8i8i8i8i8i8i8i8i8i8i8i8i8", str, pos)
  1185.             for j = 17, 80 do
  1186.                local a = W[j-15]
  1187.                local b = W[j-2]
  1188.                W[j] = (a >> 1 ~ a >> 7 ~ a >> 8 ~ a << 56 ~ a << 63) + (b >> 6 ~ b >> 19 ~ b >> 61 ~ b << 3 ~ b << 45) + W[j-7] + W[j-16]
  1189.             end
  1190.             local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
  1191.             for j = 1, 80 do
  1192.                local z = (e >> 14 ~ e >> 18 ~ e >> 41 ~ e << 23 ~ e << 46 ~ e << 50) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
  1193.                h = g
  1194.                g = f
  1195.                f = e
  1196.                e = z + d
  1197.                d = c
  1198.                c = b
  1199.                b = a
  1200.                a = z + ((a ~ c) & d ~ a & c) + (a >> 28 ~ a >> 34 ~ a >> 39 ~ a << 25 ~ a << 30 ~ a << 36)
  1201.             end
  1202.             h1 = a + h1
  1203.             h2 = b + h2
  1204.             h3 = c + h3
  1205.             h4 = d + h4
  1206.             h5 = e + h5
  1207.             h6 = f + h6
  1208.             h7 = g + h7
  1209.             h8 = h + h8
  1210.          end
  1211.          H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  1212.       end
  1213.  
  1214.       local function md5_feed_64(H, str, offs, size)
  1215.          -- offs >= 0, size >= 0, size is multiple of 64
  1216.          local W, K, md5_next_shift = common_W, md5_K, md5_next_shift
  1217.          local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
  1218.          for pos = offs + 1, offs + size, 64 do
  1219.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1220.                string_unpack("<I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
  1221.             local a, b, c, d = h1, h2, h3, h4
  1222.             local s = 32-7
  1223.             for j = 1, 16 do
  1224.                local F = (d ~ b & (c ~ d)) + a + K[j] + W[j]
  1225.                a = d
  1226.                d = c
  1227.                c = b
  1228.                b = ((F<<32 | F & (1<<32)-1) >> s) + b
  1229.                s = md5_next_shift[s]
  1230.             end
  1231.             s = 32-5
  1232.             for j = 17, 32 do
  1233.                local F = (c ~ d & (b ~ c)) + a + K[j] + W[(5*j-4 & 15) + 1]
  1234.                a = d
  1235.                d = c
  1236.                c = b
  1237.                b = ((F<<32 | F & (1<<32)-1) >> s) + b
  1238.                s = md5_next_shift[s]
  1239.             end
  1240.             s = 32-4
  1241.             for j = 33, 48 do
  1242.                local F = (b ~ c ~ d) + a + K[j] + W[(3*j+2 & 15) + 1]
  1243.                a = d
  1244.                d = c
  1245.                c = b
  1246.                b = ((F<<32 | F & (1<<32)-1) >> s) + b
  1247.                s = md5_next_shift[s]
  1248.             end
  1249.             s = 32-6
  1250.             for j = 49, 64 do
  1251.                local F = (c ~ (b | ~d)) + a + K[j] + W[(j*7-7 & 15) + 1]
  1252.                a = d
  1253.                d = c
  1254.                c = b
  1255.                b = ((F<<32 | F & (1<<32)-1) >> s) + b
  1256.                s = md5_next_shift[s]
  1257.             end
  1258.             h1 = a + h1
  1259.             h2 = b + h2
  1260.             h3 = c + h3
  1261.             h4 = d + h4
  1262.          end
  1263.          H[1], H[2], H[3], H[4] = h1, h2, h3, h4
  1264.       end
  1265.  
  1266.       local function sha1_feed_64(H, str, offs, size)
  1267.          -- offs >= 0, size >= 0, size is multiple of 64
  1268.          local W = common_W
  1269.          local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
  1270.          for pos = offs + 1, offs + size, 64 do
  1271.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1272.                string_unpack(">I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4I4", str, pos)
  1273.             for j = 17, 80 do
  1274.                local a = W[j-3] ~ W[j-8] ~ W[j-14] ~ W[j-16]
  1275.                W[j] = (a<<32 | a) << 1 >> 32
  1276.             end
  1277.             local a, b, c, d, e = h1, h2, h3, h4, h5
  1278.             for j = 1, 20 do
  1279.                local z = ((a<<32 | a & (1<<32)-1) >> 27) + (d ~ b & (c ~ d)) + 0x5A827999 + W[j] + e      -- constant = floor(2^30 * sqrt(2))
  1280.                e = d
  1281.                d = c
  1282.                c = (b<<32 | b & (1<<32)-1) >> 2
  1283.                b = a
  1284.                a = z
  1285.             end
  1286.             for j = 21, 40 do
  1287.                local z = ((a<<32 | a & (1<<32)-1) >> 27) + (b ~ c ~ d) + 0x6ED9EBA1 + W[j] + e            -- 2^30 * sqrt(3)
  1288.                e = d
  1289.                d = c
  1290.                c = (b<<32 | b & (1<<32)-1) >> 2
  1291.                b = a
  1292.                a = z
  1293.             end
  1294.             for j = 41, 60 do
  1295.                local z = ((a<<32 | a & (1<<32)-1) >> 27) + ((b ~ c) & d ~ b & c) + 0x8F1BBCDC + W[j] + e  -- 2^30 * sqrt(5)
  1296.                e = d
  1297.                d = c
  1298.                c = (b<<32 | b & (1<<32)-1) >> 2
  1299.                b = a
  1300.                a = z
  1301.             end
  1302.             for j = 61, 80 do
  1303.                local z = ((a<<32 | a & (1<<32)-1) >> 27) + (b ~ c ~ d) + 0xCA62C1D6 + W[j] + e            -- 2^30 * sqrt(10)
  1304.                e = d
  1305.                d = c
  1306.                c = (b<<32 | b & (1<<32)-1) >> 2
  1307.                b = a
  1308.                a = z
  1309.             end
  1310.             h1 = a + h1
  1311.             h2 = b + h2
  1312.             h3 = c + h3
  1313.             h4 = d + h4
  1314.             h5 = e + h5
  1315.          end
  1316.          H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5
  1317.       end
  1318.  
  1319.       local keccak_format_i8 = build_keccak_format("i8")
  1320.  
  1321.       local function keccak_feed(lanes, _, str, offs, size, block_size_in_bytes)
  1322.          -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
  1323.          local RC = sha3_RC_lo
  1324.          local qwords_qty = block_size_in_bytes / 8
  1325.          local keccak_format = keccak_format_i8[qwords_qty]
  1326.          for pos = offs + 1, offs + size, block_size_in_bytes do
  1327.             local qwords_from_message = {string_unpack(keccak_format, str, pos)}
  1328.             for j = 1, qwords_qty do
  1329.                lanes[j] = lanes[j] ~ qwords_from_message[j]
  1330.             end
  1331.             local L01, L02, L03, L04, L05, L06, L07, L08, L09, L10, L11, L12, L13, L14, L15, L16, L17, L18, L19, L20, L21, L22, L23, L24, L25 =
  1332.                lanes[1], lanes[2], lanes[3], lanes[4], lanes[5], lanes[6], lanes[7], lanes[8], lanes[9], lanes[10], lanes[11], lanes[12], lanes[13],
  1333.                lanes[14], lanes[15], lanes[16], lanes[17], lanes[18], lanes[19], lanes[20], lanes[21], lanes[22], lanes[23], lanes[24], lanes[25]
  1334.             for round_idx = 1, 24 do
  1335.                local C1 = L01 ~ L06 ~ L11 ~ L16 ~ L21
  1336.                local C2 = L02 ~ L07 ~ L12 ~ L17 ~ L22
  1337.                local C3 = L03 ~ L08 ~ L13 ~ L18 ~ L23
  1338.                local C4 = L04 ~ L09 ~ L14 ~ L19 ~ L24
  1339.                local C5 = L05 ~ L10 ~ L15 ~ L20 ~ L25
  1340.                local D = C1 ~ C3<<1 ~ C3>>63
  1341.                local T0 = D ~ L02
  1342.                local T1 = D ~ L07
  1343.                local T2 = D ~ L12
  1344.                local T3 = D ~ L17
  1345.                local T4 = D ~ L22
  1346.                L02 = T1<<44 ~ T1>>20
  1347.                L07 = T3<<45 ~ T3>>19
  1348.                L12 = T0<<1 ~ T0>>63
  1349.                L17 = T2<<10 ~ T2>>54
  1350.                L22 = T4<<2 ~ T4>>62
  1351.                D = C2 ~ C4<<1 ~ C4>>63
  1352.                T0 = D ~ L03
  1353.                T1 = D ~ L08
  1354.                T2 = D ~ L13
  1355.                T3 = D ~ L18
  1356.                T4 = D ~ L23
  1357.                L03 = T2<<43 ~ T2>>21
  1358.                L08 = T4<<61 ~ T4>>3
  1359.                L13 = T1<<6 ~ T1>>58
  1360.                L18 = T3<<15 ~ T3>>49
  1361.                L23 = T0<<62 ~ T0>>2
  1362.                D = C3 ~ C5<<1 ~ C5>>63
  1363.                T0 = D ~ L04
  1364.                T1 = D ~ L09
  1365.                T2 = D ~ L14
  1366.                T3 = D ~ L19
  1367.                T4 = D ~ L24
  1368.                L04 = T3<<21 ~ T3>>43
  1369.                L09 = T0<<28 ~ T0>>36
  1370.                L14 = T2<<25 ~ T2>>39
  1371.                L19 = T4<<56 ~ T4>>8
  1372.                L24 = T1<<55 ~ T1>>9
  1373.                D = C4 ~ C1<<1 ~ C1>>63
  1374.                T0 = D ~ L05
  1375.                T1 = D ~ L10
  1376.                T2 = D ~ L15
  1377.                T3 = D ~ L20
  1378.                T4 = D ~ L25
  1379.                L05 = T4<<14 ~ T4>>50
  1380.                L10 = T1<<20 ~ T1>>44
  1381.                L15 = T3<<8 ~ T3>>56
  1382.                L20 = T0<<27 ~ T0>>37
  1383.                L25 = T2<<39 ~ T2>>25
  1384.                D = C5 ~ C2<<1 ~ C2>>63
  1385.                T1 = D ~ L06
  1386.                T2 = D ~ L11
  1387.                T3 = D ~ L16
  1388.                T4 = D ~ L21
  1389.                L06 = T2<<3 ~ T2>>61
  1390.                L11 = T4<<18 ~ T4>>46
  1391.                L16 = T1<<36 ~ T1>>28
  1392.                L21 = T3<<41 ~ T3>>23
  1393.                L01 = D ~ L01
  1394.                L01, L02, L03, L04, L05 = L01 ~ ~L02 & L03, L02 ~ ~L03 & L04, L03 ~ ~L04 & L05, L04 ~ ~L05 & L01, L05 ~ ~L01 & L02
  1395.                L06, L07, L08, L09, L10 = L09 ~ ~L10 & L06, L10 ~ ~L06 & L07, L06 ~ ~L07 & L08, L07 ~ ~L08 & L09, L08 ~ ~L09 & L10
  1396.                L11, L12, L13, L14, L15 = L12 ~ ~L13 & L14, L13 ~ ~L14 & L15, L14 ~ ~L15 & L11, L15 ~ ~L11 & L12, L11 ~ ~L12 & L13
  1397.                L16, L17, L18, L19, L20 = L20 ~ ~L16 & L17, L16 ~ ~L17 & L18, L17 ~ ~L18 & L19, L18 ~ ~L19 & L20, L19 ~ ~L20 & L16
  1398.                L21, L22, L23, L24, L25 = L23 ~ ~L24 & L25, L24 ~ ~L25 & L21, L25 ~ ~L21 & L22, L21 ~ ~L22 & L23, L22 ~ ~L23 & L24
  1399.                L01 = L01 ~ RC[round_idx]
  1400.             end
  1401.             lanes[1]  = L01
  1402.             lanes[2]  = L02
  1403.             lanes[3]  = L03
  1404.             lanes[4]  = L04
  1405.             lanes[5]  = L05
  1406.             lanes[6]  = L06
  1407.             lanes[7]  = L07
  1408.             lanes[8]  = L08
  1409.             lanes[9]  = L09
  1410.             lanes[10] = L10
  1411.             lanes[11] = L11
  1412.             lanes[12] = L12
  1413.             lanes[13] = L13
  1414.             lanes[14] = L14
  1415.             lanes[15] = L15
  1416.             lanes[16] = L16
  1417.             lanes[17] = L17
  1418.             lanes[18] = L18
  1419.             lanes[19] = L19
  1420.             lanes[20] = L20
  1421.             lanes[21] = L21
  1422.             lanes[22] = L22
  1423.             lanes[23] = L23
  1424.             lanes[24] = L24
  1425.             lanes[25] = L25
  1426.          end
  1427.       end
  1428.  
  1429.       return HEX64, XOR64A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed
  1430.    ]](md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo)
  1431.  
  1432. end
  1433.  
  1434.  
  1435. if branch == "INT32" then
  1436.  
  1437.  
  1438.    -- implementation for Lua 5.3/5.4 having non-standard numbers config "int32"+"double" (built with LUA_INT_TYPE=LUA_INT_INT)
  1439.  
  1440.    K_lo_modulo = 2^32
  1441.  
  1442.    function HEX(x) -- returns string of 8 lowercase hexadecimal digits
  1443.       return string_format("%08x", x)
  1444.    end
  1445.  
  1446.    XOR32A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed = load[[
  1447.       local md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo, sha3_RC_hi = ...
  1448.       local string_unpack, floor = string.unpack, math.floor
  1449.  
  1450.       local function XOR32A5(x)
  1451.          return x ~ 0xA5A5A5A5
  1452.       end
  1453.  
  1454.       local function XOR_BYTE(x, y)
  1455.          return x ~ y
  1456.       end
  1457.  
  1458.       local common_W = {}
  1459.  
  1460.       local function sha256_feed_64(H, str, offs, size)
  1461.          -- offs >= 0, size >= 0, size is multiple of 64
  1462.          local W, K = common_W, sha2_K_hi
  1463.          local h1, h2, h3, h4, h5, h6, h7, h8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
  1464.          for pos = offs + 1, offs + size, 64 do
  1465.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1466.                string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
  1467.             for j = 17, 64 do
  1468.                local a, b = W[j-15], W[j-2]
  1469.                W[j] = (a>>7 ~ a<<25 ~ a<<14 ~ a>>18 ~ a>>3) + (b<<15 ~ b>>17 ~ b<<13 ~ b>>19 ~ b>>10) + W[j-7] + W[j-16]
  1470.             end
  1471.             local a, b, c, d, e, f, g, h = h1, h2, h3, h4, h5, h6, h7, h8
  1472.             for j = 1, 64 do
  1473.                local z = (e>>6 ~ e<<26 ~ e>>11 ~ e<<21 ~ e>>25 ~ e<<7) + (g ~ e & (f ~ g)) + h + K[j] + W[j]
  1474.                h = g
  1475.                g = f
  1476.                f = e
  1477.                e = z + d
  1478.                d = c
  1479.                c = b
  1480.                b = a
  1481.                a = z + ((a ~ c) & d ~ a & c) + (a>>2 ~ a<<30 ~ a>>13 ~ a<<19 ~ a<<10 ~ a>>22)
  1482.             end
  1483.             h1 = a + h1
  1484.             h2 = b + h2
  1485.             h3 = c + h3
  1486.             h4 = d + h4
  1487.             h5 = e + h5
  1488.             h6 = f + h6
  1489.             h7 = g + h7
  1490.             h8 = h + h8
  1491.          end
  1492.          H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8] = h1, h2, h3, h4, h5, h6, h7, h8
  1493.       end
  1494.  
  1495.       local function sha512_feed_128(H_lo, H_hi, str, offs, size)
  1496.          -- offs >= 0, size >= 0, size is multiple of 128
  1497.          -- W1_hi, W1_lo, W2_hi, W2_lo, ...   Wk_hi = W[2*k-1], Wk_lo = W[2*k]
  1498.          local floor, W, K_lo, K_hi = floor, common_W, sha2_K_lo, sha2_K_hi
  1499.          local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
  1500.          local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
  1501.          for pos = offs + 1, offs + size, 128 do
  1502.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16],
  1503.                W[17], W[18], W[19], W[20], W[21], W[22], W[23], W[24], W[25], W[26], W[27], W[28], W[29], W[30], W[31], W[32] =
  1504.                string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)
  1505.             for jj = 17*2, 80*2, 2 do
  1506.                local a_lo, a_hi, b_lo, b_hi = W[jj-30], W[jj-31], W[jj-4], W[jj-5]
  1507.                local tmp =
  1508.                   (a_lo>>1 ~ a_hi<<31 ~ a_lo>>8 ~ a_hi<<24 ~ a_lo>>7 ~ a_hi<<25) % 2^32
  1509.                   + (b_lo>>19 ~ b_hi<<13 ~ b_lo<<3 ~ b_hi>>29 ~ b_lo>>6 ~ b_hi<<26) % 2^32
  1510.                   + W[jj-14] % 2^32 + W[jj-32] % 2^32
  1511.                W[jj-1] =
  1512.                   (a_hi>>1 ~ a_lo<<31 ~ a_hi>>8 ~ a_lo<<24 ~ a_hi>>7)
  1513.                   + (b_hi>>19 ~ b_lo<<13 ~ b_hi<<3 ~ b_lo>>29 ~ b_hi>>6)
  1514.                   + W[jj-15] + W[jj-33] + floor(tmp / 2^32)
  1515.                W[jj] = 0|((tmp + 2^31) % 2^32 - 2^31)
  1516.             end
  1517.             local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
  1518.             local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  1519.             for j = 1, 80 do
  1520.                local jj = 2*j
  1521.                local z_lo = (e_lo>>14 ~ e_hi<<18 ~ e_lo>>18 ~ e_hi<<14 ~ e_lo<<23 ~ e_hi>>9) % 2^32 + (g_lo ~ e_lo & (f_lo ~ g_lo)) % 2^32 + h_lo % 2^32 + K_lo[j] + W[jj] % 2^32
  1522.                local z_hi = (e_hi>>14 ~ e_lo<<18 ~ e_hi>>18 ~ e_lo<<14 ~ e_hi<<23 ~ e_lo>>9) + (g_hi ~ e_hi & (f_hi ~ g_hi)) + h_hi + K_hi[j] + W[jj-1] + floor(z_lo / 2^32)
  1523.                z_lo = z_lo % 2^32
  1524.                h_lo = g_lo
  1525.                h_hi = g_hi
  1526.                g_lo = f_lo
  1527.                g_hi = f_hi
  1528.                f_lo = e_lo
  1529.                f_hi = e_hi
  1530.                e_lo = z_lo + d_lo % 2^32
  1531.                e_hi = z_hi + d_hi + floor(e_lo / 2^32)
  1532.                e_lo = 0|((e_lo + 2^31) % 2^32 - 2^31)
  1533.                d_lo = c_lo
  1534.                d_hi = c_hi
  1535.                c_lo = b_lo
  1536.                c_hi = b_hi
  1537.                b_lo = a_lo
  1538.                b_hi = a_hi
  1539.                z_lo = z_lo + (d_lo & c_lo ~ b_lo & (d_lo ~ c_lo)) % 2^32 + (b_lo>>28 ~ b_hi<<4 ~ b_lo<<30 ~ b_hi>>2 ~ b_lo<<25 ~ b_hi>>7) % 2^32
  1540.                a_hi = z_hi + (d_hi & c_hi ~ b_hi & (d_hi ~ c_hi)) + (b_hi>>28 ~ b_lo<<4 ~ b_hi<<30 ~ b_lo>>2 ~ b_hi<<25 ~ b_lo>>7) + floor(z_lo / 2^32)
  1541.                a_lo = 0|((z_lo + 2^31) % 2^32 - 2^31)
  1542.             end
  1543.             a_lo = h1_lo % 2^32 + a_lo % 2^32
  1544.             h1_hi = h1_hi + a_hi + floor(a_lo / 2^32)
  1545.             h1_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1546.             a_lo = h2_lo % 2^32 + b_lo % 2^32
  1547.             h2_hi = h2_hi + b_hi + floor(a_lo / 2^32)
  1548.             h2_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1549.             a_lo = h3_lo % 2^32 + c_lo % 2^32
  1550.             h3_hi = h3_hi + c_hi + floor(a_lo / 2^32)
  1551.             h3_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1552.             a_lo = h4_lo % 2^32 + d_lo % 2^32
  1553.             h4_hi = h4_hi + d_hi + floor(a_lo / 2^32)
  1554.             h4_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1555.             a_lo = h5_lo % 2^32 + e_lo % 2^32
  1556.             h5_hi = h5_hi + e_hi + floor(a_lo / 2^32)
  1557.             h5_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1558.             a_lo = h6_lo % 2^32 + f_lo % 2^32
  1559.             h6_hi = h6_hi + f_hi + floor(a_lo / 2^32)
  1560.             h6_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1561.             a_lo = h7_lo % 2^32 + g_lo % 2^32
  1562.             h7_hi = h7_hi + g_hi + floor(a_lo / 2^32)
  1563.             h7_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1564.             a_lo = h8_lo % 2^32 + h_lo % 2^32
  1565.             h8_hi = h8_hi + h_hi + floor(a_lo / 2^32)
  1566.             h8_lo = 0|((a_lo + 2^31) % 2^32 - 2^31)
  1567.          end
  1568.          H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
  1569.          H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  1570.       end
  1571.  
  1572.       local function md5_feed_64(H, str, offs, size)   -- MD5 block function: mixes each 64-byte block of str[offs+1 .. offs+size] into the state words H[1..4] (updated in place, no return value)
  1573.          -- offs >= 0, size >= 0, size is multiple of 64
  1574.          local W, K, md5_next_shift = common_W, md5_K, md5_next_shift   -- local aliases for the scratch array, the per-step constants and the shift table (all defined outside this function)
  1575.          local h1, h2, h3, h4 = H[1], H[2], H[3], H[4]
  1576.          for pos = offs + 1, offs + size, 64 do   -- pos = 1-based index of the first byte of the current block
  1577.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1578.                string_unpack("<i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)   -- 16 values per block; "<i4" is a little-endian signed 4-byte int if string_unpack follows string.unpack conventions (TODO confirm)
  1579.             local a, b, c, d = h1, h2, h3, h4
  1580.             local s = 32-7   -- s is the right-shift count; the matching left shift below is 32-s (7 for the first step)
  1581.             for j = 1, 16 do
  1582.                local F = (d ~ b & (c ~ d)) + a + K[j] + W[j]   -- & binds tighter than ~: d ~ (b & (c ~ d)), which takes c where b has a 1 bit and d elsewhere
  1583.                a = d
  1584.                d = c
  1585.                c = b
  1586.                b = (F << 32-s | F>>s) + b   -- parses as (F << (32-s)) | (F >> s): a left rotation by 32-s bits provided integers are 32 bits wide (NOTE(review): looks like the 32-bit-integer Lua 5.3/5.4 branch - confirm)
  1587.                s = md5_next_shift[s]   -- the shift count for the next step comes from the lookup table
  1588.             end
  1589.             s = 32-5
  1590.             for j = 17, 32 do
  1591.                local F = (c ~ d & (b ~ c)) + a + K[j] + W[(5*j-4 & 15) + 1]   -- c ~ (d & (b ~ c)): takes b where d has a 1 bit, c elsewhere; (5*j-4) & 15 is the 0-based message word, +1 because W is 1-based
  1592.                a = d
  1593.                d = c
  1594.                c = b
  1595.                b = (F << 32-s | F>>s) + b
  1596.                s = md5_next_shift[s]
  1597.             end
  1598.             s = 32-4
  1599.             for j = 33, 48 do
  1600.                local F = (b ~ c ~ d) + a + K[j] + W[(3*j+2 & 15) + 1]   -- three-way XOR; message word index is (3*j+2) mod 16, plus 1
  1601.                a = d
  1602.                d = c
  1603.                c = b
  1604.                b = (F << 32-s | F>>s) + b
  1605.                s = md5_next_shift[s]
  1606.             end
  1607.             s = 32-6
  1608.             for j = 49, 64 do
  1609.                local F = (c ~ (b | ~d)) + a + K[j] + W[(j*7-7 & 15) + 1]   -- ~d here is unary bitwise NOT; message word index is (7*j-7) mod 16, plus 1
  1610.                a = d
  1611.                d = c
  1612.                c = b
  1613.                b = (F << 32-s | F>>s) + b
  1614.                s = md5_next_shift[s]
  1615.             end
  1616.             h1 = a + h1   -- add the block result into the running state; there is no explicit mod, so this relies on integer + wrapping around
  1617.             h2 = b + h2
  1618.             h3 = c + h3
  1619.             h4 = d + h4
  1620.          end
  1621.          H[1], H[2], H[3], H[4] = h1, h2, h3, h4   -- write the updated state back to the caller's table
  1622.       end
  1623.  
  1624.       local function sha1_feed_64(H, str, offs, size)   -- SHA-1 block function: mixes each 64-byte block of str[offs+1 .. offs+size] into the state words H[1..5] (updated in place, no return value)
  1625.          -- offs >= 0, size >= 0, size is multiple of 64
  1626.          local W = common_W   -- shared scratch array (defined outside this function); slots 1..80 are overwritten for every block
  1627.          local h1, h2, h3, h4, h5 = H[1], H[2], H[3], H[4], H[5]
  1628.          for pos = offs + 1, offs + size, 64 do   -- pos = 1-based index of the first byte of the current block
  1629.             W[1], W[2], W[3], W[4], W[5], W[6], W[7], W[8], W[9], W[10], W[11], W[12], W[13], W[14], W[15], W[16] =
  1630.                string_unpack(">i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4i4", str, pos)   -- 16 values per block; ">i4" is a big-endian signed 4-byte int if string_unpack follows string.unpack conventions (TODO confirm)
  1631.             for j = 17, 80 do   -- expand the 16 block words to 80
  1632.                local a = W[j-3] ~ W[j-8] ~ W[j-14] ~ W[j-16]
  1633.                W[j] = a << 1 ~ a >> 31   -- shifts bind tighter than ~: (a << 1) ~ (a >> 31), a rotate-left by 1 provided integers are 32 bits wide (NOTE(review): looks like the 32-bit-integer Lua 5.3/5.4 branch - confirm)
  1634.             end
  1635.             local a, b, c, d, e = h1, h2, h3, h4, h5
  1636.             for j = 1, 20 do   -- steps 1..20: d ~ (b & (c ~ d)) takes c where b has a 1 bit and d elsewhere
  1637.                local z = (a << 5 ~ a >> 27) + (d ~ b & (c ~ d)) + 0x5A827999 + W[j] + e      -- constant = floor(2^30 * sqrt(2))
  1638.                e = d
  1639.                d = c
  1640.                c = b << 30 ~ b >> 2   -- (b << 30) ~ (b >> 2): rotate-left by 30 under the same 32-bit assumption
  1641.                b = a
  1642.                a = z
  1643.             end
  1644.             for j = 21, 40 do   -- steps 21..40: three-way XOR
  1645.                local z = (a << 5 ~ a >> 27) + (b ~ c ~ d) + 0x6ED9EBA1 + W[j] + e            -- 2^30 * sqrt(3)
  1646.                e = d
  1647.                d = c
  1648.                c = b << 30 ~ b >> 2
  1649.                b = a
  1650.                a = z
  1651.             end
  1652.             for j = 41, 60 do   -- steps 41..60: ((b ~ c) & d) ~ (b & c), the bitwise majority of b, c, d
  1653.                local z = (a << 5 ~ a >> 27) + ((b ~ c) & d ~ b & c) + 0x8F1BBCDC + W[j] + e  -- 2^30 * sqrt(5)
  1654.                e = d
  1655.                d = c
  1656.                c = b << 30 ~ b >> 2
  1657.                b = a
  1658.                a = z
  1659.             end
  1660.             for j = 61, 80 do   -- steps 61..80: three-way XOR again, with a different constant
  1661.                local z = (a << 5 ~ a >> 27) + (b ~ c ~ d) + 0xCA62C1D6 + W[j] + e            -- 2^30 * sqrt(10)
  1662.                e = d
  1663.                d = c
  1664.                c = b << 30 ~ b >> 2
  1665.                b = a
  1666.                a = z
  1667.             end
  1668.             h1 = a + h1   -- add the block result into the running state; there is no explicit mod, so this relies on integer + wrapping around
  1669.             h2 = b + h2
  1670.             h3 = c + h3
  1671.             h4 = d + h4
  1672.             h5 = e + h5
  1673.          end
  1674.          H[1], H[2], H[3], H[4], H[5] = h1, h2, h3, h4, h5   -- write the updated state back to the caller's table
  1675.       end
  1676.  
  1677.       local keccak_format_i4i4 = build_keccak_format("i4i4")   -- table of unpack formats, indexed in keccak_feed by the number of 8-byte lanes per block; build_keccak_format is passed into this chunk and not visible here (presumably one "i4i4" pair per lane - TODO confirm)
  1678.  
  1679.       local function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)   -- Keccak absorb + permutation: XORs each block of str[offs+1 .. offs+size] into the state and then runs 24 rounds on it; the 25 lanes are held as 32-bit halves lanes_lo[i] / lanes_hi[i] and updated in place
  1680.          -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
  1681.          local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi   -- per-round constants as lo/hi halves (tables defined outside this function)
  1682.          local qwords_qty = block_size_in_bytes / 8   -- number of 8-byte lanes covered by one block
  1683.          local keccak_format = keccak_format_i4i4[qwords_qty]   -- unpack format selected by lane count
  1684.          for pos = offs + 1, offs + size, block_size_in_bytes do   -- pos = 1-based index of the first byte of the current block
  1685.             local dwords_from_message = {string_unpack(keccak_format, str, pos)}   -- a new table per block, read below as two values per lane
  1686.             for j = 1, qwords_qty do
  1687.                lanes_lo[j] = lanes_lo[j] ~ dwords_from_message[2*j-1]   -- first value of each pair is XORed into the lo half
  1688.                lanes_hi[j] = lanes_hi[j] ~ dwords_from_message[2*j]   -- second value of each pair is XORed into the hi half
  1689.             end
  1690.             local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi,
  1691.                L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi,
  1692.                L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =   -- copy all 25 lanes into locals for the round loop
  1693.                lanes_lo[1], lanes_hi[1], lanes_lo[2], lanes_hi[2], lanes_lo[3], lanes_hi[3], lanes_lo[4], lanes_hi[4], lanes_lo[5], lanes_hi[5],
  1694.                lanes_lo[6], lanes_hi[6], lanes_lo[7], lanes_hi[7], lanes_lo[8], lanes_hi[8], lanes_lo[9], lanes_hi[9], lanes_lo[10], lanes_hi[10],
  1695.                lanes_lo[11], lanes_hi[11], lanes_lo[12], lanes_hi[12], lanes_lo[13], lanes_hi[13], lanes_lo[14], lanes_hi[14], lanes_lo[15], lanes_hi[15],
  1696.                lanes_lo[16], lanes_hi[16], lanes_lo[17], lanes_hi[17], lanes_lo[18], lanes_hi[18], lanes_lo[19], lanes_hi[19], lanes_lo[20], lanes_hi[20],
  1697.                lanes_lo[21], lanes_hi[21], lanes_lo[22], lanes_hi[22], lanes_lo[23], lanes_hi[23], lanes_lo[24], lanes_hi[24], lanes_lo[25], lanes_hi[25]
  1698.             for round_idx = 1, 24 do   -- 24 rounds per block
  1699.                local C1_lo = L01_lo ~ L06_lo ~ L11_lo ~ L16_lo ~ L21_lo   -- C1..C5: XOR of five lanes each (lanes 1,6,11,16,21 for C1; 2,7,12,17,22 for C2; and so on)
  1700.                local C1_hi = L01_hi ~ L06_hi ~ L11_hi ~ L16_hi ~ L21_hi
  1701.                local C2_lo = L02_lo ~ L07_lo ~ L12_lo ~ L17_lo ~ L22_lo
  1702.                local C2_hi = L02_hi ~ L07_hi ~ L12_hi ~ L17_hi ~ L22_hi
  1703.                local C3_lo = L03_lo ~ L08_lo ~ L13_lo ~ L18_lo ~ L23_lo
  1704.                local C3_hi = L03_hi ~ L08_hi ~ L13_hi ~ L18_hi ~ L23_hi
  1705.                local C4_lo = L04_lo ~ L09_lo ~ L14_lo ~ L19_lo ~ L24_lo
  1706.                local C4_hi = L04_hi ~ L09_hi ~ L14_hi ~ L19_hi ~ L24_hi
  1707.                local C5_lo = L05_lo ~ L10_lo ~ L15_lo ~ L20_lo ~ L25_lo
  1708.                local C5_hi = L05_hi ~ L10_hi ~ L15_hi ~ L20_hi ~ L25_hi
  1709.                local D_lo = C1_lo ~ C3_lo<<1 ~ C3_hi>>31   -- D = C1 ~ (C3 rotated left by 1 as a 64-bit value), spelled out on halves; shifts bind tighter than ~ (NOTE(review): assumes 32-bit integers so bits shifted past bit 31 are dropped - confirm)
  1710.                local D_hi = C1_hi ~ C3_hi<<1 ~ C3_lo>>31
  1711.                local T0_lo = D_lo ~ L02_lo   -- T0..T4: D XORed into lanes 2, 7, 12, 17, 22
  1712.                local T0_hi = D_hi ~ L02_hi
  1713.                local T1_lo = D_lo ~ L07_lo
  1714.                local T1_hi = D_hi ~ L07_hi
  1715.                local T2_lo = D_lo ~ L12_lo
  1716.                local T2_hi = D_hi ~ L12_hi
  1717.                local T3_lo = D_lo ~ L17_lo
  1718.                local T3_hi = D_hi ~ L17_hi
  1719.                local T4_lo = D_lo ~ L22_lo
  1720.                local T4_hi = D_hi ~ L22_hi
  1721.                L02_lo = T1_lo>>20 ~ T1_hi<<12   -- the T values are written back rotated and in a different lane order; this pair is a 64-bit rotate right by 20 done on halves
  1722.                L02_hi = T1_hi>>20 ~ T1_lo<<12
  1723.                L07_lo = T3_lo>>19 ~ T3_hi<<13
  1724.                L07_hi = T3_hi>>19 ~ T3_lo<<13
  1725.                L12_lo = T0_lo<<1 ~ T0_hi>>31
  1726.                L12_hi = T0_hi<<1 ~ T0_lo>>31
  1727.                L17_lo = T2_lo<<10 ~ T2_hi>>22
  1728.                L17_hi = T2_hi<<10 ~ T2_lo>>22
  1729.                L22_lo = T4_lo<<2 ~ T4_hi>>30
  1730.                L22_hi = T4_hi<<2 ~ T4_lo>>30
  1731.                D_lo = C2_lo ~ C4_lo<<1 ~ C4_hi>>31   -- same pattern for lanes 3, 8, 13, 18, 23
  1732.                D_hi = C2_hi ~ C4_hi<<1 ~ C4_lo>>31
  1733.                T0_lo = D_lo ~ L03_lo
  1734.                T0_hi = D_hi ~ L03_hi
  1735.                T1_lo = D_lo ~ L08_lo
  1736.                T1_hi = D_hi ~ L08_hi
  1737.                T2_lo = D_lo ~ L13_lo
  1738.                T2_hi = D_hi ~ L13_hi
  1739.                T3_lo = D_lo ~ L18_lo
  1740.                T3_hi = D_hi ~ L18_hi
  1741.                T4_lo = D_lo ~ L23_lo
  1742.                T4_hi = D_hi ~ L23_hi
  1743.                L03_lo = T2_lo>>21 ~ T2_hi<<11
  1744.                L03_hi = T2_hi>>21 ~ T2_lo<<11
  1745.                L08_lo = T4_lo>>3 ~ T4_hi<<29
  1746.                L08_hi = T4_hi>>3 ~ T4_lo<<29
  1747.                L13_lo = T1_lo<<6 ~ T1_hi>>26
  1748.                L13_hi = T1_hi<<6 ~ T1_lo>>26
  1749.                L18_lo = T3_lo<<15 ~ T3_hi>>17
  1750.                L18_hi = T3_hi<<15 ~ T3_lo>>17
  1751.                L23_lo = T0_lo>>2 ~ T0_hi<<30
  1752.                L23_hi = T0_hi>>2 ~ T0_lo<<30
  1753.                D_lo = C3_lo ~ C5_lo<<1 ~ C5_hi>>31   -- same pattern for lanes 4, 9, 14, 19, 24
  1754.                D_hi = C3_hi ~ C5_hi<<1 ~ C5_lo>>31
  1755.                T0_lo = D_lo ~ L04_lo
  1756.                T0_hi = D_hi ~ L04_hi
  1757.                T1_lo = D_lo ~ L09_lo
  1758.                T1_hi = D_hi ~ L09_hi
  1759.                T2_lo = D_lo ~ L14_lo
  1760.                T2_hi = D_hi ~ L14_hi
  1761.                T3_lo = D_lo ~ L19_lo
  1762.                T3_hi = D_hi ~ L19_hi
  1763.                T4_lo = D_lo ~ L24_lo
  1764.                T4_hi = D_hi ~ L24_hi
  1765.                L04_lo = T3_lo<<21 ~ T3_hi>>11
  1766.                L04_hi = T3_hi<<21 ~ T3_lo>>11
  1767.                L09_lo = T0_lo<<28 ~ T0_hi>>4
  1768.                L09_hi = T0_hi<<28 ~ T0_lo>>4
  1769.                L14_lo = T2_lo<<25 ~ T2_hi>>7
  1770.                L14_hi = T2_hi<<25 ~ T2_lo>>7
  1771.                L19_lo = T4_lo>>8 ~ T4_hi<<24
  1772.                L19_hi = T4_hi>>8 ~ T4_lo<<24
  1773.                L24_lo = T1_lo>>9 ~ T1_hi<<23
  1774.                L24_hi = T1_hi>>9 ~ T1_lo<<23
  1775.                D_lo = C4_lo ~ C1_lo<<1 ~ C1_hi>>31   -- same pattern for lanes 5, 10, 15, 20, 25
  1776.                D_hi = C4_hi ~ C1_hi<<1 ~ C1_lo>>31
  1777.                T0_lo = D_lo ~ L05_lo
  1778.                T0_hi = D_hi ~ L05_hi
  1779.                T1_lo = D_lo ~ L10_lo
  1780.                T1_hi = D_hi ~ L10_hi
  1781.                T2_lo = D_lo ~ L15_lo
  1782.                T2_hi = D_hi ~ L15_hi
  1783.                T3_lo = D_lo ~ L20_lo
  1784.                T3_hi = D_hi ~ L20_hi
  1785.                T4_lo = D_lo ~ L25_lo
  1786.                T4_hi = D_hi ~ L25_hi
  1787.                L05_lo = T4_lo<<14 ~ T4_hi>>18
  1788.                L05_hi = T4_hi<<14 ~ T4_lo>>18
  1789.                L10_lo = T1_lo<<20 ~ T1_hi>>12
  1790.                L10_hi = T1_hi<<20 ~ T1_lo>>12
  1791.                L15_lo = T3_lo<<8 ~ T3_hi>>24
  1792.                L15_hi = T3_hi<<8 ~ T3_lo>>24
  1793.                L20_lo = T0_lo<<27 ~ T0_hi>>5
  1794.                L20_hi = T0_hi<<27 ~ T0_lo>>5
  1795.                L25_lo = T2_lo>>25 ~ T2_hi<<7
  1796.                L25_hi = T2_hi>>25 ~ T2_lo<<7
  1797.                D_lo = C5_lo ~ C2_lo<<1 ~ C2_hi>>31   -- lanes 1, 6, 11, 16, 21; there is no T0 here because lane 1 is handled separately below
  1798.                D_hi = C5_hi ~ C2_hi<<1 ~ C2_lo>>31
  1799.                T1_lo = D_lo ~ L06_lo
  1800.                T1_hi = D_hi ~ L06_hi
  1801.                T2_lo = D_lo ~ L11_lo
  1802.                T2_hi = D_hi ~ L11_hi
  1803.                T3_lo = D_lo ~ L16_lo
  1804.                T3_hi = D_hi ~ L16_hi
  1805.                T4_lo = D_lo ~ L21_lo
  1806.                T4_hi = D_hi ~ L21_hi
  1807.                L06_lo = T2_lo<<3 ~ T2_hi>>29
  1808.                L06_hi = T2_hi<<3 ~ T2_lo>>29
  1809.                L11_lo = T4_lo<<18 ~ T4_hi>>14
  1810.                L11_hi = T4_hi<<18 ~ T4_lo>>14
  1811.                L16_lo = T1_lo>>28 ~ T1_hi<<4
  1812.                L16_hi = T1_hi>>28 ~ T1_lo<<4
  1813.                L21_lo = T3_lo>>23 ~ T3_hi<<9
  1814.                L21_hi = T3_hi>>23 ~ T3_lo<<9
  1815.                L01_lo = D_lo ~ L01_lo   -- lane 1 only gets D XORed in; unlike the other lanes it is neither rotated nor moved
  1816.                L01_hi = D_hi ~ L01_hi
  1817.                L01_lo, L02_lo, L03_lo, L04_lo, L05_lo = L01_lo ~ ~L02_lo & L03_lo, L02_lo ~ ~L03_lo & L04_lo, L03_lo ~ ~L04_lo & L05_lo, L04_lo ~ ~L05_lo & L01_lo, L05_lo ~ ~L01_lo & L02_lo   -- x ~ ~y & z parses as x ~ ((~y) & z); multiple assignment, so every right-hand side sees the values from before this statement
  1818.                L01_hi, L02_hi, L03_hi, L04_hi, L05_hi = L01_hi ~ ~L02_hi & L03_hi, L02_hi ~ ~L03_hi & L04_hi, L03_hi ~ ~L04_hi & L05_hi, L04_hi ~ ~L05_hi & L01_hi, L05_hi ~ ~L01_hi & L02_hi
  1819.                L06_lo, L07_lo, L08_lo, L09_lo, L10_lo = L09_lo ~ ~L10_lo & L06_lo, L10_lo ~ ~L06_lo & L07_lo, L06_lo ~ ~L07_lo & L08_lo, L07_lo ~ ~L08_lo & L09_lo, L08_lo ~ ~L09_lo & L10_lo   -- the remaining groups also reorder lanes while combining (here the new lane 6 is computed from lanes 9, 10, 6)
  1820.                L06_hi, L07_hi, L08_hi, L09_hi, L10_hi = L09_hi ~ ~L10_hi & L06_hi, L10_hi ~ ~L06_hi & L07_hi, L06_hi ~ ~L07_hi & L08_hi, L07_hi ~ ~L08_hi & L09_hi, L08_hi ~ ~L09_hi & L10_hi
  1821.                L11_lo, L12_lo, L13_lo, L14_lo, L15_lo = L12_lo ~ ~L13_lo & L14_lo, L13_lo ~ ~L14_lo & L15_lo, L14_lo ~ ~L15_lo & L11_lo, L15_lo ~ ~L11_lo & L12_lo, L11_lo ~ ~L12_lo & L13_lo
  1822.                L11_hi, L12_hi, L13_hi, L14_hi, L15_hi = L12_hi ~ ~L13_hi & L14_hi, L13_hi ~ ~L14_hi & L15_hi, L14_hi ~ ~L15_hi & L11_hi, L15_hi ~ ~L11_hi & L12_hi, L11_hi ~ ~L12_hi & L13_hi
  1823.                L16_lo, L17_lo, L18_lo, L19_lo, L20_lo = L20_lo ~ ~L16_lo & L17_lo, L16_lo ~ ~L17_lo & L18_lo, L17_lo ~ ~L18_lo & L19_lo, L18_lo ~ ~L19_lo & L20_lo, L19_lo ~ ~L20_lo & L16_lo
  1824.                L16_hi, L17_hi, L18_hi, L19_hi, L20_hi = L20_hi ~ ~L16_hi & L17_hi, L16_hi ~ ~L17_hi & L18_hi, L17_hi ~ ~L18_hi & L19_hi, L18_hi ~ ~L19_hi & L20_hi, L19_hi ~ ~L20_hi & L16_hi
  1825.                L21_lo, L22_lo, L23_lo, L24_lo, L25_lo = L23_lo ~ ~L24_lo & L25_lo, L24_lo ~ ~L25_lo & L21_lo, L25_lo ~ ~L21_lo & L22_lo, L21_lo ~ ~L22_lo & L23_lo, L22_lo ~ ~L23_lo & L24_lo
  1826.                L21_hi, L22_hi, L23_hi, L24_hi, L25_hi = L23_hi ~ ~L24_hi & L25_hi, L24_hi ~ ~L25_hi & L21_hi, L25_hi ~ ~L21_hi & L22_hi, L21_hi ~ ~L22_hi & L23_hi, L22_hi ~ ~L23_hi & L24_hi
  1827.                L01_lo = L01_lo ~ RC_lo[round_idx]   -- XOR this round's constant into lane 1
  1828.                L01_hi = L01_hi ~ RC_hi[round_idx]
  1829.             end
  1830.             lanes_lo[1]  = L01_lo   -- store the permuted state back into the caller's lane tables
  1831.             lanes_hi[1]  = L01_hi
  1832.             lanes_lo[2]  = L02_lo
  1833.             lanes_hi[2]  = L02_hi
  1834.             lanes_lo[3]  = L03_lo
  1835.             lanes_hi[3]  = L03_hi
  1836.             lanes_lo[4]  = L04_lo
  1837.             lanes_hi[4]  = L04_hi
  1838.             lanes_lo[5]  = L05_lo
  1839.             lanes_hi[5]  = L05_hi
  1840.             lanes_lo[6]  = L06_lo
  1841.             lanes_hi[6]  = L06_hi
  1842.             lanes_lo[7]  = L07_lo
  1843.             lanes_hi[7]  = L07_hi
  1844.             lanes_lo[8]  = L08_lo
  1845.             lanes_hi[8]  = L08_hi
  1846.             lanes_lo[9]  = L09_lo
  1847.             lanes_hi[9]  = L09_hi
  1848.             lanes_lo[10] = L10_lo
  1849.             lanes_hi[10] = L10_hi
  1850.             lanes_lo[11] = L11_lo
  1851.             lanes_hi[11] = L11_hi
  1852.             lanes_lo[12] = L12_lo
  1853.             lanes_hi[12] = L12_hi
  1854.             lanes_lo[13] = L13_lo
  1855.             lanes_hi[13] = L13_hi
  1856.             lanes_lo[14] = L14_lo
  1857.             lanes_hi[14] = L14_hi
  1858.             lanes_lo[15] = L15_lo
  1859.             lanes_hi[15] = L15_hi
  1860.             lanes_lo[16] = L16_lo
  1861.             lanes_hi[16] = L16_hi
  1862.             lanes_lo[17] = L17_lo
  1863.             lanes_hi[17] = L17_hi
  1864.             lanes_lo[18] = L18_lo
  1865.             lanes_hi[18] = L18_hi
  1866.             lanes_lo[19] = L19_lo
  1867.             lanes_hi[19] = L19_hi
  1868.             lanes_lo[20] = L20_lo
  1869.             lanes_hi[20] = L20_hi
  1870.             lanes_lo[21] = L21_lo
  1871.             lanes_hi[21] = L21_hi
  1872.             lanes_lo[22] = L22_lo
  1873.             lanes_hi[22] = L22_hi
  1874.             lanes_lo[23] = L23_lo
  1875.             lanes_hi[23] = L23_hi
  1876.             lanes_lo[24] = L24_lo
  1877.             lanes_hi[24] = L24_hi
  1878.             lanes_lo[25] = L25_lo
  1879.             lanes_hi[25] = L25_hi
  1880.          end
  1881.       end
  1882.  
  1883.       return XOR32A5, XOR_BYTE, sha256_feed_64, sha512_feed_128, md5_feed_64, sha1_feed_64, keccak_feed   -- hands this chunk's functions back to whoever called the chunk; results are positional, so the receiving side must list them in this same order
  1884.    ]](md5_next_shift, md5_K, sha2_K_lo, sha2_K_hi, build_keccak_format, sha3_RC_lo, sha3_RC_hi)
  1885.  
  1886. end
  1887.  
  1888.  
  1889. if branch == "LIB32" or branch == "EMUL" then
  1890.  
  1891.  
  1892.    -- implementation for Lua 5.1/5.2 (with or without bitwise library available)
  1893.  
  function sha256_feed_64(H, str, offs, size)
     -- SHA-256 block function for the branch that calls the XOR/AND/ROR/ROL/SHR helpers.
     -- Mixes every 64-byte block of str[offs+1 .. offs+size] into the eight state words H[1..8], in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local schedule, round_consts = common_W, sha2_K_hi
     local s1, s2, s3, s4, s5, s6, s7, s8 = H[1], H[2], H[3], H[4], H[5], H[6], H[7], H[8]
     for block_start = offs, offs + size - 1, 64 do
        -- Words 1..16 are the block itself, read four bytes at a time, most significant byte first.
        local cursor = block_start
        for j = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           cursor = cursor + 4
           schedule[j] = ((b1 * 256 + b2) * 256 + b3) * 256 + b4
        end
        -- Words 17..64 are derived from earlier words; the sums are deliberately left unreduced here.
        for j = 17, 64 do
           local x, y = schedule[j-15], schedule[j-2]
           local sigma0 = XOR(ROR(x, 7), ROL(x, 14), SHR(x, 3))
           local sigma1 = XOR(ROL(y, 15), ROL(y, 13), SHR(y, 10))
           schedule[j] = sigma0 + sigma1 + schedule[j-7] + schedule[j-16]
        end
        local a, b, c, d, e, f, g, h = s1, s2, s3, s4, s5, s6, s7, s8
        for j = 1, 64 do
           local big_sigma1 = XOR(ROR(e, 6), ROR(e, 11), ROL(e, 7))
           -- AND(e, f) and AND(-1-e, g) cannot share a set bit, so "+" combines them like a bitwise OR
           local t = big_sigma1 + AND(e, f) + AND(-1-e, g) + h + round_consts[j] + schedule[j]
           local big_sigma0 = XOR(ROR(a, 2), ROR(a, 13), ROL(a, 10))
           -- computed from the registers as they are before the shift below
           local next_a = t + AND(c, b) + AND(a, XOR(c, b)) + big_sigma0
           h, g, f, e = g, f, e, t + d
           d, c, b, a = c, b, a, next_a
        end
        -- Fold the working registers into the state, reducing to 32 bits only here.
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
        s5 = (e + s5) % 4294967296
        s6 = (f + s6) % 4294967296
        s7 = (g + s7) % 4294967296
        s8 = (h + s8) % 4294967296
     end
     H[1], H[2], H[3], H[4] = s1, s2, s3, s4
     H[5], H[6], H[7], H[8] = s5, s6, s7, s8
  end
  1925.  
  1926.    function sha512_feed_128(H_lo, H_hi, str, offs, size)   -- SHA-512-style block function on 64-bit words kept as two 32-bit halves: mixes each 128-byte block of str[offs+1 .. offs+size] into the state H_hi[i] / H_lo[i], i = 1..8, in place
  1927.       -- offs >= 0, size >= 0, size is multiple of 128
  1928.       -- W1_hi, W1_lo, W2_hi, W2_lo, ...   Wk_hi = W[2*k-1], Wk_lo = W[2*k]
  1929.       local W, K_lo, K_hi = common_W, sha2_K_lo, sha2_K_hi   -- local aliases for the scratch array and the round-constant halves (defined outside this view)
  1930.       local h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo = H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8]
  1931.       local h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi = H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8]
  1932.       for pos = offs, offs + size - 1, 128 do   -- pos starts as the 0-based offset of the current block; one iteration per 128-byte block
  1933.          for j = 1, 16*2 do   -- 32 values of 4 bytes each, most significant byte first; odd j is a hi half, even j the matching lo half
  1934.             pos = pos + 4   -- changes only this iteration's copy of the loop variable; the outer loop's stepping is unaffected
  1935.             local a, b, c, d = byte(str, pos - 3, pos)
  1936.             W[j] = ((a * 256 + b) * 256 + c) * 256 + d
  1937.          end
  1938.          for jj = 17*2, 80*2, 2 do   -- extend to 80 words; jj is the index of the lo half, jj-1 of the hi half
  1939.             local a_lo, a_hi, b_lo, b_hi = W[jj-30], W[jj-31], W[jj-4], W[jj-5]   -- a = word 15 positions back, b = word 2 positions back
  1940.             local tmp1 = XOR(SHR(a_lo, 1) + SHL(a_hi, 31), SHR(a_lo, 8) + SHL(a_hi, 24), SHR(a_lo, 7) + SHL(a_hi, 25)) % 4294967296 + XOR(SHR(b_lo, 19) + SHL(b_hi, 13), SHL(b_lo, 3) + SHR(b_hi, 29), SHR(b_lo, 6) + SHL(b_hi, 26)) % 4294967296 + W[jj-14] + W[jj-32]
  1941.             local tmp2 = tmp1 % 4294967296   -- low 32 bits of the lo-half sum; (tmp1 - tmp2) / 4294967296 on the next line is the carry into the hi half
  1942.             W[jj-1] = XOR(SHR(a_hi, 1) + SHL(a_lo, 31), SHR(a_hi, 8) + SHL(a_lo, 24), SHR(a_hi, 7)) + XOR(SHR(b_hi, 19) + SHL(b_lo, 13), SHL(b_hi, 3) + SHR(b_lo, 29), SHR(b_hi, 6)) + W[jj-15] + W[jj-33] + (tmp1 - tmp2) / 4294967296
  1943.             W[jj] = tmp2   -- the lo half is stored reduced; the hi half above is stored without a mod
  1944.          end
  1945.          local a_lo, b_lo, c_lo, d_lo, e_lo, f_lo, g_lo, h_lo = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo
  1946.          local a_hi, b_hi, c_hi, d_hi, e_hi, f_hi, g_hi, h_hi = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  1947.          for j = 1, 80 do   -- 80 rounds per block
  1948.             local jj = 2*j   -- index of the lo half of schedule word j
  1949.             local tmp1 = XOR(SHR(e_lo, 14) + SHL(e_hi, 18), SHR(e_lo, 18) + SHL(e_hi, 14), SHL(e_lo, 23) + SHR(e_hi, 9)) % 4294967296 + (AND(e_lo, f_lo) + AND(-1-e_lo, g_lo)) % 4294967296 + h_lo + K_lo[j] + W[jj]
  1950.             local z_lo = tmp1 % 4294967296   -- lo half of the round sum; the excess (tmp1 - z_lo) / 4294967296 is carried into z_hi
  1951.             local z_hi = XOR(SHR(e_hi, 14) + SHL(e_lo, 18), SHR(e_hi, 18) + SHL(e_lo, 14), SHL(e_hi, 23) + SHR(e_lo, 9)) + AND(e_hi, f_hi) + AND(-1-e_hi, g_hi) + h_hi + K_hi[j] + W[jj-1] + (tmp1 - z_lo) / 4294967296
  1952.             h_lo = g_lo   -- shift the working registers down by one position
  1953.             h_hi = g_hi
  1954.             g_lo = f_lo
  1955.             g_hi = f_hi
  1956.             f_lo = e_lo
  1957.             f_hi = e_hi
  1958.             tmp1 = z_lo + d_lo   -- e = z + d as a 64-bit addition: lo halves first, then the carry goes into the hi half
  1959.             e_lo = tmp1 % 4294967296
  1960.             e_hi = z_hi + d_hi + (tmp1 - e_lo) / 4294967296
  1961.             d_lo = c_lo
  1962.             d_hi = c_hi
  1963.             c_lo = b_lo
  1964.             c_hi = b_hi
  1965.             b_lo = a_lo
  1966.             b_hi = a_hi
  1967.             tmp1 = z_lo + (AND(d_lo, c_lo) + AND(b_lo, XOR(d_lo, c_lo))) % 4294967296 + XOR(SHR(b_lo, 28) + SHL(b_hi, 4), SHL(b_lo, 30) + SHR(b_hi, 2), SHL(b_lo, 25) + SHR(b_hi, 7)) % 4294967296
  1968.             a_lo = tmp1 % 4294967296   -- at this point b, c, d already hold the previous a, b, c, which is what the two lines around this one combine
  1969.             a_hi = z_hi + (AND(d_hi, c_hi) + AND(b_hi, XOR(d_hi, c_hi))) + XOR(SHR(b_hi, 28) + SHL(b_lo, 4), SHL(b_hi, 30) + SHR(b_lo, 2), SHL(b_hi, 25) + SHR(b_lo, 7)) + (tmp1 - a_lo) / 4294967296
  1970.          end
  1971.          a_lo = h1_lo + a_lo   -- fold the registers into the state; a_lo is reused as a scratch variable for every lo-half sum from here on
  1972.          h1_lo = a_lo % 4294967296
  1973.          h1_hi = (h1_hi + a_hi + (a_lo - h1_lo) / 4294967296) % 4294967296   -- hi halves are reduced to 32 bits only here, once per block
  1974.          a_lo = h2_lo + b_lo
  1975.          h2_lo = a_lo % 4294967296
  1976.          h2_hi = (h2_hi + b_hi + (a_lo - h2_lo) / 4294967296) % 4294967296
  1977.          a_lo = h3_lo + c_lo
  1978.          h3_lo = a_lo % 4294967296
  1979.          h3_hi = (h3_hi + c_hi + (a_lo - h3_lo) / 4294967296) % 4294967296
  1980.          a_lo = h4_lo + d_lo
  1981.          h4_lo = a_lo % 4294967296
  1982.          h4_hi = (h4_hi + d_hi + (a_lo - h4_lo) / 4294967296) % 4294967296
  1983.          a_lo = h5_lo + e_lo
  1984.          h5_lo = a_lo % 4294967296
  1985.          h5_hi = (h5_hi + e_hi + (a_lo - h5_lo) / 4294967296) % 4294967296
  1986.          a_lo = h6_lo + f_lo
  1987.          h6_lo = a_lo % 4294967296
  1988.          h6_hi = (h6_hi + f_hi + (a_lo - h6_lo) / 4294967296) % 4294967296
  1989.          a_lo = h7_lo + g_lo
  1990.          h7_lo = a_lo % 4294967296
  1991.          h7_hi = (h7_hi + g_hi + (a_lo - h7_lo) / 4294967296) % 4294967296
  1992.          a_lo = h8_lo + h_lo
  1993.          h8_lo = a_lo % 4294967296
  1994.          h8_hi = (h8_hi + h_hi + (a_lo - h8_lo) / 4294967296) % 4294967296
  1995.       end
  1996.       H_lo[1], H_lo[2], H_lo[3], H_lo[4], H_lo[5], H_lo[6], H_lo[7], H_lo[8] = h1_lo, h2_lo, h3_lo, h4_lo, h5_lo, h6_lo, h7_lo, h8_lo   -- write the updated state back to the caller's tables
  1997.       H_hi[1], H_hi[2], H_hi[3], H_hi[4], H_hi[5], H_hi[6], H_hi[7], H_hi[8] = h1_hi, h2_hi, h3_hi, h4_hi, h5_hi, h6_hi, h7_hi, h8_hi
  1998.    end
  1999.  
  function md5_feed_64(H, str, offs, size)
     -- MD5 block function for the branch that calls the XOR/AND/OR/ROR helpers.
     -- Mixes every 64-byte block of str[offs+1 .. offs+size] into the four state words H[1..4], in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local words, consts, next_shift = common_W, md5_K, md5_next_shift
     local s1, s2, s3, s4 = H[1], H[2], H[3], H[4]
     for block_start = offs, offs + size - 1, 64 do
        -- Read the block as 16 values of four bytes each, least significant byte first.
        local cursor = block_start
        for j = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           cursor = cursor + 4
           words[j] = ((b4 * 256 + b3) * 256 + b2) * 256 + b1
        end
        local a, b, c, d = s1, s2, s3, s4
        -- rot is the amount handed to ROR; the next value always comes from the lookup table
        local rot = 32 - 7
        for j = 1, 16 do
           local mixed = ROR(AND(b, c) + AND(-1-b, d) + a + consts[j] + words[j], rot) + b
           rot = next_shift[rot]
           a, d, c, b = d, c, b, mixed
        end
        rot = 32 - 5
        for j = 17, 32 do
           local idx = (5*j-4) % 16 + 1
           local mixed = ROR(AND(d, b) + AND(-1-d, c) + a + consts[j] + words[idx], rot) + b
           rot = next_shift[rot]
           a, d, c, b = d, c, b, mixed
        end
        rot = 32 - 4
        for j = 33, 48 do
           local idx = (3*j+2) % 16 + 1
           local mixed = ROR(XOR(XOR(b, c), d) + a + consts[j] + words[idx], rot) + b
           rot = next_shift[rot]
           a, d, c, b = d, c, b, mixed
        end
        rot = 32 - 6
        for j = 49, 64 do
           local idx = (j*7-7) % 16 + 1
           local mixed = ROR(XOR(c, OR(b, -1-d)) + a + consts[j] + words[idx], rot) + b
           rot = next_shift[rot]
           a, d, c, b = d, c, b, mixed
        end
        -- Fold the working registers into the state, reducing to 32 bits only here.
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
     end
     H[1], H[2], H[3], H[4] = s1, s2, s3, s4
  end
  2054.  
  function sha1_feed_64(H, str, offs, size)
     -- SHA-1 block function for the branch that calls the XOR/AND/ROL/ROR helpers.
     -- Mixes every 64-byte block of str[offs+1 .. offs+size] into the five state words H[1..5], in place.
     -- Preconditions: offs >= 0, size >= 0, size is a multiple of 64.
     local words = common_W
     local s1, s2, s3, s4, s5 = H[1], H[2], H[3], H[4], H[5]
     for block_start = offs, offs + size - 1, 64 do
        -- Words 1..16 are the block itself, read four bytes at a time, most significant byte first.
        local cursor = block_start
        for j = 1, 16 do
           local b1, b2, b3, b4 = byte(str, cursor + 1, cursor + 4)
           cursor = cursor + 4
           words[j] = ((b1 * 256 + b2) * 256 + b3) * 256 + b4
        end
        -- Words 17..80 are derived from four earlier words each.
        for j = 17, 80 do
           words[j] = ROL(XOR(words[j-3], words[j-8], words[j-14], words[j-16]), 1)
        end
        local a, b, c, d, e = s1, s2, s3, s4, s5
        -- Four groups of 20 steps; each group has its own mixing expression and additive constant.
        for j = 1, 20 do
           local t = ROL(a, 5) + AND(b, c) + AND(-1-b, d) + 0x5A827999 + words[j] + e        -- constant = floor(2^30 * sqrt(2))
           e, d, c, b, a = d, c, ROR(b, 2), a, t
        end
        for j = 21, 40 do
           local t = ROL(a, 5) + XOR(b, c, d) + 0x6ED9EBA1 + words[j] + e                    -- 2^30 * sqrt(3)
           e, d, c, b, a = d, c, ROR(b, 2), a, t
        end
        for j = 41, 60 do
           local t = ROL(a, 5) + AND(d, c) + AND(b, XOR(d, c)) + 0x8F1BBCDC + words[j] + e   -- 2^30 * sqrt(5)
           e, d, c, b, a = d, c, ROR(b, 2), a, t
        end
        for j = 61, 80 do
           local t = ROL(a, 5) + XOR(b, c, d) + 0xCA62C1D6 + words[j] + e                    -- 2^30 * sqrt(10)
           e, d, c, b, a = d, c, ROR(b, 2), a, t
        end
        -- Fold the working registers into the state, reducing to 32 bits only here.
        s1 = (a + s1) % 4294967296
        s2 = (b + s2) % 4294967296
        s3 = (c + s3) % 4294967296
        s4 = (d + s4) % 4294967296
        s5 = (e + s5) % 4294967296
     end
     H[1], H[2], H[3], H[4], H[5] = s1, s2, s3, s4, s5
  end
  2109.  
   -- Absorbs whole blocks of input into a Keccak sponge state and permutes the state after each block.
   --    lanes_lo, lanes_hi  - arrays indexed 1..25: low and high 32-bit halves of the state lanes (updated in place)
   --    str                 - string holding the input bytes
   --    offs                - zero-based offset of the first byte to absorb
   --    size                - number of bytes to absorb
   --    block_size_in_bytes - number of bytes XORed into the state before each permutation
   -- Returns nothing.
   --
   -- Arithmetic idioms used below (numbers are treated as 32-bit words):
   --    (x % 2^32 - x % 2^k) / 2^k   the low 32 bits of x shifted right by k bits
   --    x * 2^k                      x shifted left by k bits; only some of these products are reduced with "% 2^32"
   --                                 NOTE(review): presumably XOR/AND (defined outside this function) ignore bits above the 32nd - confirm
   --    -1-x                         used where a bitwise NOT of x is needed (relies on the same property of AND)
   -- A 64-bit rotation of a lane is assembled from one half shifted right plus the other half shifted left.
   function keccak_feed(lanes_lo, lanes_hi, str, offs, size, block_size_in_bytes)
      -- This is an example of a Lua function having 79 local variables :-)
      -- offs >= 0, size >= 0, size is multiple of block_size_in_bytes, block_size_in_bytes is positive multiple of 8
      local RC_lo, RC_hi = sha3_RC_lo, sha3_RC_hi
      -- number of 8-byte lanes covered by one input block
      local qwords_qty = block_size_in_bytes / 8
      for pos = offs, offs + size - 1, block_size_in_bytes do
         -- XOR the block into the first qwords_qty lanes; each lane is 8 bytes, least significant byte first.
         -- "pos" doubles as a running cursor inside the block: it is advanced by 8 for every lane.
         for j = 1, qwords_qty do
            local a, b, c, d = byte(str, pos + 1, pos + 4)
            lanes_lo[j] = XOR(lanes_lo[j], ((d * 256 + c) * 256 + b) * 256 + a)
            pos = pos + 8
            a, b, c, d = byte(str, pos - 3, pos)
            lanes_hi[j] = XOR(lanes_hi[j], ((d * 256 + c) * 256 + b) * 256 + a)
         end
         -- Load the whole state into local variables for the duration of the permutation
         local L01_lo, L01_hi, L02_lo, L02_hi, L03_lo, L03_hi, L04_lo, L04_hi, L05_lo, L05_hi, L06_lo, L06_hi, L07_lo, L07_hi, L08_lo, L08_hi,
            L09_lo, L09_hi, L10_lo, L10_hi, L11_lo, L11_hi, L12_lo, L12_hi, L13_lo, L13_hi, L14_lo, L14_hi, L15_lo, L15_hi, L16_lo, L16_hi,
            L17_lo, L17_hi, L18_lo, L18_hi, L19_lo, L19_hi, L20_lo, L20_hi, L21_lo, L21_hi, L22_lo, L22_hi, L23_lo, L23_hi, L24_lo, L24_hi, L25_lo, L25_hi =
            lanes_lo[1], lanes_hi[1], lanes_lo[2], lanes_hi[2], lanes_lo[3], lanes_hi[3], lanes_lo[4], lanes_hi[4], lanes_lo[5], lanes_hi[5],
            lanes_lo[6], lanes_hi[6], lanes_lo[7], lanes_hi[7], lanes_lo[8], lanes_hi[8], lanes_lo[9], lanes_hi[9], lanes_lo[10], lanes_hi[10],
            lanes_lo[11], lanes_hi[11], lanes_lo[12], lanes_hi[12], lanes_lo[13], lanes_hi[13], lanes_lo[14], lanes_hi[14], lanes_lo[15], lanes_hi[15],
            lanes_lo[16], lanes_hi[16], lanes_lo[17], lanes_hi[17], lanes_lo[18], lanes_hi[18], lanes_lo[19], lanes_hi[19], lanes_lo[20], lanes_hi[20],
            lanes_lo[21], lanes_hi[21], lanes_lo[22], lanes_hi[22], lanes_lo[23], lanes_hi[23], lanes_lo[24], lanes_hi[24], lanes_lo[25], lanes_hi[25]
         for round_idx = 1, 24 do
            -- Column parities: Cx is the XOR of the five lanes x, x+5, x+10, x+15, x+20
            local C1_lo = XOR(L01_lo, L06_lo, L11_lo, L16_lo, L21_lo)
            local C1_hi = XOR(L01_hi, L06_hi, L11_hi, L16_hi, L21_hi)
            local C2_lo = XOR(L02_lo, L07_lo, L12_lo, L17_lo, L22_lo)
            local C2_hi = XOR(L02_hi, L07_hi, L12_hi, L17_hi, L22_hi)
            local C3_lo = XOR(L03_lo, L08_lo, L13_lo, L18_lo, L23_lo)
            local C3_hi = XOR(L03_hi, L08_hi, L13_hi, L18_hi, L23_hi)
            local C4_lo = XOR(L04_lo, L09_lo, L14_lo, L19_lo, L24_lo)
            local C4_hi = XOR(L04_hi, L09_hi, L14_hi, L19_hi, L24_hi)
            local C5_lo = XOR(L05_lo, L10_lo, L15_lo, L20_lo, L25_lo)
            local C5_hi = XOR(L05_hi, L10_hi, L15_hi, L20_hi, L25_hi)
            -- Column 2 (lanes 2, 7, 12, 17, 22): D = C1 xor (C3 rotated left by 1 bit).
            -- T0..T4 are the column's lanes XORed with D; each is then rotated and stored into another lane variable of the same column.
            local D_lo = XOR(C1_lo, C3_lo * 2 + (C3_hi % 2^32 - C3_hi % 2^31) / 2^31)
            local D_hi = XOR(C1_hi, C3_hi * 2 + (C3_lo % 2^32 - C3_lo % 2^31) / 2^31)
            local T0_lo = XOR(D_lo, L02_lo)
            local T0_hi = XOR(D_hi, L02_hi)
            local T1_lo = XOR(D_lo, L07_lo)
            local T1_hi = XOR(D_hi, L07_hi)
            local T2_lo = XOR(D_lo, L12_lo)
            local T2_hi = XOR(D_hi, L12_hi)
            local T3_lo = XOR(D_lo, L17_lo)
            local T3_hi = XOR(D_hi, L17_hi)
            local T4_lo = XOR(D_lo, L22_lo)
            local T4_hi = XOR(D_hi, L22_hi)
            L02_lo = (T1_lo % 2^32 - T1_lo % 2^20) / 2^20 + T1_hi * 2^12
            L02_hi = (T1_hi % 2^32 - T1_hi % 2^20) / 2^20 + T1_lo * 2^12
            L07_lo = (T3_lo % 2^32 - T3_lo % 2^19) / 2^19 + T3_hi * 2^13
            L07_hi = (T3_hi % 2^32 - T3_hi % 2^19) / 2^19 + T3_lo * 2^13
            L12_lo = T0_lo * 2 + (T0_hi % 2^32 - T0_hi % 2^31) / 2^31
            L12_hi = T0_hi * 2 + (T0_lo % 2^32 - T0_lo % 2^31) / 2^31
            L17_lo = T2_lo * 2^10 + (T2_hi % 2^32 - T2_hi % 2^22) / 2^22
            L17_hi = T2_hi * 2^10 + (T2_lo % 2^32 - T2_lo % 2^22) / 2^22
            L22_lo = T4_lo * 2^2 + (T4_hi % 2^32 - T4_hi % 2^30) / 2^30
            L22_hi = T4_hi * 2^2 + (T4_lo % 2^32 - T4_lo % 2^30) / 2^30
            -- Column 3 (lanes 3, 8, 13, 18, 23): D = C2 xor (C4 rotated left by 1 bit)
            D_lo = XOR(C2_lo, C4_lo * 2 + (C4_hi % 2^32 - C4_hi % 2^31) / 2^31)
            D_hi = XOR(C2_hi, C4_hi * 2 + (C4_lo % 2^32 - C4_lo % 2^31) / 2^31)
            T0_lo = XOR(D_lo, L03_lo)
            T0_hi = XOR(D_hi, L03_hi)
            T1_lo = XOR(D_lo, L08_lo)
            T1_hi = XOR(D_hi, L08_hi)
            T2_lo = XOR(D_lo, L13_lo)
            T2_hi = XOR(D_hi, L13_hi)
            T3_lo = XOR(D_lo, L18_lo)
            T3_hi = XOR(D_hi, L18_hi)
            T4_lo = XOR(D_lo, L23_lo)
            T4_hi = XOR(D_hi, L23_hi)
            L03_lo = (T2_lo % 2^32 - T2_lo % 2^21) / 2^21 + T2_hi * 2^11
            L03_hi = (T2_hi % 2^32 - T2_hi % 2^21) / 2^21 + T2_lo * 2^11
            L08_lo = (T4_lo % 2^32 - T4_lo % 2^3) / 2^3 + T4_hi * 2^29 % 2^32
            L08_hi = (T4_hi % 2^32 - T4_hi % 2^3) / 2^3 + T4_lo * 2^29 % 2^32
            L13_lo = T1_lo * 2^6 + (T1_hi % 2^32 - T1_hi % 2^26) / 2^26
            L13_hi = T1_hi * 2^6 + (T1_lo % 2^32 - T1_lo % 2^26) / 2^26
            L18_lo = T3_lo * 2^15 + (T3_hi % 2^32 - T3_hi % 2^17) / 2^17
            L18_hi = T3_hi * 2^15 + (T3_lo % 2^32 - T3_lo % 2^17) / 2^17
            L23_lo = (T0_lo % 2^32 - T0_lo % 2^2) / 2^2 + T0_hi * 2^30 % 2^32
            L23_hi = (T0_hi % 2^32 - T0_hi % 2^2) / 2^2 + T0_lo * 2^30 % 2^32
            -- Column 4 (lanes 4, 9, 14, 19, 24): D = C3 xor (C5 rotated left by 1 bit)
            D_lo = XOR(C3_lo, C5_lo * 2 + (C5_hi % 2^32 - C5_hi % 2^31) / 2^31)
            D_hi = XOR(C3_hi, C5_hi * 2 + (C5_lo % 2^32 - C5_lo % 2^31) / 2^31)
            T0_lo = XOR(D_lo, L04_lo)
            T0_hi = XOR(D_hi, L04_hi)
            T1_lo = XOR(D_lo, L09_lo)
            T1_hi = XOR(D_hi, L09_hi)
            T2_lo = XOR(D_lo, L14_lo)
            T2_hi = XOR(D_hi, L14_hi)
            T3_lo = XOR(D_lo, L19_lo)
            T3_hi = XOR(D_hi, L19_hi)
            T4_lo = XOR(D_lo, L24_lo)
            T4_hi = XOR(D_hi, L24_hi)
            L04_lo = T3_lo * 2^21 % 2^32 + (T3_hi % 2^32 - T3_hi % 2^11) / 2^11
            L04_hi = T3_hi * 2^21 % 2^32 + (T3_lo % 2^32 - T3_lo % 2^11) / 2^11
            L09_lo = T0_lo * 2^28 % 2^32 + (T0_hi % 2^32 - T0_hi % 2^4) / 2^4
            L09_hi = T0_hi * 2^28 % 2^32 + (T0_lo % 2^32 - T0_lo % 2^4) / 2^4
            L14_lo = T2_lo * 2^25 % 2^32 + (T2_hi % 2^32 - T2_hi % 2^7) / 2^7
            L14_hi = T2_hi * 2^25 % 2^32 + (T2_lo % 2^32 - T2_lo % 2^7) / 2^7
            L19_lo = (T4_lo % 2^32 - T4_lo % 2^8) / 2^8 + T4_hi * 2^24 % 2^32
            L19_hi = (T4_hi % 2^32 - T4_hi % 2^8) / 2^8 + T4_lo * 2^24 % 2^32
            L24_lo = (T1_lo % 2^32 - T1_lo % 2^9) / 2^9 + T1_hi * 2^23 % 2^32
            L24_hi = (T1_hi % 2^32 - T1_hi % 2^9) / 2^9 + T1_lo * 2^23 % 2^32
            -- Column 5 (lanes 5, 10, 15, 20, 25): D = C4 xor (C1 rotated left by 1 bit)
            D_lo = XOR(C4_lo, C1_lo * 2 + (C1_hi % 2^32 - C1_hi % 2^31) / 2^31)
            D_hi = XOR(C4_hi, C1_hi * 2 + (C1_lo % 2^32 - C1_lo % 2^31) / 2^31)
            T0_lo = XOR(D_lo, L05_lo)
            T0_hi = XOR(D_hi, L05_hi)
            T1_lo = XOR(D_lo, L10_lo)
            T1_hi = XOR(D_hi, L10_hi)
            T2_lo = XOR(D_lo, L15_lo)
            T2_hi = XOR(D_hi, L15_hi)
            T3_lo = XOR(D_lo, L20_lo)
            T3_hi = XOR(D_hi, L20_hi)
            T4_lo = XOR(D_lo, L25_lo)
            T4_hi = XOR(D_hi, L25_hi)
            L05_lo = T4_lo * 2^14 + (T4_hi % 2^32 - T4_hi % 2^18) / 2^18
            L05_hi = T4_hi * 2^14 + (T4_lo % 2^32 - T4_lo % 2^18) / 2^18
            L10_lo = T1_lo * 2^20 % 2^32 + (T1_hi % 2^32 - T1_hi % 2^12) / 2^12
            L10_hi = T1_hi * 2^20 % 2^32 + (T1_lo % 2^32 - T1_lo % 2^12) / 2^12
            L15_lo = T3_lo * 2^8 + (T3_hi % 2^32 - T3_hi % 2^24) / 2^24
            L15_hi = T3_hi * 2^8 + (T3_lo % 2^32 - T3_lo % 2^24) / 2^24
            L20_lo = T0_lo * 2^27 % 2^32 + (T0_hi % 2^32 - T0_hi % 2^5) / 2^5
            L20_hi = T0_hi * 2^27 % 2^32 + (T0_lo % 2^32 - T0_lo % 2^5) / 2^5
            L25_lo = (T2_lo % 2^32 - T2_lo % 2^25) / 2^25 + T2_hi * 2^7
            L25_hi = (T2_hi % 2^32 - T2_hi % 2^25) / 2^25 + T2_lo * 2^7
            -- Column 1 (lanes 1, 6, 11, 16, 21): D = C5 xor (C2 rotated left by 1 bit).
            -- Lane 1 is only XORed with D (no rotation, no move), so no T0 is needed here.
            D_lo = XOR(C5_lo, C2_lo * 2 + (C2_hi % 2^32 - C2_hi % 2^31) / 2^31)
            D_hi = XOR(C5_hi, C2_hi * 2 + (C2_lo % 2^32 - C2_lo % 2^31) / 2^31)
            T1_lo = XOR(D_lo, L06_lo)
            T1_hi = XOR(D_hi, L06_hi)
            T2_lo = XOR(D_lo, L11_lo)
            T2_hi = XOR(D_hi, L11_hi)
            T3_lo = XOR(D_lo, L16_lo)
            T3_hi = XOR(D_hi, L16_hi)
            T4_lo = XOR(D_lo, L21_lo)
            T4_hi = XOR(D_hi, L21_hi)
            L06_lo = T2_lo * 2^3 + (T2_hi % 2^32 - T2_hi % 2^29) / 2^29
            L06_hi = T2_hi * 2^3 + (T2_lo % 2^32 - T2_lo % 2^29) / 2^29
            L11_lo = T4_lo * 2^18 + (T4_hi % 2^32 - T4_hi % 2^14) / 2^14
            L11_hi = T4_hi * 2^18 + (T4_lo % 2^32 - T4_lo % 2^14) / 2^14
            L16_lo = (T1_lo % 2^32 - T1_lo % 2^28) / 2^28 + T1_hi * 2^4
            L16_hi = (T1_hi % 2^32 - T1_hi % 2^28) / 2^28 + T1_lo * 2^4
            L21_lo = (T3_lo % 2^32 - T3_lo % 2^23) / 2^23 + T3_hi * 2^9
            L21_hi = (T3_hi % 2^32 - T3_hi % 2^23) / 2^23 + T3_lo * 2^9
            L01_lo = XOR(D_lo, L01_lo)
            L01_hi = XOR(D_hi, L01_hi)
            -- Non-linear step, one row of five lanes per statement: new = a xor ((not b) and c) over three consecutive lanes of the row.
            -- Lua evaluates every right-hand side before assigning, so each row is updated in place.
            -- Rows 2..5 read their source lanes in a cyclically shifted order relative to the targets.
            L01_lo, L02_lo, L03_lo, L04_lo, L05_lo = XOR(L01_lo, AND(-1-L02_lo, L03_lo)), XOR(L02_lo, AND(-1-L03_lo, L04_lo)), XOR(L03_lo, AND(-1-L04_lo, L05_lo)), XOR(L04_lo, AND(-1-L05_lo, L01_lo)), XOR(L05_lo, AND(-1-L01_lo, L02_lo))
            L01_hi, L02_hi, L03_hi, L04_hi, L05_hi = XOR(L01_hi, AND(-1-L02_hi, L03_hi)), XOR(L02_hi, AND(-1-L03_hi, L04_hi)), XOR(L03_hi, AND(-1-L04_hi, L05_hi)), XOR(L04_hi, AND(-1-L05_hi, L01_hi)), XOR(L05_hi, AND(-1-L01_hi, L02_hi))
            L06_lo, L07_lo, L08_lo, L09_lo, L10_lo = XOR(L09_lo, AND(-1-L10_lo, L06_lo)), XOR(L10_lo, AND(-1-L06_lo, L07_lo)), XOR(L06_lo, AND(-1-L07_lo, L08_lo)), XOR(L07_lo, AND(-1-L08_lo, L09_lo)), XOR(L08_lo, AND(-1-L09_lo, L10_lo))
            L06_hi, L07_hi, L08_hi, L09_hi, L10_hi = XOR(L09_hi, AND(-1-L10_hi, L06_hi)), XOR(L10_hi, AND(-1-L06_hi, L07_hi)), XOR(L06_hi, AND(-1-L07_hi, L08_hi)), XOR(L07_hi, AND(-1-L08_hi, L09_hi)), XOR(L08_hi, AND(-1-L09_hi, L10_hi))
            L11_lo, L12_lo, L13_lo, L14_lo, L15_lo = XOR(L12_lo, AND(-1-L13_lo, L14_lo)), XOR(L13_lo, AND(-1-L14_lo, L15_lo)), XOR(L14_lo, AND(-1-L15_lo, L11_lo)), XOR(L15_lo, AND(-1-L11_lo, L12_lo)), XOR(L11_lo, AND(-1-L12_lo, L13_lo))
            L11_hi, L12_hi, L13_hi, L14_hi, L15_hi = XOR(L12_hi, AND(-1-L13_hi, L14_hi)), XOR(L13_hi, AND(-1-L14_hi, L15_hi)), XOR(L14_hi, AND(-1-L15_hi, L11_hi)), XOR(L15_hi, AND(-1-L11_hi, L12_hi)), XOR(L11_hi, AND(-1-L12_hi, L13_hi))
            L16_lo, L17_lo, L18_lo, L19_lo, L20_lo = XOR(L20_lo, AND(-1-L16_lo, L17_lo)), XOR(L16_lo, AND(-1-L17_lo, L18_lo)), XOR(L17_lo, AND(-1-L18_lo, L19_lo)), XOR(L18_lo, AND(-1-L19_lo, L20_lo)), XOR(L19_lo, AND(-1-L20_lo, L16_lo))
            L16_hi, L17_hi, L18_hi, L19_hi, L20_hi = XOR(L20_hi, AND(-1-L16_hi, L17_hi)), XOR(L16_hi, AND(-1-L17_hi, L18_hi)), XOR(L17_hi, AND(-1-L18_hi, L19_hi)), XOR(L18_hi, AND(-1-L19_hi, L20_hi)), XOR(L19_hi, AND(-1-L20_hi, L16_hi))
            L21_lo, L22_lo, L23_lo, L24_lo, L25_lo = XOR(L23_lo, AND(-1-L24_lo, L25_lo)), XOR(L24_lo, AND(-1-L25_lo, L21_lo)), XOR(L25_lo, AND(-1-L21_lo, L22_lo)), XOR(L21_lo, AND(-1-L22_lo, L23_lo)), XOR(L22_lo, AND(-1-L23_lo, L24_lo))
            L21_hi, L22_hi, L23_hi, L24_hi, L25_hi = XOR(L23_hi, AND(-1-L24_hi, L25_hi)), XOR(L24_hi, AND(-1-L25_hi, L21_hi)), XOR(L25_hi, AND(-1-L21_hi, L22_hi)), XOR(L21_hi, AND(-1-L22_hi, L23_hi)), XOR(L22_hi, AND(-1-L23_hi, L24_hi))
            -- Mix this round's constant into lane 1
            L01_lo = XOR(L01_lo, RC_lo[round_idx])
            L01_hi = L01_hi + RC_hi[round_idx]      -- RC_hi[] is either 0 or 0x80000000, so we could use fast addition instead of slow XOR
         end
         -- Store the permuted state back into the caller's lane arrays
         lanes_lo[1]  = L01_lo
         lanes_hi[1]  = L01_hi
         lanes_lo[2]  = L02_lo
         lanes_hi[2]  = L02_hi
         lanes_lo[3]  = L03_lo
         lanes_hi[3]  = L03_hi
         lanes_lo[4]  = L04_lo
         lanes_hi[4]  = L04_hi
         lanes_lo[5]  = L05_lo
         lanes_hi[5]  = L05_hi
         lanes_lo[6]  = L06_lo
         lanes_hi[6]  = L06_hi
         lanes_lo[7]  = L07_lo
         lanes_hi[7]  = L07_hi
         lanes_lo[8]  = L08_lo
         lanes_hi[8]  = L08_hi
         lanes_lo[9]  = L09_lo
         lanes_hi[9]  = L09_hi
         lanes_lo[10] = L10_lo
         lanes_hi[10] = L10_hi
         lanes_lo[11] = L11_lo
         lanes_hi[11] = L11_hi
         lanes_lo[12] = L12_lo
         lanes_hi[12] = L12_hi
         lanes_lo[13] = L13_lo
         lanes_hi[13] = L13_hi
         lanes_lo[14] = L14_lo
         lanes_hi[14] = L14_hi
         lanes_lo[15] = L15_lo
         lanes_hi[15] = L15_hi
         lanes_lo[16] = L16_lo
         lanes_hi[16] = L16_hi
         lanes_lo[17] = L17_lo
         lanes_hi[17] = L17_hi
         lanes_lo[18] = L18_lo
         lanes_hi[18] = L18_hi
         lanes_lo[19] = L19_lo
         lanes_hi[19] = L19_hi
         lanes_lo[20] = L20_lo
         lanes_hi[20] = L20_hi
         lanes_lo[21] = L21_lo
         lanes_hi[21] = L21_hi
         lanes_lo[22] = L22_lo
         lanes_hi[22] = L22_hi
         lanes_lo[23] = L23_lo
         lanes_hi[23] = L23_hi
         lanes_lo[24] = L24_lo
         lanes_hi[24] = L24_hi
         lanes_lo[25] = L25_lo
         lanes_hi[25] = L25_hi
      end
   end
  2315.  
  2316. end
  2317.  
  2318.  
  2319. --------------------------------------------------------------------------------
  2320. -- MAGIC NUMBERS CALCULATOR
  2321. --------------------------------------------------------------------------------
  2322. -- Q:
  2323. --    Is 53-bit "double" math enough to calculate square roots and cube roots of primes with 64 correct bits after decimal point?
  2324. -- A:
  2325. --    Yes, 53-bit "double" arithmetic is enough.
  2326. --    We could obtain first 40 bits by direct calculation of p^(1/3) and next 40 bits by one step of Newton's method.
  2327.  
  2328. do
  2329.    local function mul(src1, src2, factor, result_length)
  2330.       -- src1, src2 - long integers (arrays of digits in base 2^24)
  2331.       -- factor - small integer
  2332.       -- returns long integer result (src1 * src2 * factor) and its floating point approximation
  2333.       local result, carry, value, weight = {}, 0.0, 0.0, 1.0
  2334.       for j = 1, result_length do
  2335.          for k = math_max(1, j + 1 - #src2), math_min(j, #src1) do
  2336.             carry = carry + factor * src1[k] * src2[j + 1 - k]  -- "int32" is not enough for multiplication result, that's why "factor" must be of type "double"
  2337.          end
  2338.          local digit = carry % 2^24
  2339.          result[j] = floor(digit)
  2340.          carry = (carry - digit) / 2^24
  2341.          value = value + digit * weight
  2342.          weight = weight * 2^24
  2343.       end
  2344.       return result, value
  2345.    end
  2346.  
  2347.    local idx, step, p, one, sqrt_hi, sqrt_lo = 0, {4, 1, 2, -2, 2}, 4, {1}, sha2_H_hi, sha2_H_lo
  2348.    repeat
  2349.       p = p + step[p % 6]
  2350.       local d = 1
  2351.       repeat
  2352.          d = d + step[d % 6]
  2353.          if d*d > p then -- next prime number is found
  2354.             local root = p^(1/3)
  2355.             local R = root * 2^40
  2356.             R = mul({R - R % 1}, one, 1.0, 2)
  2357.             local _, delta = mul(R, mul(R, R, 1.0, 4), -1.0, 4)
  2358.             local hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
  2359.             local lo = R[1] % 256 * 16777216 + floor(delta * (2^-56 / 3) * root / p)
  2360.             if idx < 16 then
  2361.                root = p^(1/2)
  2362.                R = root * 2^40
  2363.                R = mul({R - R % 1}, one, 1.0, 2)
  2364.                _, delta = mul(R, R, -1.0, 2)
  2365.                local hi = R[2] % 65536 * 65536 + floor(R[1] / 256)
  2366.                local lo = R[1] % 256 * 16777216 + floor(delta * 2^-17 / root)
  2367.                local idx = idx % 8 + 1
  2368.                sha2_H_ext256[224][idx] = lo
  2369.                sqrt_hi[idx], sqrt_lo[idx] = hi, lo + hi * hi_factor
  2370.                if idx > 7 then
  2371.                   sqrt_hi, sqrt_lo = sha2_H_ext512_hi[384], sha2_H_ext512_lo[384]
  2372.                end
  2373.             end
  2374.             idx = idx + 1
  2375.             sha2_K_hi[idx], sha2_K_lo[idx] = hi, lo % K_lo_modulo + hi * hi_factor
  2376.             break
  2377.          end
  2378.       until p % d == 0
  2379.    until idx > 79
  2380. end
  2381.  
  2382. -- Calculating IVs for SHA512/224 and SHA512/256
  2383. for width = 224, 256, 32 do
  2384.    local H_lo, H_hi = {}
  2385.    if XOR64A5 then
  2386.       for j = 1, 8 do
  2387.          H_lo[j] = XOR64A5(sha2_H_lo[j])
  2388.       end
  2389.    else
  2390.       H_hi = {}
  2391.       for j = 1, 8 do
  2392.          H_lo[j] = XOR32A5(sha2_H_lo[j])
  2393.          H_hi[j] = XOR32A5(sha2_H_hi[j])
  2394.       end
  2395.    end
  2396.    sha512_feed_128(H_lo, H_hi, "SHA-512/"..tostring(width).."\128"..string_rep("\0", 115).."\88", 0, 128)
  2397.    sha2_H_ext512_lo[width] = H_lo
  2398.    sha2_H_ext512_hi[width] = H_hi
  2399. end
  2400.  
  2401. -- Constants for MD5
  2402. do
  2403.    local sin, abs, modf = math.sin, math.abs, math.modf
  2404.    for idx = 1, 64 do
  2405.       -- we can't use formula floor(abs(sin(idx))*2^32) because its result may be beyond integer range on Lua built with 32-bit integers
  2406.       local hi, lo = modf(abs(sin(idx)) * 2^16)
  2407.       md5_K[idx] = hi * 65536 + floor(lo * 2^16)
  2408.    end
  2409. end
  2410.  
  2411. -- Constants for SHA3
  2412. do
  2413.    local sh_reg = 29
  2414.    local function next_bit()
  2415.       local r = sh_reg % 2
  2416.       sh_reg = XOR_BYTE((sh_reg - r) / 2, 142 * r)
  2417.       return r
  2418.    end
  2419.    for idx = 1, 24 do
  2420.       local lo, m = 0
  2421.       for _ = 1, 6 do
  2422.          m = m and m * m * 2 or 1
  2423.          lo = lo + next_bit() * m
  2424.       end
  2425.       local hi = next_bit() * m
  2426.       sha3_RC_hi[idx], sha3_RC_lo[idx] = hi, lo + hi * hi_factor_keccak
  2427.    end
  2428. end
  2429.  
  2430.  
  2431. --------------------------------------------------------------------------------
  2432. -- MAIN FUNCTIONS
  2433. --------------------------------------------------------------------------------
  2434.  
  2435. local function sha256ext(width, message)
  2436.  
  2437.    -- Create an instance (private objects for current calculation)
  2438.    local H, length, tail = {unpack(sha2_H_ext256[width])}, 0.0, ""
  2439.  
  2440.    local function partial(message_part)
  2441.       if message_part then
  2442.          if tail then
  2443.             length = length + #message_part
  2444.             local offs = 0
  2445.             if tail ~= "" and #tail + #message_part >= 64 then
  2446.                offs = 64 - #tail
  2447.                sha256_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2448.                tail = ""
  2449.             end
  2450.             local size = #message_part - offs
  2451.             local size_tail = size % 64
  2452.             sha256_feed_64(H, message_part, offs, size - size_tail)
  2453.             tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2454.             return partial
  2455.          else
  2456.             error("Adding more chunks is not allowed after receiving the result", 2)
  2457.          end
  2458.       else
  2459.          if tail then
  2460.             local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64 + 1)}
  2461.             tail = nil
  2462.             -- Assuming user data length is shorter than (2^53)-9 bytes
  2463.             -- Anyway, it looks very unrealistic that someone would spend more than a year of calculations to process 2^53 bytes of data by using this Lua script :-)
  2464.             -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2465.             length = length * (8 / 256^7)  -- convert "byte-counter" to "bit-counter" and move decimal point to the left
  2466.             for j = 4, 10 do
  2467.                length = length % 1 * 256
  2468.                final_blocks[j] = char(floor(length))
  2469.             end
  2470.             final_blocks = table_concat(final_blocks)
  2471.             sha256_feed_64(H, final_blocks, 0, #final_blocks)
  2472.             local max_reg = width / 32
  2473.             for j = 1, max_reg do
  2474.                H[j] = HEX(H[j])
  2475.             end
  2476.             H = table_concat(H, "", 1, max_reg)
  2477.          end
  2478.          return H
  2479.       end
  2480.    end
  2481.  
  2482.    if message then
  2483.       -- Actually perform calculations and return the SHA256 digest of a message
  2484.       return partial(message)()
  2485.    else
  2486.       -- Return function for chunk-by-chunk loading
  2487.       -- User should feed every chunk of input data as single argument to this function and finally get SHA256 digest by invoking this function without an argument
  2488.       return partial
  2489.    end
  2490.  
  2491. end
  2492.  
  2493.  
  2494. local function sha512ext(width, message)
  2495.  
  2496.    -- Create an instance (private objects for current calculation)
  2497.    local length, tail, H_lo, H_hi = 0.0, "", {unpack(sha2_H_ext512_lo[width])}, not HEX64 and {unpack(sha2_H_ext512_hi[width])}
  2498.  
  2499.    local function partial(message_part)
  2500.       if message_part then
  2501.          if tail then
  2502.             length = length + #message_part
  2503.             local offs = 0
  2504.             if tail ~= "" and #tail + #message_part >= 128 then
  2505.                offs = 128 - #tail
  2506.                sha512_feed_128(H_lo, H_hi, tail..sub(message_part, 1, offs), 0, 128)
  2507.                tail = ""
  2508.             end
  2509.             local size = #message_part - offs
  2510.             local size_tail = size % 128
  2511.             sha512_feed_128(H_lo, H_hi, message_part, offs, size - size_tail)
  2512.             tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2513.             return partial
  2514.          else
  2515.             error("Adding more chunks is not allowed after receiving the result", 2)
  2516.          end
  2517.       else
  2518.          if tail then
  2519.             local final_blocks = {tail, "\128", string_rep("\0", (-17-length) % 128 + 9)}
  2520.             tail = nil
  2521.             -- Assuming user data length is shorter than (2^53)-17 bytes
  2522.             -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2523.             length = length * (8 / 256^7)  -- convert "byte-counter" to "bit-counter" and move floating point to the left
  2524.             for j = 4, 10 do
  2525.                length = length % 1 * 256
  2526.                final_blocks[j] = char(floor(length))
  2527.             end
  2528.             final_blocks = table_concat(final_blocks)
  2529.             sha512_feed_128(H_lo, H_hi, final_blocks, 0, #final_blocks)
  2530.             local max_reg = ceil(width / 64)
  2531.             if HEX64 then
  2532.                for j = 1, max_reg do
  2533.                   H_lo[j] = HEX64(H_lo[j])
  2534.                end
  2535.             else
  2536.                for j = 1, max_reg do
  2537.                   H_lo[j] = HEX(H_hi[j])..HEX(H_lo[j])
  2538.                end
  2539.                H_hi = nil
  2540.             end
  2541.             H_lo = sub(table_concat(H_lo, "", 1, max_reg), 1, width / 4)
  2542.          end
  2543.          return H_lo
  2544.       end
  2545.    end
  2546.  
  2547.    if message then
  2548.       -- Actually perform calculations and return the SHA512 digest of a message
  2549.       return partial(message)()
  2550.    else
  2551.       -- Return function for chunk-by-chunk loading
  2552.       -- User should feed every chunk of input data as single argument to this function and finally get SHA512 digest by invoking this function without an argument
  2553.       return partial
  2554.    end
  2555.  
  2556. end
  2557.  
  2558.  
  2559. local function md5(message)
  2560.  
  2561.    -- Create an instance (private objects for current calculation)
  2562.    local H, length, tail = {unpack(md5_sha1_H, 1, 4)}, 0.0, ""
  2563.  
  2564.    local function partial(message_part)
  2565.       if message_part then
  2566.          if tail then
  2567.             length = length + #message_part
  2568.             local offs = 0
  2569.             if tail ~= "" and #tail + #message_part >= 64 then
  2570.                offs = 64 - #tail
  2571.                md5_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2572.                tail = ""
  2573.             end
  2574.             local size = #message_part - offs
  2575.             local size_tail = size % 64
  2576.             md5_feed_64(H, message_part, offs, size - size_tail)
  2577.             tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2578.             return partial
  2579.          else
  2580.             error("Adding more chunks is not allowed after receiving the result", 2)
  2581.          end
  2582.       else
  2583.          if tail then
  2584.             local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64)}
  2585.             tail = nil
  2586.             length = length * 8  -- convert "byte-counter" to "bit-counter"
  2587.             for j = 4, 11 do
  2588.                local low_byte = length % 256
  2589.                final_blocks[j] = char(low_byte)
  2590.                length = (length - low_byte) / 256
  2591.             end
  2592.             final_blocks = table_concat(final_blocks)
  2593.             md5_feed_64(H, final_blocks, 0, #final_blocks)
  2594.             for j = 1, 4 do
  2595.                H[j] = HEX(H[j])
  2596.             end
  2597.             H = gsub(table_concat(H), "(..)(..)(..)(..)", "%4%3%2%1")
  2598.          end
  2599.          return H
  2600.       end
  2601.    end
  2602.  
  2603.    if message then
  2604.       -- Actually perform calculations and return the MD5 digest of a message
  2605.       return partial(message)()
  2606.    else
  2607.       -- Return function for chunk-by-chunk loading
  2608.       -- User should feed every chunk of input data as single argument to this function and finally get MD5 digest by invoking this function without an argument
  2609.       return partial
  2610.    end
  2611.  
  2612. end
  2613.  
  2614.  
  2615. local function sha1(message)
  2616.  
  2617.    -- Create an instance (private objects for current calculation)
  2618.    local H, length, tail = {unpack(md5_sha1_H)}, 0.0, ""
  2619.  
  2620.    local function partial(message_part)
  2621.       if message_part then
  2622.          if tail then
  2623.             length = length + #message_part
  2624.             local offs = 0
  2625.             if tail ~= "" and #tail + #message_part >= 64 then
  2626.                offs = 64 - #tail
  2627.                sha1_feed_64(H, tail..sub(message_part, 1, offs), 0, 64)
  2628.                tail = ""
  2629.             end
  2630.             local size = #message_part - offs
  2631.             local size_tail = size % 64
  2632.             sha1_feed_64(H, message_part, offs, size - size_tail)
  2633.             tail = tail..sub(message_part, #message_part + 1 - size_tail)
  2634.             return partial
  2635.          else
  2636.             error("Adding more chunks is not allowed after receiving the result", 2)
  2637.          end
  2638.       else
  2639.          if tail then
  2640.             local final_blocks = {tail, "\128", string_rep("\0", (-9 - length) % 64 + 1)}
  2641.             tail = nil
  2642.             -- Assuming user data length is shorter than (2^53)-9 bytes
  2643.             -- 2^53 bytes = 2^56 bits, so "bit-counter" fits in 7 bytes
  2644.             length = length * (8 / 256^7)  -- convert "byte-counter" to "bit-counter" and move decimal point to the left
  2645.             for j = 4, 10 do
  2646.                length = length % 1 * 256
  2647.                final_blocks[j] = char(floor(length))
  2648.             end
  2649.             final_blocks = table_concat(final_blocks)
  2650.             sha1_feed_64(H, final_blocks, 0, #final_blocks)
  2651.             for j = 1, 5 do
  2652.                H[j] = HEX(H[j])
  2653.             end
  2654.             H = table_concat(H)
  2655.          end
  2656.          return H
  2657.       end
  2658.    end
  2659.  
  2660.    if message then
  2661.       -- Actually perform calculations and return the SHA-1 digest of a message
  2662.       return partial(message)()
  2663.    else
  2664.       -- Return function for chunk-by-chunk loading
  2665.       -- User should feed every chunk of input data as single argument to this function and finally get SHA-1 digest by invoking this function without an argument
  2666.       return partial
  2667.    end
  2668.  
  2669. end
  2670.  
  2671.  
  local function keccak(block_size_in_bytes, digest_size_in_bytes, is_SHAKE, message)
     -- Sponge-based hashing shared by SHA3-* and SHAKE*.
     --    block_size_in_bytes   the rate (bytes absorbed per permutation), must be a multiple of 8
     --    digest_size_in_bytes  number of digest bytes to produce;
     --                          a negative value makes the final result a function which returns
     --                          further digest parts on demand (an optional argument = bytes wanted, default 1)
     --    is_SHAKE              selects the SHAKE padding instead of the SHA3 padding
     --    message               optional; when given, the digest of this string is returned right away,
     --                          otherwise a chunk-by-chunk loader function is returned
     -- The digest is returned as text, two characters per byte (see HEX/HEX64).
     --
     -- Outline of the sponge construction:
     --    pad the input so that its length is divisible by the rate,
     --    absorb: XOR every rate-sized piece into the state and apply the block permutation,
     --    squeeze: take the first "rate" bytes of the state as output,
     --             apply the permutation again whenever more output is needed,
     --    truncate the output to the requested digest size.
     if type(digest_size_in_bytes) ~= "number" then
        -- arguments in SHAKE are swapped:
        --    NIST FIPS 202 defines SHAKE(message,num_bits)
        --    this module   defines SHAKE(num_bytes,message)
        -- the swap is easy to forget, so it is checked explicitly
        error("Argument 'digest_size_in_bytes' must be a number", 2)
     end

     -- Private state of this calculation
     local tail = ""    -- not yet absorbed bytes (shorter than one block); nil once the digest was requested
     local lanes_lo = create_array_of_lanes()
     local lanes_hi = hi_factor_keccak == 0 and create_array_of_lanes()
     local result

     -- Pads and absorbs the remaining input, then prepares the squeezing closures and stores the outcome in "result"
     local function finalize()
        -- append the following bits to the message: for usual SHA3: 011(0*)1, for SHAKE: 11111(0*)1
        local gap_start = is_SHAKE and 31 or 6
        local padding
        if #tail + 1 == block_size_in_bytes then
           -- only one byte is free in the block: first and last padding bits share that byte
           padding = char(gap_start + 128)
        else
           padding = char(gap_start)..string_rep("\0", (-2 - #tail) % block_size_in_bytes).."\128"
        end
        tail = tail..padding
        keccak_feed(lanes_lo, lanes_hi, tail, 0, #tail, block_size_in_bytes)
        tail = nil

        local lanes_used = 0
        local total_lanes = floor(block_size_in_bytes / 8)
        local qwords = {}

        local function squeeze_qwords(max_qwords)
           -- returns not more than 'max_qwords' qwords ('max_qwords' might be non-integer) and their size in bytes
           -- never goes across the keccak-buffer boundary
           -- (block_size_in_bytes is a multiple of 8, so the buffer holds an integer number of qwords)
           if lanes_used >= total_lanes then
              -- the buffer is exhausted: feed 8 zero bytes to obtain the next portion of output
              -- (presumably this only applies the permutation, XOR with zeros being a no-op)
              keccak_feed(lanes_lo, lanes_hi, "\0\0\0\0\0\0\0\0", 0, 8, 8)
              lanes_used = 0
           end
           local qty = floor(math_min(max_qwords, total_lanes - lanes_used))
           if hi_factor_keccak ~= 0 then
              for j = 1, qty do
                 qwords[j] = HEX64(lanes_lo[lanes_used + j - 1 + lanes_index_base])
              end
           else
              for j = 1, qty do
                 qwords[j] = HEX(lanes_hi[lanes_used + j])..HEX(lanes_lo[lanes_used + j])
              end
           end
           lanes_used = lanes_used + qty
           -- reverse the order of the 8 two-character groups inside every qword
           local text = gsub(table_concat(qwords, "", 1, qty), "(..)(..)(..)(..)(..)(..)(..)(..)", "%8%7%6%5%4%3%2%1")
           return text, qty * 8
        end

        local parts = {}                    -- pieces of the digest being assembled
        local spare, spare_size = "", 0     -- unused remainder of the last squeezed qword (text, size in bytes)

        local function next_digest_part(bytes_needed)
           -- returns 'bytes_needed' bytes of the digest, for arbitrary integer 'bytes_needed'
           bytes_needed = bytes_needed or 1
           if bytes_needed <= spare_size then
              -- the request is fully covered by the remainder of the previous qword
              spare_size = spare_size - bytes_needed
              local nibbles = bytes_needed * 2
              local piece = sub(spare, 1, nibbles)
              spare = sub(spare, nibbles + 1)
              return piece
           end
           local parts_qty = 0
           if spare_size > 0 then
              parts_qty = 1
              parts[1] = spare
              bytes_needed = bytes_needed - spare_size
           end
           -- whole qwords, as many as still needed
           while bytes_needed >= 8 do
              local piece, piece_size = squeeze_qwords(bytes_needed / 8)
              parts_qty = parts_qty + 1
              parts[parts_qty] = piece
              bytes_needed = bytes_needed - piece_size
           end
           if bytes_needed > 0 then
              -- fewer than 8 bytes are missing: squeeze one qword and keep what is left of it for later
              spare, spare_size = squeeze_qwords(1)
              parts_qty = parts_qty + 1
              parts[parts_qty] = next_digest_part(bytes_needed)
           else
              spare, spare_size = "", 0
           end
           return table_concat(parts, "", 1, parts_qty)
        end

        if digest_size_in_bytes < 0 then
           -- digest of unlimited length: hand the generator over to the user
           result = next_digest_part
        else
           result = next_digest_part(digest_size_in_bytes)
        end
     end

     local function partial(message_part)
        if not message_part then
           -- no argument: the digest is requested (it is calculated only once)
           if tail then
              finalize()
           end
           return result
        end
        if not tail then
           error("Adding more chunks is not allowed after receiving the result", 2)
        end
        local offs = 0
        if tail ~= "" and #tail + #message_part >= block_size_in_bytes then
           -- complete the pending block with the beginning of the new chunk
           offs = block_size_in_bytes - #tail
           keccak_feed(lanes_lo, lanes_hi, tail..sub(message_part, 1, offs), 0, block_size_in_bytes, block_size_in_bytes)
           tail = ""
        end
        local rest_size = #message_part - offs
        local new_tail_size = rest_size % block_size_in_bytes
        -- absorb all whole blocks of the chunk, keep the incomplete last block for later
        keccak_feed(lanes_lo, lanes_hi, message_part, offs, rest_size - new_tail_size, block_size_in_bytes)
        tail = tail..sub(message_part, #message_part + 1 - new_tail_size)
        return partial
     end

     if not message then
        -- Return function for chunk-by-chunk loading
        -- User should feed every chunk of input data as single argument to this function and finally get SHA3 digest by invoking this function without an argument
        return partial
     end
     -- Actually perform calculations and return the SHA3 digest of a message
     return partial(message)()

  end
  2809.  
  2810.  
  local hex2bin, bin2base64, base642bin
  do

     -- Converts hexadecimal representation to a binary string.
     -- Every pair of adjacent hex digits is replaced with the byte it denotes;
     -- characters which are not part of such a pair are left in the string as they are.
     function hex2bin(hex_string)
        return (gsub(hex_string, "%x%x",
           function (hh)
              return char(tonumber(hh, 16))
           end
        ))
     end

     -- Two-way lookup table:
     --    symbol (string) -> 6-bit value, both the standard ('+', '/') and the URL-safe ('-', '_') alphabets are accepted
     --    6-bit value (number) -> symbol of the standard alphabet
     -- Padding is represented by the value -1 ('=' on output, '=' or '.' on input)
     local base64_symbols = {
        ['+'] = 62, ['-'] = 62,  [62] = '+',
        ['/'] = 63, ['_'] = 63,  [63] = '/',
        ['='] = -1, ['.'] = -1,  [-1] = '='
     }
     local symbol_index = 0
     for j, pair in ipairs{'AZ', 'az', '09'} do
        for ascii = byte(pair), byte(pair, 2) do
           local ch = char(ascii)
           base64_symbols[ch] = symbol_index
           base64_symbols[symbol_index] = ch
           symbol_index = symbol_index + 1
        end
     end

     -- Converts a binary string to its base64 representation (standard alphabet, padded with '=').
     function bin2base64(binary_string)
        local result = {}
        for pos = 1, #binary_string, 3 do
           -- the extra '\0' guarantees that c2 exists and lets c3/c4 tell how many real bytes the group has:
           --    3 bytes -> c4 == 0,  2 bytes -> c4 == nil,  1 byte -> c3 == nil
           local c1, c2, c3, c4 = byte(sub(binary_string, pos, pos + 2)..'\0', 1, -1)
           result[#result + 1] =
              base64_symbols[floor(c1 / 4)]
              ..base64_symbols[c1 % 4 * 16 + floor(c2 / 16)]
              ..base64_symbols[c3 and c2 % 16 * 4 + floor(c3 / 64) or -1]
              ..base64_symbols[c4 and c3 % 64 or -1]
        end
        return table_concat(result)
     end

     -- Converts base64 representation to a binary string.
     --    Whitespace is ignored.
     --    Both the standard and the URL-safe alphabets are accepted.
     --    Padding ('=' or '.') is optional: a final group of 2 or 3 symbols is decoded as if it were padded
     --    (a lone final symbol carries less than one byte and produces no output).
     --    Any other character raises an error.
     function base642bin(base64_string)
        local result = {}
        local group = {0, 0, 0, 0}             -- 6-bit values of the group being collected
        local group_size, padding_qty = 0, 0   -- both are counted per group

        -- decodes the collected group: 3 bytes minus one byte per padding symbol
        local function flush_group()
           local bytes_qty = 3 - padding_qty
           if bytes_qty > 0 then
              local c1 = group[1] * 4 + floor(group[2] / 16)
              local c2 = (group[2] % 16) * 16 + floor(group[3] / 4)
              local c3 = (group[3] % 4) * 64 + group[4]
              result[#result + 1] = sub(char(c1, c2, c3), 1, bytes_qty)
           end
           group_size, padding_qty = 0, 0
        end

        for ch in gmatch(gsub(base64_string, '%s+', ''), '.') do
           local code = base64_symbols[ch]
           if not code then
              error("Invalid character in base64 string", 2)
           end
           if code < 0 then
              padding_qty = padding_qty + 1
              code = 0
           end
           group_size = group_size + 1
           group[group_size] = code
           if group_size == 4 then
              flush_group()
           end
        end
        if group_size > 0 then
           -- unpadded input: the missing symbols of the last group act as padding
           for j = group_size + 1, 4 do
              group[j] = 0
           end
           padding_qty = padding_qty + (4 - group_size)
           flush_group()
        end
        return table_concat(result)
     end

  end
  2872.  
  2873.  
  local block_size_for_HMAC  -- forward declaration; the table is filled at the end of the module

  -- XORs every byte of "str" with "byte_for_xor" and extends the outcome up to "result_length" bytes,
  -- the added bytes being equal to "byte_for_xor" (i.e. "str" is treated as zero-padded before XORing).
  -- Nothing is appended when "str" is not shorter than "result_length".
  local function pad_and_xor(str, result_length, byte_for_xor)
     local function xor_char(c)
        return char(XOR_BYTE(byte(c), byte_for_xor))
     end
     local masked = gsub(str, ".", xor_char)
     local filler = string_rep(char(byte_for_xor), result_length - #str)
     return masked..filler
  end
  2883.  
  local function hmac(hash_func, key, message)
     -- Calculates HMAC for one of the hash functions registered in "block_size_for_HMAC".
     --    hash_func  a hash function of this module (anything else raises "Unknown hash function")
     --    key        the secret key (string)
     --    message    optional; when given, the HMAC of this string is returned right away,
     --               otherwise a chunk-by-chunk loader function is returned

     -- Create an instance (private objects for current calculation)
     local block_size = block_size_for_HMAC[hash_func]
     if not block_size then
        error("Unknown hash function", 2)
     end
     if #key > block_size then
        -- a key longer than the block is replaced with its digest
        key = hex2bin(hash_func(key))
     end
     -- the inner hash starts with the key XORed with 0x36; message chunks are appended to it later
     local inner_key_block = pad_and_xor(key, block_size, 0x36)
     local append = hash_func()(inner_key_block)
     local result

     local function partial(message_part)
        if message_part then
           if result then
              error("Adding more chunks is not allowed after receiving the result", 2)
           end
           append(message_part)
           return partial
        end
        if not result then
           -- the outer hash: key XORed with 0x5C, followed by the binary digest of the inner hash
           result = hash_func(pad_and_xor(key, block_size, 0x5C)..hex2bin(append()))
        end
        return result
     end

     if not message then
        -- Return function for chunk-by-chunk loading of a message
        -- User should feed every chunk of the message as single argument to this function and finally get HMAC by invoking this function without an argument
        return partial
     end
     -- Actually perform calculations and return the HMAC of a message
     return partial(message)()

  end
  2919.  
  2920.  
  local sha   -- the table exported by the module
  do
     -- The factories below build the public one-argument wrappers.
     -- Each wrapper forwards to the shared implementation with its parameters fixed.

     local function sha2_via_sha256(width)
        return function (message)
           return sha256ext(width, message)
        end
     end

     local function sha2_via_sha512(width)
        return function (message)
           return sha512ext(width, message)
        end
     end

     local function sha3(digest_bits)
        -- block size is (1600 - 2 * digest size) bits, both sizes are passed to keccak in bytes
        local block_bytes, digest_bytes = (1600 - 2 * digest_bits) / 8, digest_bits / 8
        return function (message)
           return keccak(block_bytes, digest_bytes, false, message)
        end
     end

     local function shake(level_bits)
        local block_bytes = (1600 - 2 * level_bits) / 8
        -- note the order of arguments: digest size goes first, message goes second
        return function (digest_size_in_bytes, message)
           return keccak(block_bytes, digest_size_in_bytes, true, message)
        end
     end

     sha = {
        md5        = md5,                   -- MD5
        sha1       = sha1,                  -- SHA-1
        -- SHA2 hash functions:
        sha224     = sha2_via_sha256(224),  -- SHA-224
        sha256     = sha2_via_sha256(256),  -- SHA-256
        sha512_224 = sha2_via_sha512(224),  -- SHA-512/224
        sha512_256 = sha2_via_sha512(256),  -- SHA-512/256
        sha384     = sha2_via_sha512(384),  -- SHA-384
        sha512     = sha2_via_sha512(512),  -- SHA-512
        -- SHA3 hash functions:
        sha3_224   = sha3(224),             -- SHA3-224
        sha3_256   = sha3(256),             -- SHA3-256
        sha3_384   = sha3(384),             -- SHA3-384
        sha3_512   = sha3(512),             -- SHA3-512
        shake128   = shake(128),            -- SHAKE128
        shake256   = shake(256),            -- SHAKE256
        -- misc utilities:
        hmac       = hmac,       -- HMAC(hash_func, key, message) is applicable to any hash function from this module except SHAKE*
        hex2bin    = hex2bin,    -- converts hexadecimal representation to binary string
        base642bin = base642bin, -- converts base64 representation to binary string
        bin2base64 = bin2base64, -- converts binary string to base64 representation
     }
  end
  2944.  
  2945.  
  -- Block size (in bytes) of every hash function HMAC can be used with; the keys are the functions themselves
  block_size_for_HMAC = {}
  for _, name in ipairs{"md5", "sha1", "sha224", "sha256"} do
     block_size_for_HMAC[sha[name]] = 64
  end
  for _, name in ipairs{"sha512_224", "sha512_256", "sha384", "sha512"} do
     block_size_for_HMAC[sha[name]] = 128
  end
  for _, digest_bits in ipairs{224, 256, 384, 512} do
     -- for SHA3 the block size depends on the digest size
     block_size_for_HMAC[sha["sha3_"..digest_bits]] = (1600 - 2 * digest_bits) / 8
  end
  2960.  
  2961.  
  2962. return sha
  2963.  
Add Comment
Please, Sign In to add comment