  1. --[[--------------------------------------------------------------------
  2.  
  3.   LuaSrcDiet
  4.   Compresses Lua source code by removing unnecessary characters.
  5.   For Lua 5.1.x source code.
  6.  
  7.   Copyright (c) 2008,2011,2012 Kein-Hong Man <[email protected]>
  8.   The COPYRIGHT file describes the conditions
  9.   under which this software may be distributed.
  10.  
  11. ----------------------------------------------------------------------]]
  12.  
  13. --[[--------------------------------------------------------------------
  14. -- NOTES:
  15. -- * Remember to update version and date information below (MSG_TITLE)
  16. -- * TODO: passing data tables around is a horrific mess
  17. -- * TODO: to implement pcall() to properly handle lexer etc. errors
  18. -- * TODO: need some automatic testing for a semblance of sanity
  19. -- * TODO: the plugin module is highly experimental and unstable
  20. ----------------------------------------------------------------------]]
  21.  
  22. -- standard libraries, functions
  23. local string = string
  24. local math = math
  25. local table = table
  26. local require = require
  27. local print = print
  28. local sub = string.sub
  29. local gmatch = string.gmatch
  30. local match = string.match
  31.  
  32. -- modules incorporated as preload functions follow
  33. local preload = package.preload
  34. local base = _G
  35.  
  36. local plugin_info = {
  37.   html = "html    generates an HTML file for checking globals",
  38.   sloc = "sloc    calculates SLOC for given source file",
  39. }
  40.  
  41. local p_embedded = {
  42.   'html',
  43.   'sloc',
  44. }
  45.  
  46. -- preload function for module llex
  47. preload.llex =
  48. function()
  49. --start of inserted module
  50. module "llex"
  51.  
  52. local string = base.require "string"
  53. local find = string.find
  54. local match = string.match
  55. local sub = string.sub
  56.  
  57. ----------------------------------------------------------------------
  58. -- initialize keyword list, variables
  59. ----------------------------------------------------------------------
  60.  
  61. local kw = {}
  62. for v in string.gmatch([[
  63. and break do else elseif end false for function if in
  64. local nil not or repeat return then true until while]], "%S+") do
  65.   kw[v] = true
  66. end
  67.  
  68. -- see init() for module variables (externally visible):
  69. --       tok, seminfo, tokln
  70.  
  71. local z,                -- source stream
  72.       sourceid,         -- name of source
  73.       I,                -- position of lexer
  74.       buff,             -- buffer for strings
  75.       ln                -- line number
  76.  
  77. ----------------------------------------------------------------------
  78. -- add information to token listing
  79. ----------------------------------------------------------------------
  80.  
  81. local function addtoken(token, info)
  82.   local i = #tok + 1
  83.   tok[i] = token
  84.   seminfo[i] = info
  85.   tokln[i] = ln
  86. end
  87.  
  88. ----------------------------------------------------------------------
  89. -- handles line number incrementation and end-of-line characters
  90. ----------------------------------------------------------------------
  91.  
  92. local function inclinenumber(i, is_tok)
  93.   local sub = sub
  94.   local old = sub(z, i, i)
  95.   i = i + 1  -- skip '\n' or '\r'
  96.   local c = sub(z, i, i)
  97.   if (c == "\n" or c == "\r") and (c ~= old) then
  98.     i = i + 1  -- skip '\n\r' or '\r\n'
  99.     old = old..c
  100.   end
  101.   if is_tok then addtoken("TK_EOL", old) end
  102.   ln = ln + 1
  103.   I = i
  104.   return i
  105. end
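
-- note: "\n", "\r", "\r\n" and "\n\r" are each consumed as a single
-- end-of-line above, so ln advances by exactly one per call and the
-- two-character pairs are never counted twice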
  106.  
  107. ----------------------------------------------------------------------
  108. -- initialize lexer for given source _z and source name _sourceid
  109. ----------------------------------------------------------------------
  110.  
  111. function init(_z, _sourceid)
  112.   z = _z                        -- source
  113.   sourceid = _sourceid          -- name of source
  114.   I = 1                         -- lexer's position in source
  115.   ln = 1                        -- line number
  116.   tok = {}                      -- lexed token list*
  117.   seminfo = {}                  -- lexed semantic information list*
  118.   tokln = {}                    -- line numbers for messages*
  119.                                 -- (*) externally visible thru' module
  120.   --------------------------------------------------------------------
  121.   -- initial processing (shbang handling)
  122.   --------------------------------------------------------------------
  123.   local p, _, q, r = find(z, "^(#[^\r\n]*)(\r?\n?)")
  124.   if p then                             -- skip first line
  125.     I = I + #q
  126.     addtoken("TK_COMMENT", q)
  127.     if #r > 0 then inclinenumber(I, true) end
  128.   end
  129. end
  130.  
  131. ----------------------------------------------------------------------
  132. -- returns a chunk name or id, no truncation for long names
  133. ----------------------------------------------------------------------
  134.  
  135. function chunkid()
  136.   if sourceid and match(sourceid, "^[=@]") then
  137.     return sub(sourceid, 2)  -- remove first char
  138.   end
  139.   return "[string]"
  140. end
  141.  
  142. ----------------------------------------------------------------------
  143. -- formats error message and throws error
  144. -- * a simplified version, does not report what token was responsible
  145. ----------------------------------------------------------------------
  146.  
  147. function errorline(s, line)
  148.   local e = error or base.error
  149.   e(string.format("%s:%d: %s", chunkid(), line or ln, s))
  150. end
  151. local errorline = errorline
  152.  
  153. ------------------------------------------------------------------------
  154. -- count separators ("=") in a long string delimiter
  155. ------------------------------------------------------------------------
  156.  
  157. local function skip_sep(i)
  158.   local sub = sub
  159.   local s = sub(z, i, i)
  160.   i = i + 1
  161.   local count = #match(z, "=*", i)
  162.   i = i + count
  163.   I = i
  164.   return (sub(z, i, i) == s) and count or (-count) - 1
  165. end
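
-- illustrative examples for skip_sep, with i at the opening bracket:
--   "[[" or "[==["  -> returns 0 or 2  (valid long bracket of that level)
--   "[x"            -> returns -1      (plain '[' operator, see llex())
--   "[=x"           -> returns -2      (invalid long string delimiter)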
  166.  
  167. ----------------------------------------------------------------------
  168. -- reads a long string or long comment
  169. ----------------------------------------------------------------------
  170.  
  171. local function read_long_string(is_str, sep)
  172.   local i = I + 1  -- skip 2nd '['
  173.   local sub = sub
  174.   local c = sub(z, i, i)
  175.   if c == "\r" or c == "\n" then  -- string starts with a newline?
  176.     i = inclinenumber(i)  -- skip it
  177.   end
  178.   while true do
  179.     local p, q, r = find(z, "([\r\n%]])", i) -- (long range match)
  180.     if not p then
  181.       errorline(is_str and "unfinished long string" or
  182.                 "unfinished long comment")
  183.     end
  184.     i = p
  185.     if r == "]" then                    -- delimiter test
  186.       if skip_sep(i) == sep then
  187.         buff = sub(z, buff, I)
  188.         I = I + 1  -- skip 2nd ']'
  189.         return buff
  190.       end
  191.       i = I
  192.     else                                -- newline
  193.       buff = buff.."\n"
  194.       i = inclinenumber(i)
  195.     end
  196.   end--while
  197. end
  198.  
  199. ----------------------------------------------------------------------
  200. -- reads a string
  201. ----------------------------------------------------------------------
  202.  
  203. local function read_string(del)
  204.   local i = I
  205.   local find = find
  206.   local sub = sub
  207.   while true do
  208.     local p, q, r = find(z, "([\n\r\\\"\'])", i) -- (long range match)
  209.     if p then
  210.       if r == "\n" or r == "\r" then
  211.         errorline("unfinished string")
  212.       end
  213.       i = p
  214.       if r == "\\" then                         -- handle escapes
  215.         i = i + 1
  216.         r = sub(z, i, i)
  217.         if r == "" then break end -- (EOZ error)
  218.         p = find("abfnrtv\n\r", r, 1, true)
  219.         ------------------------------------------------------
  220.         if p then                               -- special escapes
  221.           if p > 7 then
  222.             i = inclinenumber(i)
  223.           else
  224.             i = i + 1
  225.           end
  226.         ------------------------------------------------------
  227.         elseif find(r, "%D") then               -- other non-digits
  228.           i = i + 1
  229.         ------------------------------------------------------
  230.         else                                    -- \xxx sequence
  231.           local p, q, s = find(z, "^(%d%d?%d?)", i)
  232.           i = q + 1
  233.           if s + 1 > 256 then -- UCHAR_MAX
  234.             errorline("escape sequence too large")
  235.           end
  236.         ------------------------------------------------------
  237.         end--if p
  238.       else
  239.         i = i + 1
  240.         if r == del then                        -- ending delimiter
  241.           I = i
  242.           return sub(z, buff, i - 1)            -- return string
  243.         end
  244.       end--if r
  245.     else
  246.       break -- (error)
  247.     end--if p
  248.   end--while
  249.   errorline("unfinished string")
  250. end
  251.  
  252. ------------------------------------------------------------------------
  253. -- main lexer function
  254. ------------------------------------------------------------------------
  255.  
  256. function llex()
  257.   local find = find
  258.   local match = match
  259.   while true do--outer
  260.     local i = I
  261.     -- inner loop allows break to be used to nicely section tests
  262.     while true do--inner
  263.       ----------------------------------------------------------------
  264.       local p, _, r = find(z, "^([_%a][_%w]*)", i)
  265.       if p then
  266.         I = i + #r
  267.         if kw[r] then
  268.           addtoken("TK_KEYWORD", r)             -- reserved word (keyword)
  269.         else
  270.           addtoken("TK_NAME", r)                -- identifier
  271.         end
  272.         break -- (continue)
  273.       end
  274.       ----------------------------------------------------------------
  275.       local p, _, r = find(z, "^(%.?)%d", i)
  276.       if p then                                 -- numeral
  277.         if r == "." then i = i + 1 end
  278.         local _, q, r = find(z, "^%d*[%.%d]*([eE]?)", i)
  279.         i = q + 1
  280.         if #r == 1 then                         -- optional exponent
  281.           if match(z, "^[%+%-]", i) then        -- optional sign
  282.             i = i + 1
  283.           end
  284.         end
  285.         local _, q = find(z, "^[_%w]*", i)
  286.         I = q + 1
  287.         local v = sub(z, p, q)                  -- string equivalent
  288.         if not base.tonumber(v) then            -- handles hex test also
  289.           errorline("malformed number")
  290.         end
  291.         addtoken("TK_NUMBER", v)
  292.         break -- (continue)
  293.       end
  294.       ----------------------------------------------------------------
  295.       local p, q, r, t = find(z, "^((%s)[ \t\v\f]*)", i)
  296.       if p then
  297.         if t == "\n" or t == "\r" then          -- newline
  298.           inclinenumber(i, true)
  299.         else
  300.           I = q + 1                             -- whitespace
  301.           addtoken("TK_SPACE", r)
  302.         end
  303.         break -- (continue)
  304.       end
  305.       ----------------------------------------------------------------
  306.       local r = match(z, "^%p", i)
  307.       if r then
  308.         buff = i
  309.         local p = find("-[\"\'.=<>~", r, 1, true)
  310.         if p then
  311.           -- two-level if block for punctuation/symbols
  312.           --------------------------------------------------------
  313.           if p <= 2 then
  314.             if p == 1 then                      -- minus
  315.               local c = match(z, "^%-%-(%[?)", i)
  316.               if c then
  317.                 i = i + 2
  318.                 local sep = -1
  319.                 if c == "[" then
  320.                   sep = skip_sep(i)
  321.                 end
  322.                 if sep >= 0 then                -- long comment
  323.                   addtoken("TK_LCOMMENT", read_long_string(false, sep))
  324.                 else                            -- short comment
  325.                   I = find(z, "[\n\r]", i) or (#z + 1)
  326.                   addtoken("TK_COMMENT", sub(z, buff, I - 1))
  327.                 end
  328.                 break -- (continue)
  329.               end
  330.               -- (fall through for "-")
  331.             else                                -- [ or long string
  332.               local sep = skip_sep(i)
  333.               if sep >= 0 then
  334.                 addtoken("TK_LSTRING", read_long_string(true, sep))
  335.               elseif sep == -1 then
  336.                 addtoken("TK_OP", "[")
  337.               else
  338.                 errorline("invalid long string delimiter")
  339.               end
  340.               break -- (continue)
  341.             end
  342.           --------------------------------------------------------
  343.           elseif p <= 5 then
  344.             if p < 5 then                       -- strings
  345.               I = i + 1
  346.               addtoken("TK_STRING", read_string(r))
  347.               break -- (continue)
  348.             end
  349.             r = match(z, "^%.%.?%.?", i)        -- .|..|... dots
  350.             -- (fall through)
  351.           --------------------------------------------------------
  352.           else                                  -- relational
  353.             r = match(z, "^%p=?", i)
  354.             -- (fall through)
  355.           end
  356.         end
  357.         I = i + #r
  358.         addtoken("TK_OP", r)  -- for other symbols, fall through
  359.         break -- (continue)
  360.       end
  361.       ----------------------------------------------------------------
  362.       local r = sub(z, i, i)
  363.       if r ~= "" then
  364.         I = i + 1
  365.         addtoken("TK_OP", r)                    -- other single-char tokens
  366.         break
  367.       end
  368.       addtoken("TK_EOS", "")                    -- end of stream,
  369.       return                                    -- exit here
  370.       ----------------------------------------------------------------
  371.     end--while inner
  372.   end--while outer
  373. end
  374. --end of inserted module
  375. end
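
-- usage sketch (not part of the original file; 'source_text' is a
-- hypothetical string holding Lua source):
--   local llex = require "llex"             -- resolved via the preload above
--   llex.init(source_text, "@example.lua")  -- source string, chunk name
--   llex.llex()                              -- tokenize the whole source
--   for i = 1, #llex.tok do
--     print(llex.tok[i], llex.seminfo[i], llex.tokln[i])
--   end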
  376.  
  377. -- preload function for module lparser
  378. preload.lparser =
  379. function()
  380. --start of inserted module
  381. module "lparser"
  382.  
  383. local string = base.require "string"
  384.  
  385. --[[--------------------------------------------------------------------
  386. -- variable and data structure initialization
  387. ----------------------------------------------------------------------]]
  388.  
  389. ----------------------------------------------------------------------
  390. -- initialization: main variables
  391. ----------------------------------------------------------------------
  392.  
  393. local toklist,                  -- grammar-only token tables (token table,
  394.       seminfolist,              -- semantic information table, line number
  395.       toklnlist,                -- table, cross-reference table)
  396.       xreflist,
  397.       tpos,                     -- token position
  398.  
  399.       line,                     -- start line # for error messages
  400.       lastln,                   -- last line # for ambiguous syntax chk
  401.       tok, seminfo, ln, xref,   -- token, semantic info, line
  402.       nameref,                  -- proper position of <name> token
  403.       fs,                       -- current function state
  404.       top_fs,                   -- top-level function state
  405.  
  406.       globalinfo,               -- global variable information table
  407.       globallookup,             -- global variable name lookup table
  408.       localinfo,                -- local variable information table
  409.       ilocalinfo,               -- inactive locals (prior to activation)
  410.       ilocalrefs,               -- corresponding references to activate
  411.       statinfo                  -- statements labeled by type
  412.  
  413. -- forward references for local functions
  414. local explist1, expr, block, exp1, body, chunk
  415.  
  416. ----------------------------------------------------------------------
  417. -- initialization: data structures
  418. ----------------------------------------------------------------------
  419.  
  420. local gmatch = string.gmatch
  421.  
  422. local block_follow = {}         -- lookahead check in chunk(), returnstat()
  423. for v in gmatch("else elseif end until <eof>", "%S+") do
  424.   block_follow[v] = true
  425. end
  426.  
  427. local binopr_left = {}          -- binary operators, left priority
  428. local binopr_right = {}         -- binary operators, right priority
  429. for op, lt, rt in gmatch([[
  430. {+ 6 6}{- 6 6}{* 7 7}{/ 7 7}{% 7 7}
  431. {^ 10 9}{.. 5 4}
  432. {~= 3 3}{== 3 3}
  433. {< 3 3}{<= 3 3}{> 3 3}{>= 3 3}
  434. {and 2 2}{or 1 1}
  435. ]], "{(%S+)%s(%d+)%s(%d+)}") do
  436.   binopr_left[op] = lt + 0
  437.   binopr_right[op] = rt + 0
  438. end
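
-- for reference, the table above gives '^' priorities 10/9 and '..' 5/4,
-- so both group to the right; all other binary operators have equal
-- left/right priorities and group to the left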
  439.  
  440. local unopr = { ["not"] = true, ["-"] = true,
  441.                 ["#"] = true, } -- unary operators
  442. local UNARY_PRIORITY = 8        -- priority for unary operators
  443.  
  444. --[[--------------------------------------------------------------------
  445. -- support functions
  446. ----------------------------------------------------------------------]]
  447.  
  448. ----------------------------------------------------------------------
  449. -- formats error message and throws error (duplicated from llex)
  450. -- * a simplified version, does not report what token was responsible
  451. ----------------------------------------------------------------------
  452.  
  453. local function errorline(s, line)
  454.   local e = error or base.error
  455.   e(string.format("(source):%d: %s", line or ln, s))
  456. end
  457.  
  458. ----------------------------------------------------------------------
  459. -- handles incoming token, semantic information pairs
  460. -- * NOTE: 'nextt' is named 'next' originally
  461. ----------------------------------------------------------------------
  462.  
  463. -- reads in next token
  464. local function nextt()
  465.   lastln = toklnlist[tpos]
  466.   tok, seminfo, ln, xref
  467.     = toklist[tpos], seminfolist[tpos], toklnlist[tpos], xreflist[tpos]
  468.   tpos = tpos + 1
  469. end
  470.  
  471. -- peek at next token (single lookahead for table constructor)
  472. local function lookahead()
  473.   return toklist[tpos]
  474. end
  475.  
  476. ----------------------------------------------------------------------
  477. -- throws a syntax error; also used when an expected token is not found
  478. ----------------------------------------------------------------------
  479.  
  480. local function syntaxerror(msg)
  481.   local tok = tok
  482.   if tok ~= "<number>" and tok ~= "<string>" then
  483.     if tok == "<name>" then tok = seminfo end
  484.     tok = "'"..tok.."'"
  485.   end
  486.   errorline(msg.." near "..tok)
  487. end
  488.  
  489. local function error_expected(token)
  490.   syntaxerror("'"..token.."' expected")
  491. end
  492.  
  493. ----------------------------------------------------------------------
  494. -- tests for a token, returns outcome
  495. -- * return value changed to boolean
  496. ----------------------------------------------------------------------
  497.  
  498. local function testnext(c)
  499.   if tok == c then nextt(); return true end
  500. end
  501.  
  502. ----------------------------------------------------------------------
  503. -- check for existence of a token, throws error if not found
  504. ----------------------------------------------------------------------
  505.  
  506. local function check(c)
  507.   if tok ~= c then error_expected(c) end
  508. end
  509.  
  510. ----------------------------------------------------------------------
  511. -- verify existence of a token, then skip it
  512. ----------------------------------------------------------------------
  513.  
  514. local function checknext(c)
  515.   check(c); nextt()
  516. end
  517.  
  518. ----------------------------------------------------------------------
  519. -- throws error if condition not matched
  520. ----------------------------------------------------------------------
  521.  
  522. local function check_condition(c, msg)
  523.   if not c then syntaxerror(msg) end
  524. end
  525.  
  526. ----------------------------------------------------------------------
  527. -- verifies that the expected token is found, or else throws an error
  528. ----------------------------------------------------------------------
  529.  
  530. local function check_match(what, who, where)
  531.   if not testnext(what) then
  532.     if where == ln then
  533.       error_expected(what)
  534.     else
  535.       syntaxerror("'"..what.."' expected (to close '"..who.."' at line "..where..")")
  536.     end
  537.   end
  538. end
  539.  
  540. ----------------------------------------------------------------------
  541. -- expect that token is a name, return the name
  542. ----------------------------------------------------------------------
  543.  
  544. local function str_checkname()
  545.   check("<name>")
  546.   local ts = seminfo
  547.   nameref = xref
  548.   nextt()
  549.   return ts
  550. end
  551.  
  552. ----------------------------------------------------------------------
  553. -- adds given string s in string pool, sets e as VK
  554. ----------------------------------------------------------------------
  555.  
  556. local function codestring(e, s)
  557.   e.k = "VK"
  558. end
  559.  
  560. ----------------------------------------------------------------------
  561. -- consume a name token, adds it to string pool
  562. ----------------------------------------------------------------------
  563.  
  564. local function checkname(e)
  565.   codestring(e, str_checkname())
  566. end
  567.  
  568. --[[--------------------------------------------------------------------
  569. -- variable (global|local|upvalue) handling
  570. -- * to track locals and globals, variable management code needed
  571. -- * entry point is singlevar() for variable lookups
  572. -- * lookup tables (bl.locallist) are maintained awkwardly in the basic
  573. --   block data structures, PLUS the function data structure (this is
  574. --   an inelegant hack, since bl is nil for the top level of a function)
  575. ----------------------------------------------------------------------]]
  576.  
  577. ----------------------------------------------------------------------
  578. -- register a local variable, create local variable object, set in
  579. -- to-activate variable list
  580. -- * used in new_localvarliteral(), parlist(), fornum(), forlist(),
  581. --   localfunc(), localstat()
  582. ----------------------------------------------------------------------
  583.  
  584. local function new_localvar(name, special)
  585.   local bl = fs.bl
  586.   local locallist
  587.   -- locate locallist in current block object or function root object
  588.   if bl then
  589.     locallist = bl.locallist
  590.   else
  591.     locallist = fs.locallist
  592.   end
  593.   -- build local variable information object and set localinfo
  594.   local id = #localinfo + 1
  595.   localinfo[id] = {             -- new local variable object
  596.     name = name,                -- local variable name
  597.     xref = { nameref },         -- xref, first value is declaration
  598.     decl = nameref,             -- location of declaration, = xref[1]
  599.   }
  600.   if special then               -- "self" must not be changed
  601.     localinfo[id].isself = true
  602.   end
  603.   -- this can override a local with the same name in the same scope
  604.   -- but first, keep it inactive until it gets activated
  605.   local i = #ilocalinfo + 1
  606.   ilocalinfo[i] = id
  607.   ilocalrefs[i] = locallist
  608. end
  609.  
  610. ----------------------------------------------------------------------
  611. -- actually activate the variables so that they are visible
  612. -- * remember Lua semantics, e.g. RHS is evaluated first, then LHS
  613. -- * used in parlist(), forbody(), localfunc(), localstat(), body()
  614. ----------------------------------------------------------------------
  615.  
  616. local function adjustlocalvars(nvars)
  617.   local sz = #ilocalinfo
  618.   -- i goes from left to right, in order of local allocation, because
  619.   -- of something like: local a,a,a = 1,2,3 which gives a = 3
  620.   while nvars > 0 do
  621.     nvars = nvars - 1
  622.     local i = sz - nvars
  623.     local id = ilocalinfo[i]            -- local's id
  624.     local obj = localinfo[id]
  625.     local name = obj.name               -- name of local
  626.     obj.act = xref                      -- set activation location
  627.     ilocalinfo[i] = nil
  628.     local locallist = ilocalrefs[i]     -- ref to lookup table to update
  629.     ilocalrefs[i] = nil
  630.     local existing = locallist[name]    -- if existing, remove old first!
  631.     if existing then                    -- do not overlap, set special
  632.       obj = localinfo[existing]         -- form of rem, as -id
  633.       obj.rem = -id
  634.     end
  635.     locallist[name] = id                -- activate, now visible to Lua
  636.   end
  637. end
  638.  
  639. ----------------------------------------------------------------------
  640. -- remove (deactivate) variables in current scope (before scope exits)
  641. -- * zap entire locallist tables since we are not allocating registers
  642. -- * used in leaveblock(), close_func()
  643. ----------------------------------------------------------------------
  644.  
  645. local function removevars()
  646.   local bl = fs.bl
  647.   local locallist
  648.   -- locate locallist in current block object or function root object
  649.   if bl then
  650.     locallist = bl.locallist
  651.   else
  652.     locallist = fs.locallist
  653.   end
  654.   -- enumerate the local list at current scope and deactivate 'em
  655.   for name, id in base.pairs(locallist) do
  656.     local obj = localinfo[id]
  657.     obj.rem = xref                      -- set deactivation location
  658.   end
  659. end
  660.  
  661. ----------------------------------------------------------------------
  662. -- creates a new local variable given a name
  663. -- * skips internal locals (those starting with '('), so internal
  664. --   locals never need a corresponding adjustlocalvars() call
  665. -- * special is true for "self" which must not be optimized
  666. -- * used in fornum(), forlist(), parlist(), body()
  667. ----------------------------------------------------------------------
  668.  
  669. local function new_localvarliteral(name, special)
  670.   if string.sub(name, 1, 1) == "(" then  -- can skip internal locals
  671.     return
  672.   end
  673.   new_localvar(name, special)
  674. end
  675.  
  676. ----------------------------------------------------------------------
  677. -- search the local variable namespace of the given fs for a match
  678. -- * returns localinfo index
  679. -- * used only in singlevaraux()
  680. ----------------------------------------------------------------------
  681.  
  682. local function searchvar(fs, n)
  683.   local bl = fs.bl
  684.   local locallist
  685.   if bl then
  686.     locallist = bl.locallist
  687.     while locallist do
  688.       if locallist[n] then return locallist[n] end  -- found
  689.       bl = bl.prev
  690.       locallist = bl and bl.locallist
  691.     end
  692.   end
  693.   locallist = fs.locallist
  694.   return locallist[n] or -1  -- found or not found (-1)
  695. end
  696.  
  697. ----------------------------------------------------------------------
  698. -- handle locals, globals and upvalues and related processing
  699. -- * search mechanism is recursive, calls itself to search parents
  700. -- * used only in singlevar()
  701. ----------------------------------------------------------------------
  702.  
  703. local function singlevaraux(fs, n, var)
  704.   if fs == nil then  -- no more levels?
  705.     var.k = "VGLOBAL"  -- default is global variable
  706.     return "VGLOBAL"
  707.   else
  708.     local v = searchvar(fs, n)  -- look up at current level
  709.     if v >= 0 then
  710.       var.k = "VLOCAL"
  711.       var.id = v
  712.       --  codegen may need to deal with upvalue here
  713.       return "VLOCAL"
  714.     else  -- not found at current level; try upper one
  715.       if singlevaraux(fs.prev, n, var) == "VGLOBAL" then
  716.         return "VGLOBAL"
  717.       end
  718.       -- else was LOCAL or UPVAL, handle here
  719.       var.k = "VUPVAL"  -- upvalue in this level
  720.       return "VUPVAL"
  721.     end--if v
  722.   end--if fs
  723. end
  724.  
  725. ----------------------------------------------------------------------
  726. -- consume a name token, creates a variable (global|local|upvalue)
  727. -- * used in prefixexp(), funcname()
  728. ----------------------------------------------------------------------
  729.  
  730. local function singlevar(v)
  731.   local name = str_checkname()
  732.   singlevaraux(fs, name, v)
  733.   ------------------------------------------------------------------
  734.   -- variable tracking
  735.   ------------------------------------------------------------------
  736.   if v.k == "VGLOBAL" then
  737.     -- if global being accessed, keep track of it by creating an object
  738.     local id = globallookup[name]
  739.     if not id then
  740.       id = #globalinfo + 1
  741.       globalinfo[id] = {                -- new global variable object
  742.         name = name,                    -- global variable name
  743.         xref = { nameref },             -- xref, first value is declaration
  744.       }
  745.       globallookup[name] = id           -- remember it
  746.     else
  747.       local obj = globalinfo[id].xref
  748.       obj[#obj + 1] = nameref           -- add xref
  749.     end
  750.   else
  751.     -- local/upvalue is being accessed, keep track of it
  752.     local id = v.id
  753.     local obj = localinfo[id].xref
  754.     obj[#obj + 1] = nameref             -- add xref
  755.   end
  756. end
  757.  
  758. --[[--------------------------------------------------------------------
  759. -- state management functions with open/close pairs
  760. ----------------------------------------------------------------------]]
  761.  
  762. ----------------------------------------------------------------------
  763. -- enters a code unit, initializes elements
  764. ----------------------------------------------------------------------
  765.  
  766. local function enterblock(isbreakable)
  767.   local bl = {}  -- per-block state
  768.   bl.isbreakable = isbreakable
  769.   bl.prev = fs.bl
  770.   bl.locallist = {}
  771.   fs.bl = bl
  772. end
  773.  
  774. ----------------------------------------------------------------------
  775. -- leaves a code unit, close any upvalues
  776. ----------------------------------------------------------------------
  777.  
  778. local function leaveblock()
  779.   local bl = fs.bl
  780.   removevars()
  781.   fs.bl = bl.prev
  782. end
  783.  
  784. ----------------------------------------------------------------------
  785. -- opening of a function
  786. -- * top_fs is only for anchoring the top fs, so that parser() can
  787. --   return it to the caller function along with useful output
  788. -- * used in parser() and body()
  789. ----------------------------------------------------------------------
  790.  
  791. local function open_func()
  792.   local new_fs  -- per-function state
  793.   if not fs then  -- top_fs is created early
  794.     new_fs = top_fs
  795.   else
  796.     new_fs = {}
  797.   end
  798.   new_fs.prev = fs  -- linked list of function states
  799.   new_fs.bl = nil
  800.   new_fs.locallist = {}
  801.   fs = new_fs
  802. end
  803.  
  804. ----------------------------------------------------------------------
  805. -- closing of a function
  806. -- * used in parser() and body()
  807. ----------------------------------------------------------------------
  808.  
  809. local function close_func()
  810.   removevars()
  811.   fs = fs.prev
  812. end
  813.  
  814. --[[--------------------------------------------------------------------
  815. -- other parsing functions
  816. -- * for table constructor, parameter list, argument list
  817. ----------------------------------------------------------------------]]
  818.  
  819. ----------------------------------------------------------------------
  820. -- parse a function name suffix, for function call specifications
  821. -- * used in primaryexp(), funcname()
  822. ----------------------------------------------------------------------
  823.  
  824. local function field(v)
  825.   -- field -> ['.' | ':'] NAME
  826.   local key = {}
  827.   nextt()  -- skip the dot or colon
  828.   checkname(key)
  829.   v.k = "VINDEXED"
  830. end
  831.  
  832. ----------------------------------------------------------------------
  833. -- parse a table indexing suffix, for constructors, expressions
  834. -- * used in recfield(), primaryexp()
  835. ----------------------------------------------------------------------
  836.  
  837. local function yindex(v)
  838.   -- index -> '[' expr ']'
  839.   nextt()  -- skip the '['
  840.   expr(v)
  841.   checknext("]")
  842. end
  843.  
  844. ----------------------------------------------------------------------
  845. -- parse a table record (hash) field
  846. -- * used in constructor()
  847. ----------------------------------------------------------------------
  848.  
  849. local function recfield(cc)
  850.   -- recfield -> (NAME | '['exp1']') = exp1
  851.   local key, val = {}, {}
  852.   if tok == "<name>" then
  853.     checkname(key)
  854.   else-- tok == '['
  855.     yindex(key)
  856.   end
  857.   checknext("=")
  858.   expr(val)
  859. end
  860.  
  861. ----------------------------------------------------------------------
  862. -- emit a set list instruction if enough elements (LFIELDS_PER_FLUSH)
  863. -- * note: retained in this skeleton because it modifies cc.v.k
  864. -- * used in constructor()
  865. ----------------------------------------------------------------------
  866.  
  867. local function closelistfield(cc)
  868.   if cc.v.k == "VVOID" then return end  -- there is no list item
  869.   cc.v.k = "VVOID"
  870. end
  871.  
  872. ----------------------------------------------------------------------
  873. -- parse a table list (array) field
  874. -- * used in constructor()
  875. ----------------------------------------------------------------------
  876.  
  877. local function listfield(cc)
  878.   expr(cc.v)
  879. end
  880.  
  881. ----------------------------------------------------------------------
  882. -- parse a table constructor
  883. -- * used in funcargs(), simpleexp()
  884. ----------------------------------------------------------------------
  885.  
  886. local function constructor(t)
  887.   -- constructor -> '{' [ field { fieldsep field } [ fieldsep ] ] '}'
  888.   -- field -> recfield | listfield
  889.   -- fieldsep -> ',' | ';'
  890.   local line = ln
  891.   local cc = {}
  892.   cc.v = {}
  893.   cc.t = t
  894.   t.k = "VRELOCABLE"
  895.   cc.v.k = "VVOID"
  896.   checknext("{")
  897.   repeat
  898.     if tok == "}" then break end
  899.     -- closelistfield(cc) here
  900.     local c = tok
  901.     if c == "<name>" then  -- may be listfields or recfields
  902.       if lookahead() ~= "=" then  -- look ahead: expression?
  903.         listfield(cc)
  904.       else
  905.         recfield(cc)
  906.       end
  907.     elseif c == "[" then  -- constructor_item -> recfield
  908.       recfield(cc)
  909.     else  -- constructor_part -> listfield
  910.       listfield(cc)
  911.     end
  912.   until not testnext(",") and not testnext(";")
  913.   check_match("}", "{", line)
  914.   -- lastlistfield(cc) here
  915. end
  916.  
  917. ----------------------------------------------------------------------
  918. -- parse the arguments (parameters) of a function declaration
  919. -- * used in body()
  920. ----------------------------------------------------------------------
  921.  
  922. local function parlist()
  923.   -- parlist -> [ param { ',' param } ]
  924.   local nparams = 0
  925.   if tok ~= ")" then  -- is 'parlist' not empty?
  926.     repeat
  927.       local c = tok
  928.       if c == "<name>" then  -- param -> NAME
  929.         new_localvar(str_checkname())
  930.         nparams = nparams + 1
  931.       elseif c == "..." then
  932.         nextt()
  933.         fs.is_vararg = true
  934.       else
  935.         syntaxerror("<name> or '...' expected")
  936.       end
  937.     until fs.is_vararg or not testnext(",")
  938.   end--if
  939.   adjustlocalvars(nparams)
  940. end
  941.  
  942. ----------------------------------------------------------------------
  943. -- parse the parameters of a function call
  944. -- * contrast with parlist(), used in function declarations
  945. -- * used in primaryexp()
  946. ----------------------------------------------------------------------
  947.  
  948. local function funcargs(f)
  949.   local args = {}
  950.   local line = ln
  951.   local c = tok
  952.   if c == "(" then  -- funcargs -> '(' [ explist1 ] ')'
  953.     if line ~= lastln then
  954.       syntaxerror("ambiguous syntax (function call x new statement)")
  955.     end
  956.     nextt()
  957.     if tok == ")" then  -- arg list is empty?
  958.       args.k = "VVOID"
  959.     else
  960.       explist1(args)
  961.     end
  962.     check_match(")", "(", line)
  963.   elseif c == "{" then  -- funcargs -> constructor
  964.     constructor(args)
  965.   elseif c == "<string>" then  -- funcargs -> STRING
  966.     codestring(args, seminfo)
  967.     nextt()  -- must use 'seminfo' before 'next'
  968.   else
  969.     syntaxerror("function arguments expected")
  970.     return
  971.   end--if c
  972.   f.k = "VCALL"
  973. end
  974.  
  975. --[[--------------------------------------------------------------------
  976. -- mostly expression functions
  977. ----------------------------------------------------------------------]]
  978.  
  979. ----------------------------------------------------------------------
  980. -- parses an expression in parentheses or a single variable
  981. -- * used in primaryexp()
  982. ----------------------------------------------------------------------
  983.  
  984. local function prefixexp(v)
  985.   -- prefixexp -> NAME | '(' expr ')'
  986.   local c = tok
  987.   if c == "(" then
  988.     local line = ln
  989.     nextt()
  990.     expr(v)
  991.     check_match(")", "(", line)
  992.   elseif c == "<name>" then
  993.     singlevar(v)
  994.   else
  995.     syntaxerror("unexpected symbol")
  996.   end--if c
  997. end
  998.  
  999. ----------------------------------------------------------------------
  1000. -- parses a prefixexp (an expression in parentheses or a single
  1001. -- variable) or a function call specification
  1002. -- * used in simpleexp(), assignment(), expr_stat()
  1003. ----------------------------------------------------------------------
  1004.  
  1005. local function primaryexp(v)
  1006.   -- primaryexp ->
  1007.   --    prefixexp { '.' NAME | '[' exp ']' | ':' NAME funcargs | funcargs }
  1008.   prefixexp(v)
  1009.   while true do
  1010.     local c = tok
  1011.     if c == "." then  -- field
  1012.       field(v)
  1013.     elseif c == "[" then  -- '[' exp1 ']'
  1014.       local key = {}
  1015.       yindex(key)
  1016.     elseif c == ":" then  -- ':' NAME funcargs
  1017.       local key = {}
  1018.       nextt()
  1019.       checkname(key)
  1020.       funcargs(v)
  1021.     elseif c == "(" or c == "<string>" or c == "{" then  -- funcargs
  1022.       funcargs(v)
  1023.     else
  1024.       return
  1025.     end--if c
  1026.   end--while
  1027. end
  1028.  
  1029. ----------------------------------------------------------------------
  1030. -- parses general expression types, constants handled here
  1031. -- * used in subexpr()
  1032. ----------------------------------------------------------------------
  1033.  
  1034. local function simpleexp(v)
  1035.   -- simpleexp -> NUMBER | STRING | NIL | TRUE | FALSE | ... |
  1036.   --              constructor | FUNCTION body | primaryexp
  1037.   local c = tok
  1038.   if c == "<number>" then
  1039.     v.k = "VKNUM"
  1040.   elseif c == "<string>" then
  1041.     codestring(v, seminfo)
  1042.   elseif c == "nil" then
  1043.     v.k = "VNIL"
  1044.   elseif c == "true" then
  1045.     v.k = "VTRUE"
  1046.   elseif c == "false" then
  1047.     v.k = "VFALSE"
  1048.   elseif c == "..." then  -- vararg
  1049.     check_condition(fs.is_vararg == true,
  1050.                     "cannot use '...' outside a vararg function");
  1051.     v.k = "VVARARG"
  1052.   elseif c == "{" then  -- constructor
  1053.     constructor(v)
  1054.     return
  1055.   elseif c == "function" then
  1056.     nextt()
  1057.     body(v, false, ln)
  1058.     return
  1059.   else
  1060.     primaryexp(v)
  1061.     return
  1062.   end--if c
  1063.   nextt()
  1064. end
  1065.  
  1066. ------------------------------------------------------------------------
  1067. -- Parse subexpressions. Includes handling of unary operators and binary
  1068. -- operators. A subexpr is given the rhs priority level of the operator
  1069. -- immediately left of it, if any (limit is -1 if none), and if a binop
  1070. -- is found, limit is compared with the lhs priority level of the binop
  1071. -- in order to determine which executes first.
  1072. -- * recursively called
  1073. -- * used in expr()
  1074. ------------------------------------------------------------------------
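
-- worked example (illustrative): for "1 + 2 * 3", expr() enters subexpr with
-- limit 0; after reading 1, '+' has left priority 6 > 0, so subexpr recurses
-- with limit binopr_right["+"] = 6; the inner call reads 2 and, since '*'
-- has left priority 7 > 6, consumes "* 3" before returning, i.e. '*' binds
-- tighter; for "2 ^ 3 ^ 2", right priority 9 < left priority 10 means the
-- second '^' is consumed by the recursive call, grouping to the right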
  1075.  
  1076. local function subexpr(v, limit)
  1077.   -- subexpr -> (simpleexp | unop subexpr) { binop subexpr }
  1078.   --   * where 'binop' is any binary operator with a priority
  1079.   --     higher than 'limit'
  1080.   local op = tok
  1081.   local uop = unopr[op]
  1082.   if uop then
  1083.     nextt()
  1084.     subexpr(v, UNARY_PRIORITY)
  1085.   else
  1086.     simpleexp(v)
  1087.   end
  1088.   -- expand while operators have priorities higher than 'limit'
  1089.   op = tok
  1090.   local binop = binopr_left[op]
  1091.   while binop and binop > limit do
  1092.     local v2 = {}
  1093.     nextt()
  1094.     -- read sub-expression with higher priority
  1095.     local nextop = subexpr(v2, binopr_right[op])
  1096.     op = nextop
  1097.     binop = binopr_left[op]
  1098.   end
  1099.   return op  -- return first untreated operator
  1100. end
  1101.  
  1102. ----------------------------------------------------------------------
  1103. -- Expression parsing starts here. Function subexpr is entered with the
  1104. -- left operator (which is non-existent) priority of -1, which is lower
  1105. -- than all actual operators. Expr information is returned in param v.
  1106. -- * used in cond(), explist1(), index(), recfield(), listfield(),
  1107. --   prefixexp(), while_stat(), exp1()
  1108. ----------------------------------------------------------------------
  1109.  
  1110. -- this is a forward-referenced local
  1111. function expr(v)
  1112.   -- expr -> subexpr
  1113.   subexpr(v, 0)
  1114. end
  1115.  
  1116. --[[--------------------------------------------------------------------
  1117. -- third level parsing functions
  1118. ----------------------------------------------------------------------]]
  1119.  
  1120. ------------------------------------------------------------------------
  1121. -- parse a variable assignment sequence
  1122. -- * recursively called
  1123. -- * used in expr_stat()
  1124. ------------------------------------------------------------------------
  1125.  
  1126. local function assignment(v)
  1127.   local e = {}
  1128.   local c = v.v.k
  1129.   check_condition(c == "VLOCAL" or c == "VUPVAL" or c == "VGLOBAL"
  1130.                   or c == "VINDEXED", "syntax error")
  1131.   if testnext(",") then  -- assignment -> ',' primaryexp assignment
  1132.     local nv = {}  -- expdesc
  1133.     nv.v = {}
  1134.     primaryexp(nv.v)
  1135.     -- lparser.c deals with some register usage conflict here
  1136.     assignment(nv)
  1137.   else  -- assignment -> '=' explist1
  1138.     checknext("=")
  1139.     explist1(e)
  1140.     return  -- avoid default
  1141.   end
  1142.   e.k = "VNONRELOC"
  1143. end
  1144.  
  1145. ----------------------------------------------------------------------
  1146. -- parse a for loop body for both versions of the for loop
  1147. -- * used in fornum(), forlist()
  1148. ----------------------------------------------------------------------
  1149.  
  1150. local function forbody(nvars, isnum)
  1151.   -- forbody -> DO block
  1152.   checknext("do")
  1153.   enterblock(false)  -- scope for declared variables
  1154.   adjustlocalvars(nvars)
  1155.   block()
  1156.   leaveblock()  -- end of scope for declared variables
  1157. end
  1158.  
  1159. ----------------------------------------------------------------------
  1160. -- parse a numerical for loop, calls forbody()
  1161. -- * used in for_stat()
  1162. ----------------------------------------------------------------------
  1163.  
  1164. local function fornum(varname)
  1165.   -- fornum -> NAME = exp1, exp1 [, exp1] DO body
  1166.   local line = line
  1167.   new_localvarliteral("(for index)")
  1168.   new_localvarliteral("(for limit)")
  1169.   new_localvarliteral("(for step)")
  1170.   new_localvar(varname)
  1171.   checknext("=")
  1172.   exp1()  -- initial value
  1173.   checknext(",")
  1174.   exp1()  -- limit
  1175.   if testnext(",") then
  1176.     exp1()  -- optional step
  1177.   else
  1178.     -- default step = 1
  1179.   end
  1180.   forbody(1, true)
  1181. end
  1182.  
  1183. ----------------------------------------------------------------------
  1184. -- parse a generic for loop, calls forbody()
  1185. -- * used in for_stat()
  1186. ----------------------------------------------------------------------
  1187.  
  1188. local function forlist(indexname)
  1189.   -- forlist -> NAME {, NAME} IN explist1 DO body
  1190.   local e = {}
  1191.   -- create control variables
  1192.   new_localvarliteral("(for generator)")
  1193.   new_localvarliteral("(for state)")
  1194.   new_localvarliteral("(for control)")
  1195.   -- create declared variables
  1196.   new_localvar(indexname)
  1197.   local nvars = 1
  1198.   while testnext(",") do
  1199.     new_localvar(str_checkname())
  1200.     nvars = nvars + 1
  1201.   end
  1202.   checknext("in")
  1203.   local line = line
  1204.   explist1(e)
  1205.   forbody(nvars, false)
  1206. end
  1207.  
  1208. ----------------------------------------------------------------------
  1209. -- parse a function name specification
  1210. -- * used in func_stat()
  1211. ----------------------------------------------------------------------
  1212.  
  1213. local function funcname(v)
  1214.   -- funcname -> NAME {field} [':' NAME]
  1215.   local needself = false
  1216.   singlevar(v)
  1217.   while tok == "." do
  1218.     field(v)
  1219.   end
  1220.   if tok == ":" then
  1221.     needself = true
  1222.     field(v)
  1223.   end
  1224.   return needself
  1225. end
  1226.  
  1227. ----------------------------------------------------------------------
  1228. -- parse the single expressions needed in numerical for loops
  1229. -- * used in fornum()
  1230. ----------------------------------------------------------------------
  1231.  
  1232. -- this is a forward-referenced local
  1233. function exp1()
  1234.   -- exp1 -> expr
  1235.   local e = {}
  1236.   expr(e)
  1237. end
  1238.  
  1239. ----------------------------------------------------------------------
  1240. -- parse condition in a repeat statement or an if control structure
  1241. -- * used in repeat_stat(), test_then_block()
  1242. ----------------------------------------------------------------------
  1243.  
  1244. local function cond()
  1245.   -- cond -> expr
  1246.   local v = {}
  1247.   expr(v)  -- read condition
  1248. end
  1249.  
  1250. ----------------------------------------------------------------------
  1251. -- parse part of an if control structure, including the condition
  1252. -- * used in if_stat()
  1253. ----------------------------------------------------------------------
  1254.  
  1255. local function test_then_block()
  1256.   -- test_then_block -> [IF | ELSEIF] cond THEN block
  1257.   nextt()  -- skip IF or ELSEIF
  1258.   cond()
  1259.   checknext("then")
  1260.   block()  -- 'then' part
  1261. end
  1262.  
  1263. ----------------------------------------------------------------------
  1264. -- parse a local function statement
  1265. -- * used in local_stat()
  1266. ----------------------------------------------------------------------
  1267.  
  1268. local function localfunc()
  1269.   -- localfunc -> NAME body
  1270.   local v, b = {}
  1271.   new_localvar(str_checkname())
  1272.   v.k = "VLOCAL"
  1273.   adjustlocalvars(1)
  1274.   body(b, false, ln)
  1275. end
  1276.  
  1277. ----------------------------------------------------------------------
  1278. -- parse a local variable declaration statement
  1279. -- * used in local_stat()
  1280. ----------------------------------------------------------------------
  1281.  
  1282. local function localstat()
  1283.   -- localstat -> NAME {',' NAME} ['=' explist1]
  1284.   local nvars = 0
  1285.   local e = {}
  1286.   repeat
  1287.     new_localvar(str_checkname())
  1288.     nvars = nvars + 1
  1289.   until not testnext(",")
  1290.   if testnext("=") then
  1291.     explist1(e)
  1292.   else
  1293.     e.k = "VVOID"
  1294.   end
  1295.   adjustlocalvars(nvars)
  1296. end
  1297.  
  1298. ----------------------------------------------------------------------
  1299. -- parse a list of comma-separated expressions
  1300. -- * used in return_stat(), localstat(), funcargs(), assignment(),
  1301. --   forlist()
  1302. ----------------------------------------------------------------------
  1303.  
  1304. -- this is a forward-referenced local
  1305. function explist1(e)
  1306.   -- explist1 -> expr { ',' expr }
  1307.   expr(e)
  1308.   while testnext(",") do
  1309.     expr(e)
  1310.   end
  1311. end
  1312.  
  1313. ----------------------------------------------------------------------
  1314. -- parse function declaration body
  1315. -- * used in simpleexp(), localfunc(), func_stat()
  1316. ----------------------------------------------------------------------
  1317.  
  1318. -- this is a forward-referenced local
  1319. function body(e, needself, line)
  1320.   -- body ->  '(' parlist ')' chunk END
  1321.   open_func()
  1322.   checknext("(")
  1323.   if needself then
  1324.     new_localvarliteral("self", true)
  1325.     adjustlocalvars(1)
  1326.   end
  1327.   parlist()
  1328.   checknext(")")
  1329.   chunk()
  1330.   check_match("end", "function", line)
  1331.   close_func()
  1332. end
  1333.  
  1334. ----------------------------------------------------------------------
  1335. -- parse a code block or unit
  1336. -- * used in do_stat(), while_stat(), forbody(), test_then_block(),
  1337. --   if_stat()
  1338. ----------------------------------------------------------------------
  1339.  
  1340. -- this is a forward-referenced local
  1341. function block()
  1342.   -- block -> chunk
  1343.   enterblock(false)
  1344.   chunk()
  1345.   leaveblock()
  1346. end
  1347.  
  1348. --[[--------------------------------------------------------------------
  1349. -- second level parsing functions, all with '_stat' suffix
  1350. -- * since they are called via a table lookup, they cannot be local
  1351. --   functions (a lookup table of local functions might be smaller...)
  1352. -- * stat() -> *_stat()
  1353. ----------------------------------------------------------------------]]
  1354.  
  1355. ----------------------------------------------------------------------
  1356. -- initial parsing for a for loop, calls fornum() or forlist()
  1357. -- * removed 'line' parameter (used to set debug information only)
  1358. -- * used in stat()
  1359. ----------------------------------------------------------------------
  1360.  
  1361. local function for_stat()
  1362.   -- stat -> for_stat -> FOR (fornum | forlist) END
  1363.   local line = line
  1364.   enterblock(true)  -- scope for loop and control variables
  1365.   nextt()  -- skip 'for'
  1366.   local varname = str_checkname()  -- first variable name
  1367.   local c = tok
  1368.   if c == "=" then
  1369.     fornum(varname)
  1370.   elseif c == "," or c == "in" then
  1371.     forlist(varname)
  1372.   else
  1373.     syntaxerror("'=' or 'in' expected")
  1374.   end
  1375.   check_match("end", "for", line)
  1376.   leaveblock()  -- loop scope (`break' jumps to this point)
  1377. end
  1378.  
  1379. ----------------------------------------------------------------------
  1380. -- parse a while-do control structure, body processed by block()
  1381. -- * used in stat()
  1382. ----------------------------------------------------------------------
  1383.  
  1384. local function while_stat()
  1385.   -- stat -> while_stat -> WHILE cond DO block END
  1386.   local line = line
  1387.   nextt()  -- skip WHILE
  1388.   cond()  -- parse condition
  1389.   enterblock(true)
  1390.   checknext("do")
  1391.   block()
  1392.   check_match("end", "while", line)
  1393.   leaveblock()
  1394. end
  1395.  
  1396. ----------------------------------------------------------------------
  1397. -- parse a repeat-until control structure, body parsed by chunk()
  1398. -- * originally, repeatstat() calls breakstat() too if there is an
  1399. --   upvalue in the scope block; nothing is actually lexed, it is
  1400. --   actually the common code in breakstat() for closing of upvalues
  1401. -- * used in stat()
  1402. ----------------------------------------------------------------------
  1403.  
  1404. local function repeat_stat()
  1405.   -- stat -> repeat_stat -> REPEAT block UNTIL cond
  1406.   local line = line
  1407.   enterblock(true)  -- loop block
  1408.   enterblock(false)  -- scope block
  1409.   nextt()  -- skip REPEAT
  1410.   chunk()
  1411.   check_match("until", "repeat", line)
  1412.   cond()
  1413.   -- close upvalues at scope level below
  1414.   leaveblock()  -- finish scope
  1415.   leaveblock()  -- finish loop
  1416. end
  1417.  
  1418. ----------------------------------------------------------------------
  1419. -- parse an if control structure
  1420. -- * used in stat()
  1421. ----------------------------------------------------------------------
  1422.  
  1423. local function if_stat()
  1424.   -- stat -> if_stat -> IF cond THEN block
  1425.   --                    {ELSEIF cond THEN block} [ELSE block] END
  1426.   local line = line
  1427.   local v = {}
  1428.   test_then_block()  -- IF cond THEN block
  1429.   while tok == "elseif" do
  1430.     test_then_block()  -- ELSEIF cond THEN block
  1431.   end
  1432.   if tok == "else" then
  1433.     nextt()  -- skip ELSE
  1434.     block()  -- 'else' part
  1435.   end
  1436.   check_match("end", "if", line)
  1437. end
  1438.  
  1439. ----------------------------------------------------------------------
  1440. -- parse a return statement
  1441. -- * used in stat()
  1442. ----------------------------------------------------------------------
  1443.  
  1444. local function return_stat()
  1445.   -- stat -> return_stat -> RETURN explist
  1446.   local e = {}
  1447.   nextt()  -- skip RETURN
  1448.   local c = tok
  1449.   if block_follow[c] or c == ";" then
  1450.     -- return no values
  1451.   else
  1452.     explist1(e)  -- optional return values
  1453.   end
  1454. end
  1455.  
  1456. ----------------------------------------------------------------------
  1457. -- parse a break statement
  1458. -- * used in stat()
  1459. ----------------------------------------------------------------------
  1460.  
  1461. local function break_stat()
  1462.   -- stat -> break_stat -> BREAK
  1463.   local bl = fs.bl
  1464.   nextt()  -- skip BREAK
  1465.   while bl and not bl.isbreakable do -- find a breakable block
  1466.     bl = bl.prev
  1467.   end
  1468.   if not bl then
  1469.     syntaxerror("no loop to break")
  1470.   end
  1471. end
  1472.  
  1473. ----------------------------------------------------------------------
  1474. -- parse a function call with no returns or an assignment statement
  1475. -- * the struct with .prev is used for name searching in lparser.c,
  1476. --   so it is retained for now; it is also present in assignment()
  1477. -- * used in stat()
  1478. ----------------------------------------------------------------------
  1479.  
  1480. local function expr_stat()
  1481.   local id = tpos - 1
  1482.   -- stat -> expr_stat -> func | assignment
  1483.   local v = {}
  1484.   v.v = {}
  1485.   primaryexp(v.v)
  1486.   if v.v.k == "VCALL" then  -- stat -> func
  1487.     -- call statement uses no results
  1488.     statinfo[id] = "call"
  1489.   else  -- stat -> assignment
  1490.     v.prev = nil
  1491.     assignment(v)
  1492.     statinfo[id] = "assign"
  1493.   end
  1494. end
  1495.  
  1496. ----------------------------------------------------------------------
  1497. -- parse a function statement
  1498. -- * used in stat()
  1499. ----------------------------------------------------------------------
  1500.  
  1501. local function function_stat()
  1502.   -- stat -> function_stat -> FUNCTION funcname body
  1503.   local line = line
  1504.   local v, b = {}, {}
  1505.   nextt()  -- skip FUNCTION
  1506.   local needself = funcname(v)
  1507.   body(b, needself, line)
  1508. end
  1509.  
  1510. ----------------------------------------------------------------------
  1511. -- parse a simple block enclosed by a DO..END pair
  1512. -- * used in stat()
  1513. ----------------------------------------------------------------------
  1514.  
  1515. local function do_stat()
  1516.   -- stat -> do_stat -> DO block END
  1517.   local line = line
  1518.   nextt()  -- skip DO
  1519.   block()
  1520.   check_match("end", "do", line)
  1521. end
  1522.  
  1523. ----------------------------------------------------------------------
  1524. -- parse a statement starting with LOCAL
  1525. -- * used in stat()
  1526. ----------------------------------------------------------------------
  1527.  
  1528. local function local_stat()
  1529.   -- stat -> local_stat -> LOCAL FUNCTION localfunc
  1530.   --                    -> LOCAL localstat
  1531.   nextt()  -- skip LOCAL
  1532.   if testnext("function") then  -- local function?
  1533.     localfunc()
  1534.   else
  1535.     localstat()
  1536.   end
  1537. end
  1538.  
  1539. --[[--------------------------------------------------------------------
  1540. -- main functions, top level parsing functions
  1541. -- * accessible functions are: init(lexer), parser()
  1542. -- * [entry] -> parser() -> chunk() -> stat()
  1543. ----------------------------------------------------------------------]]
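-- A minimal usage sketch (illustrative only, assuming the lexer's three
-- parallel lists -- tokens, semantic info, line numbers -- are at hand):
--
--   init(toklist, seminfolist, toklnlist)   -- feed lexer output
--   local parsed = parser()
--   -- parsed.globalinfo, parsed.localinfo, parsed.statinfo, ...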
  1544.  
  1545. ----------------------------------------------------------------------
  1546. -- initial parsing for statements, calls '_stat' suffixed functions
  1547. -- * used in chunk()
  1548. ----------------------------------------------------------------------
  1549.  
  1550. local stat_call = {             -- lookup for calls in stat()
  1551.   ["if"] = if_stat,
  1552.   ["while"] = while_stat,
  1553.   ["do"] = do_stat,
  1554.   ["for"] = for_stat,
  1555.   ["repeat"] = repeat_stat,
  1556.   ["function"] = function_stat,
  1557.   ["local"] = local_stat,
  1558.   ["return"] = return_stat,
  1559.   ["break"] = break_stat,
  1560. }
  1561.  
  1562. local function stat()
  1563.   -- stat -> if_stat while_stat do_stat for_stat repeat_stat
  1564.   --         function_stat local_stat return_stat break_stat
  1565.   --         expr_stat
  1566.   line = ln  -- may be needed for error messages
  1567.   local c = tok
  1568.   local fn = stat_call[c]
  1569.   -- handles: if while do for repeat function local return break
  1570.   if fn then
  1571.     statinfo[tpos - 1] = c
  1572.     fn()
  1573.     -- return or break must be last statement
  1574.     if c == "return" or c == "break" then return true end
  1575.   else
  1576.     expr_stat()
  1577.   end
  1578.   return false
  1579. end
  1580.  
  1581. ----------------------------------------------------------------------
  1582. -- parse a chunk, which consists of a bunch of statements
  1583. -- * used in parser(), body(), block(), repeat_stat()
  1584. ----------------------------------------------------------------------
  1585.  
  1586. -- this is a forward-referenced local
  1587. function chunk()
  1588.   -- chunk -> { stat [';'] }
  1589.   local islast = false
  1590.   while not islast and not block_follow[tok] do
  1591.     islast = stat()
  1592.     testnext(";")
  1593.   end
  1594. end
  1595.  
  1596. ----------------------------------------------------------------------
  1597. -- performs parsing, returns parsed data structure
  1598. ----------------------------------------------------------------------
  1599.  
  1600. function parser()
  1601.   open_func()
  1602.   fs.is_vararg = true  -- main func. is always vararg
  1603.   nextt()  -- read first token
  1604.   chunk()
  1605.   check("<eof>")
  1606.   close_func()
  1607.   return {  -- return everything
  1608.     globalinfo = globalinfo,
  1609.     localinfo = localinfo,
  1610.     statinfo = statinfo,
  1611.     toklist = toklist,
  1612.     seminfolist = seminfolist,
  1613.     toklnlist = toklnlist,
  1614.     xreflist = xreflist,
  1615.   }
  1616. end
  1617.  
  1618. ----------------------------------------------------------------------
  1619. -- initialization function
  1620. ----------------------------------------------------------------------
  1621.  
  1622. function init(tokorig, seminfoorig, toklnorig)
  1623.   tpos = 1                      -- token position
  1624.   top_fs = {}                   -- reset top level function state
  1625.   ------------------------------------------------------------------
  1626.   -- set up grammar-only token tables; impedance-matching...
  1627. --   note that constants returned by the lexer are source-level, so
  1628. --   fake(!) constant tokens (TK_NUMBER|TK_STRING|TK_LSTRING) are used
  1629.   ------------------------------------------------------------------
  1630.   local j = 1
  1631.   toklist, seminfolist, toklnlist, xreflist = {}, {}, {}, {}
  1632.   for i = 1, #tokorig do
  1633.     local tok = tokorig[i]
  1634.     local yep = true
  1635.     if tok == "TK_KEYWORD" or tok == "TK_OP" then
  1636.       tok = seminfoorig[i]
  1637.     elseif tok == "TK_NAME" then
  1638.       tok = "<name>"
  1639.       seminfolist[j] = seminfoorig[i]
  1640.     elseif tok == "TK_NUMBER" then
  1641.       tok = "<number>"
  1642.       seminfolist[j] = 0  -- fake!
  1643.     elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
  1644.       tok = "<string>"
  1645.       seminfolist[j] = ""  -- fake!
  1646.     elseif tok == "TK_EOS" then
  1647.       tok = "<eof>"
  1648.     else
  1649.       -- non-grammar tokens; ignore them
  1650.       yep = false
  1651.     end
  1652.     if yep then  -- set rest of the information
  1653.       toklist[j] = tok
  1654.       toklnlist[j] = toklnorig[i]
  1655.       xreflist[j] = i
  1656.       j = j + 1
  1657.     end
  1658.   end--for
  1659.   ------------------------------------------------------------------
  1660.   -- initialize data structures for variable tracking
  1661.   ------------------------------------------------------------------
  1662.   globalinfo, globallookup, localinfo = {}, {}, {}
  1663.   ilocalinfo, ilocalrefs = {}, {}
  1664.   statinfo = {}  -- experimental
  1665. end
  1666. --end of inserted module
  1667. end
  1668.  
  1669. -- preload function for module optlex
  1670. preload.optlex =
  1671. function()
  1672. --start of inserted module
  1673. module "optlex"
  1674.  
  1675. local string = base.require "string"
  1676. local match = string.match
  1677. local sub = string.sub
  1678. local find = string.find
  1679. local rep = string.rep
  1680. local print
  1681.  
  1682. ------------------------------------------------------------------------
  1683. -- variables and data structures
  1684. ------------------------------------------------------------------------
  1685.  
  1686. -- error function, can override by setting own function into module
  1687. error = base.error
  1688.  
  1689. warn = {}                       -- table for warning flags
  1690.  
  1691. local stoks, sinfos, stoklns    -- source lists
  1692.  
  1693. local is_realtoken = {          -- significant (grammar) tokens
  1694.   TK_KEYWORD = true,
  1695.   TK_NAME = true,
  1696.   TK_NUMBER = true,
  1697.   TK_STRING = true,
  1698.   TK_LSTRING = true,
  1699.   TK_OP = true,
  1700.   TK_EOS = true,
  1701. }
  1702. local is_faketoken = {          -- whitespace (non-grammar) tokens
  1703.   TK_COMMENT = true,
  1704.   TK_LCOMMENT = true,
  1705.   TK_EOL = true,
  1706.   TK_SPACE = true,
  1707. }
  1708.  
  1709. local opt_details               -- for extra information
  1710.  
  1711. ------------------------------------------------------------------------
  1712. -- true if current token is at the start of a line
  1713. -- * skips over deleted tokens via recursion
  1714. ------------------------------------------------------------------------
  1715.  
  1716. local function atlinestart(i)
  1717.   local tok = stoks[i - 1]
  1718.   if i <= 1 or tok == "TK_EOL" then
  1719.     return true
  1720.   elseif tok == "" then
  1721.     return atlinestart(i - 1)
  1722.   end
  1723.   return false
  1724. end
  1725.  
  1726. ------------------------------------------------------------------------
  1727. -- true if current token is at the end of a line
  1728. -- * skips over deleted tokens via recursion
  1729. ------------------------------------------------------------------------
  1730.  
  1731. local function atlineend(i)
  1732.   local tok = stoks[i + 1]
  1733.   if i >= #stoks or tok == "TK_EOL" or tok == "TK_EOS" then
  1734.     return true
  1735.   elseif tok == "" then
  1736.     return atlineend(i + 1)
  1737.   end
  1738.   return false
  1739. end
  1740.  
  1741. ------------------------------------------------------------------------
  1742. -- counts comment EOLs inside a long comment
  1743. -- * in order to keep line numbering, EOLs need to be reinserted
  1744. ------------------------------------------------------------------------
  1745.  
  1746. local function commenteols(lcomment)
  1747.   local sep = #match(lcomment, "^%-%-%[=*%[")
  1748.   local z = sub(lcomment, sep + 1, -(sep - 1))  -- remove delims
  1749.   local i, c = 1, 0
  1750.   while true do
  1751.     local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
  1752.     if not p then break end     -- if no matches, done
  1753.     i = p + 1
  1754.     c = c + 1
  1755.     if #s > 0 and r ~= s then   -- skip CRLF or LFCR
  1756.       i = i + 1
  1757.     end
  1758.   end
  1759.   return c
  1760. end
  1761.  
  1762. ------------------------------------------------------------------------
  1763. -- compares two tokens (i, j) and returns the whitespace required
  1764. -- * see documentation for a reference table of interactions
  1765. -- * only two grammar/real tokens are being considered
  1766. -- * if "", no separation is needed
  1767. -- * if " ", then at least one whitespace (or EOL) is required
  1768. -- * NOTE: this doesn't work at the start or the end or for EOS!
  1769. ------------------------------------------------------------------------
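-- Illustrative results, read off the branches below (not exhaustive):
--   <name>  <keyword>   e.g. x then   -> " "  (must stay separated)
--   TK_OP   <name>      e.g. = x      -> ""   (can abut)
--   ".."    "."                       -> " "  (would merge into "...")
--   "<"     "="                       -> " "  (would merge into "<=")
--   "["     "["                       -> " "  (would open a long bracket)
--   ".."    <number>                  -> " "  (dots could join the number)
--   a string or long string next to anything  -> ""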
  1770.  
  1771. local function checkpair(i, j)
  1772.   local match = match
  1773.   local t1, t2 = stoks[i], stoks[j]
  1774.   --------------------------------------------------------------------
  1775.   if t1 == "TK_STRING" or t1 == "TK_LSTRING" or
  1776.      t2 == "TK_STRING" or t2 == "TK_LSTRING" then
  1777.     return ""
  1778.   --------------------------------------------------------------------
  1779.   elseif t1 == "TK_OP" or t2 == "TK_OP" then
  1780.     if (t1 == "TK_OP" and (t2 == "TK_KEYWORD" or t2 == "TK_NAME")) or
  1781.        (t2 == "TK_OP" and (t1 == "TK_KEYWORD" or t1 == "TK_NAME")) then
  1782.       return ""
  1783.     end
  1784.     if t1 == "TK_OP" and t2 == "TK_OP" then
  1785.       -- for TK_OP/TK_OP pairs, see notes in technotes.txt
  1786.       local op, op2 = sinfos[i], sinfos[j]
  1787.       if (match(op, "^%.%.?$") and match(op2, "^%.")) or
  1788.          (match(op, "^[~=<>]$") and op2 == "=") or
  1789.          (op == "[" and (op2 == "[" or op2 == "=")) then
  1790.         return " "
  1791.       end
  1792.       return ""
  1793.     end
  1794.     -- "TK_OP" + "TK_NUMBER" case
  1795.     local op = sinfos[i]
  1796.     if t2 == "TK_OP" then op = sinfos[j] end
  1797.     if match(op, "^%.%.?%.?$") then
  1798.       return " "
  1799.     end
  1800.     return ""
  1801.   --------------------------------------------------------------------
  1802.   else-- "TK_KEYWORD" | "TK_NAME" | "TK_NUMBER"
  1803.     return " "
  1804.   --------------------------------------------------------------------
  1805.   end
  1806. end
  1807.  
  1808. ------------------------------------------------------------------------
  1809. -- repack tokens, removing deletions caused by optimization process
  1810. ------------------------------------------------------------------------
  1811.  
  1812. local function repack_tokens()
  1813.   local dtoks, dinfos, dtoklns = {}, {}, {}
  1814.   local j = 1
  1815.   for i = 1, #stoks do
  1816.     local tok = stoks[i]
  1817.     if tok ~= "" then
  1818.       dtoks[j], dinfos[j], dtoklns[j] = tok, sinfos[i], stoklns[i]
  1819.       j = j + 1
  1820.     end
  1821.   end
  1822.   stoks, sinfos, stoklns = dtoks, dinfos, dtoklns
  1823. end
  1824.  
  1825. ------------------------------------------------------------------------
  1826. -- number optimization
  1827. -- * optimization using string formatting functions is one way of doing
  1828. --   this, but here, we consider all cases and handle them separately
  1829. --   (possibly an idiotic approach...)
  1830. -- * scientific notation being generated is not in canonical form;
  1831. --   this may or may not be a bad thing
  1832. -- * note: intermediate portions need to fit into a normal number range
  1833. -- * optimizations can be divided based on number patterns:
  1834. -- * hexadecimal:
  1835. --   (1) no need to remove leading zeros, just skip to (2)
  1836. --   (2) convert to integer if size equal or smaller
  1837. --       * change if equal size -> lose the 'x' to reduce entropy
  1838. --   (3) number is then processed as an integer
  1839. --   (4) note: does not make 0[xX] consistent
  1840. -- * integer:
  1841. --   (1) note: includes anything with trailing ".", ".0", ...
  1842. --   (2) remove useless fractional part, if present, e.g. 123.000
  1843. --   (3) remove leading zeros, e.g. 000123
  1844. --   (4) switch to scientific if shorter, e.g. 123000 -> 123e3
  1845. -- * with fraction:
  1846. --   (1) split into digits dot digits
  1847. --   (2) if no integer portion, take as zero (can omit later)
  1848. --   (3) handle degenerate .000 case, after which the fractional part
  1849. --       must be non-zero (if zero, it's matched as an integer)
  1850. --   (4) remove trailing zeros for fractional portion
  1851. --   (5) p.q where p > 0 and q > 0 cannot be shortened any more
  1852. --   (6) otherwise p == 0 and the form is .q, e.g. .000123
  1853. --   (7) if scientific shorter, convert, e.g. .000123 -> 123e-6
  1854. -- * scientific:
  1855. --   (1) split into (digits dot digits) [eE] ([+-] digits)
  1856. --   (2) if significand has ".", shift it out so it becomes an integer
  1857. --   (3) if significand is zero, just use zero
  1858. --   (4) remove leading zeros for significand
  1859. --   (5) shift out trailing zeros for significand
  1860. --   (6) examine exponent and determine which format is best:
  1861. --       integer, with fraction, scientific
  1862. ------------------------------------------------------------------------
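-- Worked examples of the rules above (traced by hand through do_number()
-- below; illustrative only):
--   0x3F      ->  63       hex converted to a shorter integer form
--   000123.0  ->  123      leading zeros and useless fraction removed
--   123000    ->  123e3    scientific form is shorter
--   .000123   ->  123e-6   fraction converted to scientific form
--   1.5e3     ->  1500     small positive exponent expanded
--   1234e-5   ->  .01234   small negative exponent expanded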
  1863.  
  1864. local function do_number(i)
  1865.   local before = sinfos[i]      -- 'before'
  1866.   local z = before              -- working representation
  1867.   local y                       -- 'after', if better
  1868.   --------------------------------------------------------------------
  1869.   if match(z, "^0[xX]") then            -- hexadecimal number
  1870.     local v = base.tostring(base.tonumber(z))
  1871.     if #v <= #z then
  1872.       z = v  -- change to integer, AND continue
  1873.     else
  1874.       return  -- no change; stick to hex
  1875.     end
  1876.   end
  1877.   --------------------------------------------------------------------
  1878.   if match(z, "^%d+%.?0*$") then        -- integer or has useless frac
  1879.     z = match(z, "^(%d+)%.?0*$")  -- int portion only
  1880.     if z + 0 > 0 then
  1881.       z = match(z, "^0*([1-9]%d*)$")  -- remove leading zeros
  1882.       local v = #match(z, "0*$")
  1883.       local nv = base.tostring(v)
  1884.       if v > #nv + 1 then  -- scientific is shorter
  1885.         z = sub(z, 1, #z - v).."e"..nv
  1886.       end
  1887.       y = z
  1888.     else
  1889.       y = "0"  -- basic zero
  1890.     end
  1891.   --------------------------------------------------------------------
  1892.   elseif not match(z, "[eE]") then      -- number with fraction part
  1893.     local p, q = match(z, "^(%d*)%.(%d+)$")  -- split
  1894.     if p == "" then p = 0 end  -- int part zero
  1895.     if q + 0 == 0 and p == 0 then
  1896.       y = "0"  -- degenerate .000 case
  1897.     else
  1898.       -- now, q > 0 holds and p is a number
  1899.       local v = #match(q, "0*$")  -- remove trailing zeros
  1900.       if v > 0 then
  1901.         q = sub(q, 1, #q - v)
  1902.       end
  1903.       -- if p > 0, nothing else we can do to simplify p.q case
  1904.       if p + 0 > 0 then
  1905.         y = p.."."..q
  1906.       else
  1907.         y = "."..q  -- tentative, e.g. .000123
  1908.         local v = #match(q, "^0*")  -- # leading zeros
  1909.         local w = #q - v            -- # significant digits
  1910.         local nv = base.tostring(#q)
  1911.         -- e.g. compare 123e-6 versus .000123
  1912.         if w + 2 + #nv < 1 + #q then
  1913.           y = sub(q, -w).."e-"..nv
  1914.         end
  1915.       end
  1916.     end
  1917.   --------------------------------------------------------------------
  1918.   else                                  -- scientific number
  1919.     local sig, ex = match(z, "^([^eE]+)[eE]([%+%-]?%d+)$")
  1920.     ex = base.tonumber(ex)
  1921.     -- if got ".", shift out fractional portion of significand
  1922.     local p, q = match(sig, "^(%d*)%.(%d*)$")
  1923.     if p then
  1924.       ex = ex - #q
  1925.       sig = p..q
  1926.     end
  1927.     if sig + 0 == 0 then
  1928.       y = "0"  -- basic zero
  1929.     else
  1930.       local v = #match(sig, "^0*")  -- remove leading zeros
  1931.       sig = sub(sig, v + 1)
  1932.       v = #match(sig, "0*$") -- shift out trailing zeros
  1933.       if v > 0 then
  1934.         sig = sub(sig, 1, #sig - v)
  1935.         ex = ex + v
  1936.       end
  1937.       -- examine exponent and determine which format is best
  1938.       local nex = base.tostring(ex)
  1939.       if ex == 0 then  -- it's just an integer
  1940.         y = sig
  1941.       elseif ex > 0 and (ex <= 1 + #nex) then  -- a number
  1942.         y = sig..rep("0", ex)
  1943.       elseif ex < 0 and (ex >= -#sig) then  -- fraction, e.g. .123
  1944.         v = #sig + ex
  1945.         y = sub(sig, 1, v).."."..sub(sig, v + 1)
  1946.       elseif ex < 0 and (#nex >= -ex - #sig) then
  1947.         -- e.g. compare 1234e-5 versus .01234
  1948.         -- gives: #sig + 1 + #nex >= 1 + (-ex - #sig) + #sig
  1949.         --     -> #nex >= -ex - #sig
  1950.         v = -ex - #sig
  1951.         y = "."..rep("0", v)..sig
  1952.       else  -- non-canonical scientific representation
  1953.         y = sig.."e"..ex
  1954.       end
  1955.     end--if sig
  1956.   end
  1957.   --------------------------------------------------------------------
  1958.   if y and y ~= sinfos[i] then
  1959.     if opt_details then
  1960.       print("<number> (line "..stoklns[i]..") "..sinfos[i].." -> "..y)
  1961.       opt_details = opt_details + 1
  1962.     end
  1963.     sinfos[i] = y
  1964.   end
  1965. end
  1966.  
  1967. ------------------------------------------------------------------------
  1968. -- string optimization
  1969. -- * note: works on well-formed strings only!
  1970. -- * optimizations on characters can be summarized as follows:
  1971. --   \a\b\f\n\r\t\v -- no change
  1972. --   \\ -- no change
  1973. --   \"\' -- depends on delim, other can remove \
  1974. --   \[\] -- remove \
  1975. --   \<char> -- general escape, remove \
  1976. --   \<eol> -- normalize the EOL only
  1977. --   \ddd -- if \a\b\f\n\r\t\v, change to latter
  1978. --           if other < ascii 32, keep ddd but zap leading zeros
  1979. --                                but cannot have following digits
  1980. --           if >= ascii 32, translate it into the literal, then also
  1981. --                           do escapes for \\,\",\' cases
  1982. --   <other> -- no change
  1983. -- * switch delimiters if string becomes shorter
  1984. ------------------------------------------------------------------------
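-- Worked examples of the rules above (traced by hand through do_string()
-- below; illustrative only):
--   '\065\066'  ->  'AB'      \ddd >= ascii 32 become literal characters
--   "\q"        ->  "q"       general escape, backslash removed
--   "\'"        ->  "'"       quote differs from the delimiter, unescaped
--   '\001'      ->  '\1'      control characters keep \ddd, zeros zapped
--   'a\'b\'c'   ->  "a'b'c"   delimiters switched to save two escapes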
  1985.  
  1986. local function do_string(I)
  1987.   local info = sinfos[I]
  1988.   local delim = sub(info, 1, 1)                 -- delimiter used
  1989.   local ndelim = (delim == "'") and '"' or "'"  -- opposite " <-> '
  1990.   local z = sub(info, 2, -2)                    -- actual string
  1991.   local i = 1
  1992.   local c_delim, c_ndelim = 0, 0                -- "/' counts
  1993.   --------------------------------------------------------------------
  1994.   while i <= #z do
  1995.     local c = sub(z, i, i)
  1996.     ----------------------------------------------------------------
  1997.     if c == "\\" then                   -- escaped stuff
  1998.       local j = i + 1
  1999.       local d = sub(z, j, j)
  2000.       local p = find("abfnrtv\\\n\r\"\'0123456789", d, 1, true)
  2001.       ------------------------------------------------------------
  2002.       if not p then                     -- \<char> -- remove \
  2003.         z = sub(z, 1, i - 1)..sub(z, j)
  2004.         i = i + 1
  2005.       ------------------------------------------------------------
  2006.       elseif p <= 8 then                -- \a\b\f\n\r\t\v\\
  2007.         i = i + 2                       -- no change
  2008.       ------------------------------------------------------------
  2009.       elseif p <= 10 then               -- \<eol> -- normalize EOL
  2010.         local eol = sub(z, j, j + 1)
  2011.         if eol == "\r\n" or eol == "\n\r" then
  2012.           z = sub(z, 1, i).."\n"..sub(z, j + 2)
  2013.         elseif p == 10 then  -- \r case
  2014.           z = sub(z, 1, i).."\n"..sub(z, j + 1)
  2015.         end
  2016.         i = i + 2
  2017.       ------------------------------------------------------------
  2018.       elseif p <= 12 then               -- \"\' -- remove \ for ndelim
  2019.         if d == delim then
  2020.           c_delim = c_delim + 1
  2021.           i = i + 2
  2022.         else
  2023.           c_ndelim = c_ndelim + 1
  2024.           z = sub(z, 1, i - 1)..sub(z, j)
  2025.           i = i + 1
  2026.         end
  2027.       ------------------------------------------------------------
  2028.       else                              -- \ddd -- various steps
  2029.         local s = match(z, "^(%d%d?%d?)", j)
  2030.         j = i + 1 + #s                  -- skip to location
  2031.         local cv = s + 0
  2032.         local cc = string.char(cv)
  2033.         local p = find("\a\b\f\n\r\t\v", cc, 1, true)
  2034.         if p then                       -- special escapes
  2035.           s = "\\"..sub("abfnrtv", p, p)
  2036.         elseif cv < 32 then             -- normalized \ddd
  2037.           if match(sub(z, j, j), "%d") then
  2038.             -- if a digit follows, \ddd cannot be shortened
  2039.             s = "\\"..s
  2040.           else
  2041.             s = "\\"..cv
  2042.           end
  2043.         elseif cc == delim then         -- \<delim>
  2044.           s = "\\"..cc
  2045.           c_delim = c_delim + 1
  2046.         elseif cc == "\\" then          -- \\
  2047.           s = "\\\\"
  2048.         else                            -- literal character
  2049.           s = cc
  2050.           if cc == ndelim then
  2051.             c_ndelim = c_ndelim + 1
  2052.           end
  2053.         end
  2054.         z = sub(z, 1, i - 1)..s..sub(z, j)
  2055.         i = i + #s
  2056.       ------------------------------------------------------------
  2057.       end--if p
  2058.     ----------------------------------------------------------------
  2059.     else-- c ~= "\\"                    -- <other> -- no change
  2060.       i = i + 1
  2061.       if c == ndelim then  -- count ndelim, for switching delimiters
  2062.         c_ndelim = c_ndelim + 1
  2063.       end
  2064.     ----------------------------------------------------------------
  2065.     end--if c
  2066.   end--while
  2067.   --------------------------------------------------------------------
  2068.   -- switching delimiters, a long-winded derivation:
  2069.   -- (1) delim takes 2+2*c_delim bytes, ndelim takes c_ndelim bytes
  2070.   -- (2) delim becomes c_delim bytes, ndelim becomes 2+2*c_ndelim bytes
  2071.   -- simplifying the condition (1)>(2) --> c_delim > c_ndelim
  2072.   if c_delim > c_ndelim then
  2073.     i = 1
  2074.     while i <= #z do
  2075.       local p, q, r = find(z, "([\'\"])", i)
  2076.       if not p then break end
  2077.       if r == delim then                -- \<delim> -> <delim>
  2078.         z = sub(z, 1, p - 2)..sub(z, p)
  2079.         i = p
  2080.       else-- r == ndelim                -- <ndelim> -> \<ndelim>
  2081.         z = sub(z, 1, p - 1).."\\"..sub(z, p)
  2082.         i = p + 2
  2083.       end
  2084.     end--while
  2085.     delim = ndelim  -- actually change delimiters
  2086.   end
  2087.   --------------------------------------------------------------------
  2088.   z = delim..z..delim
  2089.   if z ~= sinfos[I] then
  2090.     if opt_details then
  2091.       print("<string> (line "..stoklns[I]..") "..sinfos[I].." -> "..z)
  2092.       opt_details = opt_details + 1
  2093.     end
  2094.     sinfos[I] = z
  2095.   end
  2096. end
  2097.  
  2098. ------------------------------------------------------------------------
  2099. -- long string optimization
  2100. -- * note: warning flagged if trailing whitespace found, not trimmed
  2101. -- * remove first optional newline
  2102. -- * normalize embedded newlines
  2103. -- * reduce '=' separators in delimiters if possible
  2104. ------------------------------------------------------------------------
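-- Illustrative examples (traced by hand through do_lstring() below):
--   [==[hello]==]  ->  [[hello]]     no ']]' inside, all '=' can go
--   [==[x]]y]==]   ->  [=[x]]y]=]    one '=' must remain to stay valid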
  2105.  
  2106. local function do_lstring(I)
  2107.   local info = sinfos[I]
  2108.   local delim1 = match(info, "^%[=*%[")  -- cut out delimiters
  2109.   local sep = #delim1
  2110.   local delim2 = sub(info, -sep, -1)
  2111.   local z = sub(info, sep + 1, -(sep + 1))  -- lstring without delims
  2112.   local y = ""
  2113.   local i = 1
  2114.   --------------------------------------------------------------------
  2115.   while true do
  2116.     local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
  2117.     -- deal with a single line
  2118.     local ln
  2119.     if not p then
  2120.       ln = sub(z, i)
  2121.     elseif p >= i then
  2122.       ln = sub(z, i, p - 1)
  2123.     end
  2124.     if ln ~= "" then
  2125.       -- flag a warning if there are trailing spaces, won't optimize!
  2126.       if match(ln, "%s+$") then
  2127.         warn.LSTRING = "trailing whitespace in long string near line "..stoklns[I]
  2128.       end
  2129.       y = y..ln
  2130.     end
  2131.     if not p then  -- done if no more EOLs
  2132.       break
  2133.     end
  2134.     -- deal with line endings, normalize them
  2135.     i = p + 1
  2136.     if p then
  2137.       if #s > 0 and r ~= s then  -- skip CRLF or LFCR
  2138.         i = i + 1
  2139.       end
  2140.       -- skip first newline, which can be safely deleted
  2141.       if not(i == 1 and i == p) then
  2142.         y = y.."\n"
  2143.       end
  2144.     end
  2145.   end--while
  2146.   --------------------------------------------------------------------
  2147.   -- handle possible deletion of one or more '=' separators
  2148.   if sep >= 3 then
  2149.     local chk, okay = sep - 1
  2150.     -- loop to test the ending delimiter with fewer '=' down to zero
  2151.     while chk >= 2 do
  2152.       local delim = "%]"..rep("=", chk - 2).."%]"
  2153.       if not match(y, delim) then okay = chk end
  2154.       chk = chk - 1
  2155.     end
  2156.     if okay then  -- change delimiters
  2157.       sep = rep("=", okay - 2)
  2158.       delim1, delim2 = "["..sep.."[", "]"..sep.."]"
  2159.     end
  2160.   end
  2161.   --------------------------------------------------------------------
  2162.   sinfos[I] = delim1..y..delim2
  2163. end
  2164.  
  2165. ------------------------------------------------------------------------
  2166. -- long comment optimization
  2167. -- * note: does not remove first optional newline
  2168. -- * trim trailing whitespace
  2169. -- * normalize embedded newlines
  2170. -- * reduce '=' separators in delimiters if possible
  2171. ------------------------------------------------------------------------
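-- Illustrative example (traced by hand through do_lcomment() below),
-- where <CRLF>/<LF> stand for the actual end-of-line characters:
--   --[[ keep this   <CRLF> done  ]]  ->  --[[ keep this<LF> done]]
--   (trailing spaces on each line are trimmed, the CRLF becomes a LF)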
  2172.  
  2173. local function do_lcomment(I)
  2174.   local info = sinfos[I]
  2175.   local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  2176.   local sep = #delim1
  2177.   local delim2 = sub(info, -(sep - 2), -1)
  2178.   local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  2179.   local y = ""
  2180.   local i = 1
  2181.   --------------------------------------------------------------------
  2182.   while true do
  2183.     local p, q, r, s = find(z, "([\r\n])([\r\n]?)", i)
  2184.     -- deal with a single line, extract and check trailing whitespace
  2185.     local ln
  2186.     if not p then
  2187.       ln = sub(z, i)
  2188.     elseif p >= i then
  2189.       ln = sub(z, i, p - 1)
  2190.     end
  2191.     if ln ~= "" then
  2192.       -- trim trailing whitespace if non-empty line
  2193.       local ws = match(ln, "%s*$")
  2194.       if #ws > 0 then ln = sub(ln, 1, -(#ws + 1)) end
  2195.       y = y..ln
  2196.     end
  2197.     if not p then  -- done if no more EOLs
  2198.       break
  2199.     end
  2200.     -- deal with line endings, normalize them
  2201.     i = p + 1
  2202.     if p then
  2203.       if #s > 0 and r ~= s then  -- skip CRLF or LFCR
  2204.         i = i + 1
  2205.       end
  2206.       y = y.."\n"
  2207.     end
  2208.   end--while
  2209.   --------------------------------------------------------------------
  2210.   -- handle possible deletion of one or more '=' separators
  2211.   sep = sep - 2
  2212.   if sep >= 3 then
  2213.     local chk, okay = sep - 1
  2214.     -- loop to test the ending delimiter with fewer '=' down to zero
  2215.     while chk >= 2 do
  2216.       local delim = "%]"..rep("=", chk - 2).."%]"
  2217.       if not match(y, delim) then okay = chk end
  2218.       chk = chk - 1
  2219.     end
  2220.     if okay then  -- change delimiters
  2221.       sep = rep("=", okay - 2)
  2222.       delim1, delim2 = "--["..sep.."[", "]"..sep.."]"
  2223.     end
  2224.   end
  2225.   --------------------------------------------------------------------
  2226.   sinfos[I] = delim1..y..delim2
  2227. end
  2228.  
  2229. ------------------------------------------------------------------------
  2230. -- short comment optimization
  2231. -- * trim trailing whitespace
  2232. ------------------------------------------------------------------------
  2233.  
  2234. local function do_comment(i)
  2235.   local info = sinfos[i]
  2236.   local ws = match(info, "%s*$")        -- just look from end of string
  2237.   if #ws > 0 then
  2238.     info = sub(info, 1, -(#ws + 1))     -- trim trailing whitespace
  2239.   end
  2240.   sinfos[i] = info
  2241. end
  2242.  
  2243. ------------------------------------------------------------------------
  2244. -- returns true if string found in long comment
  2245. -- * this is a feature to keep copyright or license texts
  2246. ------------------------------------------------------------------------
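-- Illustrative behaviour (a plain-text find on the comment body):
--   keep_lcomment("Copyright", "--[[ Copyright (c) 2008 ]]")  -> true
--   keep_lcomment(nil, "--[[ anything ]]")                    -> false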
  2247.  
  2248. local function keep_lcomment(opt_keep, info)
  2249.   if not opt_keep then return false end  -- option not set
  2250.   local delim1 = match(info, "^%-%-%[=*%[")  -- cut out delimiters
  2251.   local sep = #delim1
  2252.   local delim2 = sub(info, -sep, -1)
  2253.   local z = sub(info, sep + 1, -(sep - 1))  -- comment without delims
  2254.   if find(z, opt_keep, 1, true) then  -- try to match
  2255.     return true
  2256.   end
  2257. end
  2258.  
  2259. ------------------------------------------------------------------------
  2260. -- main entry point
  2261. -- * currently, lexer processing has 2 passes
  2262. -- * processing is done on a line-oriented basis, which is easier to
  2263. --   grok due to the next point...
  2264. -- * since there are various options that can be enabled or disabled,
  2265. --   processing is a little messy or convoluted
  2266. ------------------------------------------------------------------------
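-- A minimal usage sketch (illustrative only; the option keys are the
-- same flags read at the top of optimize() below, and the three lists
-- are the lexer's output):
--
--   local toks, infos, toklns = optimize(
--     { ["opt-whitespace"] = true, ["opt-numbers"] = true },
--     toklist, seminfolist, toklnlist)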
  2267.  
  2268. function optimize(option, toklist, semlist, toklnlist)
  2269.   --------------------------------------------------------------------
  2270.   -- set option flags
  2271.   --------------------------------------------------------------------
  2272.   local opt_comments = option["opt-comments"]
  2273.   local opt_whitespace = option["opt-whitespace"]
  2274.   local opt_emptylines = option["opt-emptylines"]
  2275.   local opt_eols = option["opt-eols"]
  2276.   local opt_strings = option["opt-strings"]
  2277.   local opt_numbers = option["opt-numbers"]
  2278.   local opt_x = option["opt-experimental"]
  2279.   local opt_keep = option.KEEP
  2280.   opt_details = option.DETAILS and 0  -- upvalues for details display
  2281.   print = print or base.print
  2282.   if opt_eols then  -- forced settings, otherwise won't work properly
  2283.     opt_comments = true
  2284.     opt_whitespace = true
  2285.     opt_emptylines = true
  2286.   elseif opt_x then
  2287.     opt_whitespace = true
  2288.   end
  2289.   --------------------------------------------------------------------
  2290.   -- variable initialization
  2291.   --------------------------------------------------------------------
  2292.   stoks, sinfos, stoklns                -- set source lists
  2293.     = toklist, semlist, toklnlist
  2294.   local i = 1                           -- token position
  2295.   local tok, info                       -- current token
  2296.   local prev    -- position of last grammar token
  2297.                 -- on same line (for TK_SPACE stuff)
  2298.   --------------------------------------------------------------------
  2299.   -- changes a token, info pair
  2300.   --------------------------------------------------------------------
  2301.   local function settoken(tok, info, I)
  2302.     I = I or i
  2303.     stoks[I] = tok or ""
  2304.     sinfos[I] = info or ""
  2305.   end
  2306.   --------------------------------------------------------------------
  2307.   -- experimental optimization for ';' operator
  2308.   --------------------------------------------------------------------
  2309.   if opt_x then
  2310.     while true do
  2311.       tok, info = stoks[i], sinfos[i]
  2312.       if tok == "TK_EOS" then           -- end of stream/pass
  2313.         break
  2314.       elseif tok == "TK_OP" and info == ";" then
  2315.         -- ';' operator found, since it is entirely optional, set it
  2316.         -- as a space to let whitespace optimization do the rest
  2317.         settoken("TK_SPACE", " ")
  2318.       end
  2319.       i = i + 1
  2320.     end
  2321.     repack_tokens()
  2322.   end
  2323.   --------------------------------------------------------------------
  2324.   -- processing loop (PASS 1)
  2325.   --------------------------------------------------------------------
  2326.   i = 1
  2327.   while true do
  2328.     tok, info = stoks[i], sinfos[i]
  2329.     ----------------------------------------------------------------
  2330.     local atstart = atlinestart(i)      -- set line begin flag
  2331.     if atstart then prev = nil end
  2332.     ----------------------------------------------------------------
  2333.     if tok == "TK_EOS" then             -- end of stream/pass
  2334.       break
  2335.     ----------------------------------------------------------------
  2336.     elseif tok == "TK_KEYWORD" or       -- keywords, identifiers,
  2337.            tok == "TK_NAME" or          -- operators
  2338.            tok == "TK_OP" then
  2339.       -- TK_KEYWORD and TK_OP can't be optimized without a big
  2340.       -- optimization framework; it would be more of an optimizing
  2341.       -- compiler, not a source code compressor
  2342.       -- TK_NAMEs that are locals need the parser to analyze/optimize
  2343.       prev = i
  2344.     ----------------------------------------------------------------
  2345.     elseif tok == "TK_NUMBER" then      -- numbers
  2346.       if opt_numbers then
  2347.         do_number(i)  -- optimize
  2348.       end
  2349.       prev = i
  2350.     ----------------------------------------------------------------
  2351.     elseif tok == "TK_STRING" or        -- strings, long strings
  2352.            tok == "TK_LSTRING" then
  2353.       if opt_strings then
  2354.         if tok == "TK_STRING" then
  2355.           do_string(i)  -- optimize
  2356.         else
  2357.           do_lstring(i)  -- optimize
  2358.         end
  2359.       end
  2360.       prev = i
  2361.     ----------------------------------------------------------------
  2362.     elseif tok == "TK_COMMENT" then     -- short comments
  2363.       if opt_comments then
  2364.         if i == 1 and sub(info, 1, 1) == "#" then
  2365.           -- keep shbang comment, trim whitespace
  2366.           do_comment(i)
  2367.         else
  2368.           -- safe to delete, as a TK_EOL (or TK_EOS) always follows
  2369.           settoken()  -- remove entirely
  2370.         end
  2371.       elseif opt_whitespace then        -- trim whitespace only
  2372.         do_comment(i)
  2373.       end
  2374.     ----------------------------------------------------------------
  2375.     elseif tok == "TK_LCOMMENT" then    -- long comments
  2376.       if keep_lcomment(opt_keep, info) then
  2377.         ------------------------------------------------------------
  2378.         -- if --keep, we keep a long comment if <msg> is found;
  2379.         -- this is a feature to keep copyright or license texts
  2380.         if opt_whitespace then          -- trim whitespace only
  2381.           do_lcomment(i)
  2382.         end
  2383.         prev = i
  2384.       elseif opt_comments then
  2385.         local eols = commenteols(info)
  2386.         ------------------------------------------------------------
  2387.         -- prepare the opt_emptylines case first: if a disposable token
  2388.         -- follows, the current one is safe to dump, else keep a space;
  2389.         -- the operation is implied to be safe for '-', because the
  2390.         -- current token is a TK_LCOMMENT and must be separate from a '-'
  2391.         if is_faketoken[stoks[i + 1]] then
  2392.           settoken()  -- remove entirely
  2393.           tok = ""
  2394.         else
  2395.           settoken("TK_SPACE", " ")
  2396.         end
  2397.         ------------------------------------------------------------
  2398.         -- if there are embedded EOLs to keep and opt_emptylines is
  2399.         -- disabled, then switch the token into one or more EOLs
  2400.         if not opt_emptylines and eols > 0 then
  2401.           settoken("TK_EOL", rep("\n", eols))
  2402.         end
  2403.         ------------------------------------------------------------
  2404.         -- if optimizing whitespaces, force reinterpretation of the
  2405.         -- token to give a chance for the space to be optimized away
  2406.         if opt_whitespace and tok ~= "" then
  2407.           i = i - 1  -- to reinterpret
  2408.         end
  2409.         ------------------------------------------------------------
  2410.       else                              -- disabled case
  2411.         if opt_whitespace then          -- trim whitespace only
  2412.           do_lcomment(i)
  2413.         end
  2414.         prev = i
  2415.       end
  2416.     ----------------------------------------------------------------
  2417.     elseif tok == "TK_EOL" then         -- line endings
  2418.       if atstart and opt_emptylines then
  2419.         settoken()  -- remove entirely
  2420.       elseif info == "\r\n" or info == "\n\r" then
  2421.         -- normalize the rest of the EOLs for CRLF/LFCR only
  2422.         -- (note that TK_LCOMMENT can change into several EOLs)
  2423.         settoken("TK_EOL", "\n")
  2424.       end
  2425.     ----------------------------------------------------------------
  2426.     elseif tok == "TK_SPACE" then       -- whitespace
  2427.       if opt_whitespace then
  2428.         if atstart or atlineend(i) then
  2429.           -- delete leading and trailing whitespace
  2430.           settoken()  -- remove entirely
  2431.         else
  2432.           ------------------------------------------------------------
  2433.           -- at this point, since leading whitespace has been removed,
  2434.           -- there should be either a real token or a TK_LCOMMENT
  2435.           -- prior to this whitespace; the TK_LCOMMENT case only
  2436.           -- happens if opt_comments is disabled, so prev ~= nil
  2437.           local ptok = stoks[prev]
  2438.           if ptok == "TK_LCOMMENT" then
  2439.             -- previous TK_LCOMMENT can abut with anything
  2440.             settoken()  -- remove entirely
  2441.           else
  2442.             -- prev must be a grammar token; consecutive TK_SPACE
  2443.             -- tokens are impossible when optimizing whitespace
  2444.             local ntok = stoks[i + 1]
  2445.             if is_faketoken[ntok] then
  2446.               -- handle special case where a '-' cannot abut with
  2447.               -- either a short comment or a long comment
  2448.               if (ntok == "TK_COMMENT" or ntok == "TK_LCOMMENT") and
  2449.                  ptok == "TK_OP" and sinfos[prev] == "-" then
  2450.                 -- keep token
  2451.               else
  2452.                 settoken()  -- remove entirely
  2453.               end
  2454.             else--is_realtoken
  2455.               -- check a pair of grammar tokens, if can abut, then
  2456.               -- delete space token entirely, otherwise keep one space
  2457.               local s = checkpair(prev, i + 1)
  2458.               if s == "" then
  2459.                 settoken()  -- remove entirely
  2460.               else
  2461.                 settoken("TK_SPACE", " ")
  2462.               end
  2463.             end
  2464.           end
  2465.           ------------------------------------------------------------
  2466.         end
  2467.       end
  2468.     ----------------------------------------------------------------
  2469.     else
  2470.       error("unidentified token encountered")
  2471.     end
  2472.     ----------------------------------------------------------------
  2473.     i = i + 1
  2474.   end--while
  2475.   repack_tokens()
  2476.   --------------------------------------------------------------------
  2477.   -- processing loop (PASS 2)
  2478.   --------------------------------------------------------------------
  2479.   if opt_eols then
  2480.     i = 1
  2481.     -- aggressive EOL removal only works with most non-grammar tokens
  2482.     -- optimized away because it is a rather simple scheme -- basically
  2483.     -- it just checks 'real' token pairs around EOLs
  2484.     if stoks[1] == "TK_COMMENT" then
  2485.       -- first comment still existing must be shbang, skip whole line
  2486.       i = 3
  2487.     end
  2488.     while true do
  2489.       tok, info = stoks[i], sinfos[i]
  2490.       --------------------------------------------------------------
  2491.       if tok == "TK_EOS" then           -- end of stream/pass
  2492.         break
  2493.       --------------------------------------------------------------
  2494.       elseif tok == "TK_EOL" then       -- consider each TK_EOL
  2495.         local t1, t2 = stoks[i - 1], stoks[i + 1]
  2496.         if is_realtoken[t1] and is_realtoken[t2] then  -- sanity check
  2497.           local s = checkpair(i - 1, i + 1)
  2498.           if s == "" or t2 == "TK_EOS" then
  2499.             settoken()  -- remove entirely
  2500.           end
  2501.         end
  2502.       end--if tok
  2503.       --------------------------------------------------------------
  2504.       i = i + 1
  2505.     end--while
  2506.     repack_tokens()
  2507.   end
  2508.   --------------------------------------------------------------------
  2509.   if opt_details and opt_details > 0 then print() end -- spacing
  2510.   return stoks, sinfos, stoklns
  2511. end
  2512. --end of inserted module
  2513. end
  2514.  
  2515. -- preload function for module optparser
  2516. preload.optparser =
  2517. function()
  2518. --start of inserted module
  2519. module "optparser"
  2520.  
  2521. local string = base.require "string"
  2522. local table = base.require "table"
  2523.  
  2524. ----------------------------------------------------------------------
  2525. -- Letter frequencies for reducing symbol entropy (fixed version)
  2526. -- * Might help a wee bit when the output file is compressed
  2527. -- * See Wikipedia: http://en.wikipedia.org/wiki/Letter_frequencies
  2528. -- * We use letter frequencies according to a Linotype keyboard, plus
  2529. --   the underscore, and both lower case and upper case letters.
  2530. -- * The arrangement below (LC, underscore, %d, UC) is arbitrary.
  2531. -- * This is certainly not optimal, but is quick-and-dirty and the
  2532. --   process has no significant overhead
  2533. ----------------------------------------------------------------------
  2534.  
  2535. local LETTERS = "etaoinshrdlucmfwypvbgkqjxz_ETAOINSHRDLUCMFWYPVBGKQJXZ"
  2536. local ALPHANUM = "etaoinshrdlucmfwypvbgkqjxz_0123456789ETAOINSHRDLUCMFWYPVBGKQJXZ"
  2537.  
  2538. -- names or identifiers that must be skipped
  2539. -- * the first two lines are for keywords
  2540. local SKIP_NAME = {}
  2541. for v in string.gmatch([[
  2542. and break do else elseif end false for function if in
  2543. local nil not or repeat return then true until while
  2544. self]], "%S+") do
  2545.   SKIP_NAME[v] = true
  2546. end
  2547.  
  2548. ------------------------------------------------------------------------
  2549. -- variables and data structures
  2550. ------------------------------------------------------------------------
  2551.  
  2552. local toklist, seminfolist,             -- token lists (lexer output)
  2553.       tokpar, seminfopar, xrefpar,      -- token lists (parser output)
  2554.       globalinfo, localinfo,            -- variable information tables
  2555.       statinfo,                         -- statement type table
  2556.       globaluniq, localuniq,            -- unique name tables
  2557.       var_new,                          -- index of new variable names
  2558.       varlist                           -- list of output variables
  2559.  
  2560. ----------------------------------------------------------------------
  2561. -- preprocess information table to get lists of unique names
  2562. ----------------------------------------------------------------------
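-- For reference (read off the code below): the returned uniqtable maps
-- each name to { decl = <#declarations>, token = <#token occurrences>,
-- size = <total bytes> }; global entries also get a back-reference 'id',
-- while local info objects gain 'id', 'xcount' and 'first'/'last'
-- reference positions as a side effect.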
  2563.  
  2564. local function preprocess(infotable)
  2565.   local uniqtable = {}
  2566.   for i = 1, #infotable do              -- enumerate info table
  2567.     local obj = infotable[i]
  2568.     local name = obj.name
  2569.     --------------------------------------------------------------------
  2570.     if not uniqtable[name] then         -- not found, start an entry
  2571.       uniqtable[name] = {
  2572.         decl = 0, token = 0, size = 0,
  2573.       }
  2574.     end
  2575.     --------------------------------------------------------------------
  2576.     local uniq = uniqtable[name]        -- count declarations, tokens, size
  2577.     uniq.decl = uniq.decl + 1
  2578.     local xref = obj.xref
  2579.     local xcount = #xref
  2580.     uniq.token = uniq.token + xcount
  2581.     uniq.size = uniq.size + xcount * #name
  2582.     --------------------------------------------------------------------
  2583.     if obj.decl then            -- if local table, create first,last pairs
  2584.       obj.id = i
  2585.       obj.xcount = xcount
  2586.       if xcount > 1 then        -- if ==1, means local never accessed
  2587.         obj.first = xref[2]
  2588.         obj.last = xref[xcount]
  2589.       end
  2590.     --------------------------------------------------------------------
  2591.     else                        -- if global table, add a back ref
  2592.       uniq.id = i
  2593.     end
  2594.     --------------------------------------------------------------------
  2595.   end--for
  2596.   return uniqtable
  2597. end
  2598.  
  2599. ----------------------------------------------------------------------
  2600. -- calculate actual symbol frequencies, in order to reduce entropy
  2601. -- * this may help further reduce the size of compressed sources
  2602. -- * note that since parsing optimizations are done before lexing
  2603. --   optimizations, the frequency table is not exact!
  2604. -- * yes, this will miss --keep block comments too...
  2605. ----------------------------------------------------------------------
  2606.  
  2607. local function recalc_for_entropy(option)
  2608.   local byte = string.byte
  2609.   local char = string.char
  2610.   -- table of token classes to accept in calculating symbol frequency
  2611.   local ACCEPT = {
  2612.     TK_KEYWORD = true, TK_NAME = true, TK_NUMBER = true,
  2613.     TK_STRING = true, TK_LSTRING = true,
  2614.   }
  2615.   if not option["opt-comments"] then
  2616.     ACCEPT.TK_COMMENT = true
  2617.     ACCEPT.TK_LCOMMENT = true
  2618.   end
  2619.   --------------------------------------------------------------------
  2620.   -- create a new table and remove any original locals by filtering
  2621.   --------------------------------------------------------------------
  2622.   local filtered = {}
  2623.   for i = 1, #toklist do
  2624.     filtered[i] = seminfolist[i]
  2625.   end
  2626.   for i = 1, #localinfo do              -- enumerate local info table
  2627.     local obj = localinfo[i]
  2628.     local xref = obj.xref
  2629.     for j = 1, obj.xcount do
  2630.       local p = xref[j]
  2631.       filtered[p] = ""                  -- remove locals
  2632.     end
  2633.   end
  2634.   --------------------------------------------------------------------
  2635.   local freq = {}                       -- reset symbol frequency table
  2636.   for i = 0, 255 do freq[i] = 0 end
  2637.   for i = 1, #toklist do                -- gather symbol frequency
  2638.     local tok, info = toklist[i], filtered[i]
  2639.     if ACCEPT[tok] then
  2640.       for j = 1, #info do
  2641.         local c = byte(info, j)
  2642.         freq[c] = freq[c] + 1
  2643.       end
  2644.     end--if
  2645.   end--for
  2646.   --------------------------------------------------------------------
  2647.   -- function to re-sort symbols according to actual frequencies
  2648.   --------------------------------------------------------------------
  2649.   local function resort(symbols)
  2650.     local symlist = {}
  2651.     for i = 1, #symbols do              -- prepare table to sort
  2652.       local c = byte(symbols, i)
  2653.       symlist[i] = { c = c, freq = freq[c], }
  2654.     end
  2655.     table.sort(symlist,                 -- sort selected symbols
  2656.       function(v1, v2)
  2657.         return v1.freq > v2.freq
  2658.       end
  2659.     )
  2660.     local charlist = {}                 -- reconstitute the string
  2661.     for i = 1, #symlist do
  2662.       charlist[i] = char(symlist[i].c)
  2663.     end
  2664.     return table.concat(charlist)
  2665.   end
  2666.   --------------------------------------------------------------------
  2667.   LETTERS = resort(LETTERS)             -- change letter arrangement
  2668.   ALPHANUM = resort(ALPHANUM)
  2669. end
  2670.  
  2671. ----------------------------------------------------------------------
  2672. -- returns a string containing a new local variable name to use, and
  2673. -- a flag indicating whether it collides with a global variable
  2674. -- * trapping keywords and other names like 'self' is done elsewhere
  2675. ----------------------------------------------------------------------
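-- Worked example (assuming the default LETTERS/ALPHANUM above and that
-- var_new starts at 0): the first 53 names are the single letters
-- "e", "t", "a", ...; the 54th name is "ee", then "te", "ae", ... with
-- the left character cycling fastest, as noted in the loop below.
-- The second return value is true when the name collides with a global.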
  2676.  
  2677. local function new_var_name()
  2678.   local var
  2679.   local cletters, calphanum = #LETTERS, #ALPHANUM
  2680.   local v = var_new
  2681.   if v < cletters then                  -- single char
  2682.     v = v + 1
  2683.     var = string.sub(LETTERS, v, v)
  2684.   else                                  -- longer names
  2685.     local range, sz = cletters, 1       -- calculate # chars fit
  2686.     repeat
  2687.       v = v - range
  2688.       range = range * calphanum
  2689.       sz = sz + 1
  2690.     until range > v
  2691.     local n = v % cletters              -- left side cycles faster
  2692.     v = (v - n) / cletters              -- do first char first
  2693.     n = n + 1
  2694.     var = string.sub(LETTERS, n, n)
  2695.     while sz > 1 do
  2696.       local m = v % calphanum
  2697.       v = (v - m) / calphanum
  2698.       m = m + 1
  2699.       var = var..string.sub(ALPHANUM, m, m)
  2700.       sz = sz - 1
  2701.     end
  2702.   end
  2703.   var_new = var_new + 1
  2704.   return var, globaluniq[var] ~= nil
  2705. end
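        -- illustrative sketch (hypothetical alphabets, for explanation only):
        -- with LETTERS = "ab" and ALPHANUM = "ab1", successive calls return
        --   "a", "b", "aa", "ba", "ab", "bb", "a1", "b1", "aaa", ...
        -- i.e. the first character (drawn from LETTERS) cycles fastest, and a
        -- trailing character (drawn from ALPHANUM) is appended once a length
        -- is exhausted; the second return value is true when the generated
        -- name matches a global that is actually used in the source.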
  2706.  
  2707. ----------------------------------------------------------------------
  2708. -- calculate and print some statistics
  2709. -- * probably better in main source, put here for now
  2710. ----------------------------------------------------------------------
  2711.  
  2712. local function stats_summary(globaluniq, localuniq, afteruniq, option)
  2713.   local print = print or base.print
  2714.   local fmt = string.format
  2715.   local opt_details = option.DETAILS
  2716.   if option.QUIET then return end
  2717.   local uniq_g , uniq_li, uniq_lo, uniq_ti, uniq_to,  -- stats needed
  2718.         decl_g, decl_li, decl_lo, decl_ti, decl_to,
  2719.         token_g, token_li, token_lo, token_ti, token_to,
  2720.         size_g, size_li, size_lo, size_ti, size_to
  2721.     = 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  2722.       0, 0, 0, 0, 0, 0, 0, 0, 0, 0
  2723.   local function avg(c, l)              -- safe average function
  2724.     if c == 0 then return 0 end
  2725.     return l / c
  2726.   end
  2727.   --------------------------------------------------------------------
  2728.   -- collect statistics (note: globals do not have declarations!)
  2729.   --------------------------------------------------------------------
  2730.   for name, uniq in base.pairs(globaluniq) do
  2731.     uniq_g = uniq_g + 1
  2732.     token_g = token_g + uniq.token
  2733.     size_g = size_g + uniq.size
  2734.   end
  2735.   for name, uniq in base.pairs(localuniq) do
  2736.     uniq_li = uniq_li + 1
  2737.     decl_li = decl_li + uniq.decl
  2738.     token_li = token_li + uniq.token
  2739.     size_li = size_li + uniq.size
  2740.   end
  2741.   for name, uniq in base.pairs(afteruniq) do
  2742.     uniq_lo = uniq_lo + 1
  2743.     decl_lo = decl_lo + uniq.decl
  2744.     token_lo = token_lo + uniq.token
  2745.     size_lo = size_lo + uniq.size
  2746.   end
  2747.   uniq_ti = uniq_g + uniq_li
  2748.   decl_ti = decl_g + decl_li
  2749.   token_ti = token_g + token_li
  2750.   size_ti = size_g + size_li
  2751.   uniq_to = uniq_g + uniq_lo
  2752.   decl_to = decl_g + decl_lo
  2753.   token_to = token_g + token_lo
  2754.   size_to = size_g + size_lo
  2755.   --------------------------------------------------------------------
  2756.   -- detailed stats: global list
  2757.   --------------------------------------------------------------------
  2758.   if opt_details then
  2759.     local sorted = {} -- sort table of unique global names by size
  2760.     for name, uniq in base.pairs(globaluniq) do
  2761.       uniq.name = name
  2762.       sorted[#sorted + 1] = uniq
  2763.     end
  2764.     table.sort(sorted,
  2765.       function(v1, v2)
  2766.         return v1.size > v2.size
  2767.       end
  2768.     )
  2769.     local tabf1, tabf2 = "%8s%8s%10s  %s", "%8d%8d%10.2f  %s"
  2770.     local hl = string.rep("-", 44)
  2771.     print("*** global variable list (sorted by size) ***\n"..hl)
  2772.     print(fmt(tabf1, "Token",  "Input", "Input", "Global"))
  2773.     print(fmt(tabf1, "Count", "Bytes", "Average", "Name"))
  2774.     print(hl)
  2775.     for i = 1, #sorted do
  2776.       local uniq = sorted[i]
  2777.       print(fmt(tabf2, uniq.token, uniq.size, avg(uniq.token, uniq.size), uniq.name))
  2778.     end
  2779.     print(hl)
  2780.     print(fmt(tabf2, token_g, size_g, avg(token_g, size_g), "TOTAL"))
  2781.     print(hl.."\n")
  2782.   --------------------------------------------------------------------
  2783.   -- detailed stats: local list
  2784.   --------------------------------------------------------------------
  2785.     local tabf1, tabf2 = "%8s%8s%8s%10s%8s%10s  %s", "%8d%8d%8d%10.2f%8d%10.2f  %s"
  2786.     local hl = string.rep("-", 70)
  2787.     print("*** local variable list (sorted by allocation order) ***\n"..hl)
  2788.     print(fmt(tabf1, "Decl.", "Token",  "Input", "Input", "Output", "Output", "Global"))
  2789.     print(fmt(tabf1, "Count", "Count", "Bytes", "Average", "Bytes", "Average", "Name"))
  2790.     print(hl)
  2791.     for i = 1, #varlist do  -- iterate according to order assigned
  2792.       local name = varlist[i]
  2793.       local uniq = afteruniq[name]
  2794.       local old_t, old_s = 0, 0
  2795.       for j = 1, #localinfo do  -- find corresponding old names and tally their totals
  2796.         local obj = localinfo[j]
  2797.         if obj.name == name then
  2798.           old_t = old_t + obj.xcount
  2799.           old_s = old_s + obj.xcount * #obj.oldname
  2800.         end
  2801.       end
  2802.       print(fmt(tabf2, uniq.decl, uniq.token, old_s, avg(old_t, old_s),
  2803.                 uniq.size, avg(uniq.token, uniq.size), name))
  2804.     end
  2805.     print(hl)
  2806.     print(fmt(tabf2, decl_lo, token_lo, size_li, avg(token_li, size_li),
  2807.               size_lo, avg(token_lo, size_lo), "TOTAL"))
  2808.     print(hl.."\n")
  2809.   end--if opt_details
  2810.   --------------------------------------------------------------------
  2811.   -- display output
  2812.   --------------------------------------------------------------------
  2813.   local tabf1, tabf2 = "%-16s%8s%8s%8s%8s%10s", "%-16s%8d%8d%8d%8d%10.2f"
  2814.   local hl = string.rep("-", 58)
  2815.   print("*** local variable optimization summary ***\n"..hl)
  2816.   print(fmt(tabf1, "Variable",  "Unique", "Decl.", "Token", "Size", "Average"))
  2817.   print(fmt(tabf1, "Types", "Names", "Count", "Count", "Bytes", "Bytes"))
  2818.   print(hl)
  2819.   print(fmt(tabf2, "Global", uniq_g, decl_g, token_g, size_g, avg(token_g, size_g)))
  2820.   print(hl)
  2821.   print(fmt(tabf2, "Local (in)", uniq_li, decl_li, token_li, size_li, avg(token_li, size_li)))
  2822.   print(fmt(tabf2, "TOTAL (in)", uniq_ti, decl_ti, token_ti, size_ti, avg(token_ti, size_ti)))
  2823.   print(hl)
  2824.   print(fmt(tabf2, "Local (out)", uniq_lo, decl_lo, token_lo, size_lo, avg(token_lo, size_lo)))
  2825.   print(fmt(tabf2, "TOTAL (out)", uniq_to, decl_to, token_to, size_to, avg(token_to, size_to)))
  2826.   print(hl.."\n")
  2827. end
  2828.  
  2829. ----------------------------------------------------------------------
  2830. -- experimental optimization for f("string") statements
  2831. -- * safe to delete parentheses without adding whitespace, as both
  2832. --   kinds of strings can abut with anything else
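        -- * a minimal before/after sketch of the transformation (illustrative):
        --     print("hello, world")   -->   print"hello, world"
        --     require("foo")          -->   require"foo"
        --   Lua's grammar allows a call with a single literal string argument
        --   to omit the parentheses, so only the '(' and ')' tokens are dropped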
  2833. ----------------------------------------------------------------------
  2834.  
  2835. local function optimize_func1()
  2836.   ------------------------------------------------------------------
  2837.   local function is_strcall(j)          -- find f("string") pattern
  2838.     local t1 = tokpar[j + 1] or ""
  2839.     local t2 = tokpar[j + 2] or ""
  2840.     local t3 = tokpar[j + 3] or ""
  2841.     if t1 == "(" and t2 == "<string>" and t3 == ")" then
  2842.       return true
  2843.     end
  2844.   end
  2845.   ------------------------------------------------------------------
  2846.   local del_list = {}           -- scan for function pattern,
  2847.   local i = 1                   -- tokens to be deleted are marked
  2848.   while i <= #tokpar do
  2849.     local id = statinfo[i]
  2850.     if id == "call" and is_strcall(i) then  -- found & mark ()
  2851.       del_list[i + 1] = true    -- '('
  2852.       del_list[i + 3] = true    -- ')'
  2853.       i = i + 3
  2854.     end
  2855.     i = i + 1
  2856.   end
  2857.   ------------------------------------------------------------------
  2858.   -- delete a token and adjust all relevant tables
  2859.   -- * currently invalidates globalinfo and localinfo (not updated),
  2860.   --   so any other optimization is done after processing locals
  2861.   --   (of course, we can also lex the source data again...)
  2862.   -- * faster one-pass token deletion
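          -- * e.g. (illustrative) with tokpar = { "f", "(", "<string>", ")" } and
          --   del_list = { [2] = true, [4] = true }, the pass below compacts the
          --   parser tables to { "f", "<string>" } while del_list2 records the
          --   matching positions to drop from the lexer tables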
  2863.   ------------------------------------------------------------------
  2864.   local i, dst, idend = 1, 1, #tokpar
  2865.   local del_list2 = {}
  2866.   while dst <= idend do         -- process parser tables
  2867.     if del_list[i] then         -- found a token to delete?
  2868.       del_list2[xrefpar[i]] = true
  2869.       i = i + 1
  2870.     end
  2871.     if i > dst then
  2872.       if i <= idend then        -- shift table items lower
  2873.         tokpar[dst] = tokpar[i]
  2874.         seminfopar[dst] = seminfopar[i]
  2875.         xrefpar[dst] = xrefpar[i] - (i - dst)
  2876.         statinfo[dst] = statinfo[i]
  2877.       else                      -- nil out excess entries
  2878.         tokpar[dst] = nil
  2879.         seminfopar[dst] = nil
  2880.         xrefpar[dst] = nil
  2881.         statinfo[dst] = nil
  2882.       end
  2883.     end
  2884.     i = i + 1
  2885.     dst = dst + 1
  2886.   end
  2887.   local i, dst, idend = 1, 1, #toklist
  2888.   while dst <= idend do         -- process lexer tables
  2889.     if del_list2[i] then        -- found a token to delete?
  2890.       i = i + 1
  2891.     end
  2892.     if i > dst then
  2893.       if i <= idend then        -- shift table items lower
  2894.         toklist[dst] = toklist[i]
  2895.         seminfolist[dst] = seminfolist[i]
  2896.       else                      -- nil out excess entries
  2897.         toklist[dst] = nil
  2898.         seminfolist[dst] = nil
  2899.       end
  2900.     end
  2901.     i = i + 1
  2902.     dst = dst + 1
  2903.   end
  2904. end
  2905.  
  2906. ----------------------------------------------------------------------
  2907. -- local variable optimization
  2908. ----------------------------------------------------------------------
  2909.  
  2910. local function optimize_locals(option)
  2911.   var_new = 0                           -- reset variable name allocator
  2912.   varlist = {}
  2913.   ------------------------------------------------------------------
  2914.   -- preprocess global/local tables, handle entropy reduction
  2915.   ------------------------------------------------------------------
  2916.   globaluniq = preprocess(globalinfo)
  2917.   localuniq = preprocess(localinfo)
  2918.   if option["opt-entropy"] then         -- for entropy improvement
  2919.     recalc_for_entropy(option)
  2920.   end
  2921.   ------------------------------------------------------------------
  2922.   -- build initial declared object table, then sort according to
  2923.   -- token count; this might help assign more tokens to more common
  2924.   -- variable names such as 'e', thus possibly reducing entropy
  2925.   -- * an object knows its localinfo index via its 'id' field
  2926.   -- * special handling for "self" special local (parameter) here
  2927.   ------------------------------------------------------------------
  2928.   local object = {}
  2929.   for i = 1, #localinfo do
  2930.     object[i] = localinfo[i]
  2931.   end
  2932.   table.sort(object,                    -- sort largest first
  2933.     function(v1, v2)
  2934.       return v1.xcount > v2.xcount
  2935.     end
  2936.   )
  2937.   ------------------------------------------------------------------
  2938.   -- the special "self" function parameters must be preserved
  2939.   -- * the allocator below will never use "self", so it is safe to
  2940.   --   keep those implicit declarations as-is
  2941.   ------------------------------------------------------------------
  2942.   local temp, j, gotself = {}, 1, false
  2943.   for i = 1, #object do
  2944.     local obj = object[i]
  2945.     if not obj.isself then
  2946.       temp[j] = obj
  2947.       j = j + 1
  2948.     else
  2949.       gotself = true
  2950.     end
  2951.   end
  2952.   object = temp
  2953.   ------------------------------------------------------------------
  2954.   -- a simple first-come first-served heuristic name allocator;
  2955.   -- note that this is in no way optimal...
  2956.   -- * each object is a local variable declaration plus its existence (live range)
  2957.   -- * the aim is to assign short names to as many tokens as possible,
  2958.   --   so the following tries to maximize name reuse
  2959.   -- * note that we preserve sort order
  2960.   ------------------------------------------------------------------
  2961.   local nobject = #object
  2962.   while nobject > 0 do
  2963.     local varname, gcollide
  2964.     repeat
  2965.       varname, gcollide = new_var_name()  -- collect a variable name
  2966.     until not SKIP_NAME[varname]          -- skip all special names
  2967.     varlist[#varlist + 1] = varname       -- keep a list
  2968.     local oleft = nobject
  2969.     ------------------------------------------------------------------
  2970.     -- if the variable name collides with an existing global, the name
  2971.     -- cannot be used by a local if that global is accessed anywhere
  2972.     -- while the local is alive (between 'act' and 'rem'), so
  2973.     -- we drop objects that collide with the corresponding global
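            -- e.g. (illustrative) if varname is "a" and the global "a" is read
            -- at token position 50, any local whose live range covers position
            -- 50 is skipped for this round and waits for a different name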
  2974.     ------------------------------------------------------------------
  2975.     if gcollide then
  2976.       -- find the xref table of the global
  2977.       local gref = globalinfo[globaluniq[varname].id].xref
  2978.       local ngref = #gref
  2979.       -- enumerate for all current objects; all are valid at this point
  2980.       for i = 1, nobject do
  2981.         local obj = object[i]
  2982.         local act, rem = obj.act, obj.rem  -- 'live' range of local
  2983.         -- if rem < 0, it is a -id to a local that had the same name
  2984.         -- so follow rem to extend it; does this make sense?
  2985.         while rem < 0 do
  2986.           rem = localinfo[-rem].rem
  2987.         end
  2988.         local drop
  2989.         for j = 1, ngref do
  2990.           local p = gref[j]
  2991.           if p >= act and p <= rem then drop = true end  -- in range?
  2992.         end
  2993.         if drop then
  2994.           obj.skip = true
  2995.           oleft = oleft - 1
  2996.         end
  2997.       end--for
  2998.     end--if gcollide
  2999.     ------------------------------------------------------------------
  3000.     -- now the first unassigned local (since it's sorted) will be the
  3001.     -- one with the most tokens to rename, so we assign it this name and
  3002.     -- then eliminate all others that collide; any locals that are left
  3003.     -- can then reuse the same variable name; this is repeated until
  3004.     -- every local declaration that can use this name has been assigned
  3005.     -- * the criteria for local-local reuse/collision is:
  3006.     --   A is the local with a name already assigned
  3007.     --   B is the unassigned local under consideration
  3008.     --   => anytime A is accessed, it cannot be when B is 'live'
  3009.     --   => to speed up things, we have first/last accesses noted
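            --   e.g. (illustrative) for
            --     local x = 1 ... (last use of x) ... local y = 2 ...
            --   x and y may share one short name only if x is never accessed
            --   while y is live; the noted ranges make that check cheap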
  3010.     ------------------------------------------------------------------
  3011.     while oleft > 0 do
  3012.       local i = 1
  3013.       while object[i].skip do  -- scan for first object
  3014.         i = i + 1
  3015.       end
  3016.       ------------------------------------------------------------------
  3017.       -- first object is free for assignment of the variable name
  3018.       -- [first,last] gives the access range for collision checking
  3019.       ------------------------------------------------------------------
  3020.       oleft = oleft - 1
  3021.       local obja = object[i]
  3022.       i = i + 1
  3023.       obja.newname = varname
  3024.       obja.skip = true
  3025.       obja.done = true
  3026.       local first, last = obja.first, obja.last
  3027.       local xref = obja.xref
  3028.       ------------------------------------------------------------------
  3029.       -- then, scan all the rest and drop those colliding
  3030.       -- if A was never accessed then it'll never collide with anything
  3031.       -- otherwise trivial skip if:
  3032.       -- * B was activated after A's last access (last < act)
  3033.       -- * B was removed before A's first access (first > rem)
  3034.       -- if not, see detailed skip below...
  3035.       ------------------------------------------------------------------
  3036.       if first and oleft > 0 then  -- must have at least 1 access
  3037.         local scanleft = oleft
  3038.         while scanleft > 0 do
  3039.           while object[i].skip do  -- next valid object
  3040.             i = i + 1
  3041.           end
  3042.           scanleft = scanleft - 1
  3043.           local objb = object[i]
  3044.           i = i + 1
  3045.           local act, rem = objb.act, objb.rem  -- live range of B
  3046.           -- if rem < 0, extend range of rem thru' following local
  3047.           while rem < 0 do
  3048.             rem = localinfo[-rem].rem
  3049.           end
  3050.           --------------------------------------------------------
  3051.           if not(last < act or first > rem) then  -- possible collision
  3052.             --------------------------------------------------------
  3053.             -- B is activated later than A or at the same statement;
  3054.             -- for there to be no collision, A cannot be accessed while B
  3055.             -- is alive, since B overrides A (or is a peer)
  3056.             --------------------------------------------------------
  3057.             if act >= obja.act then
  3058.               for j = 1, obja.xcount do  -- ... then check every access
  3059.                 local p = xref[j]
  3060.                 if p >= act and p <= rem then  -- A accessed when B live!
  3061.                   oleft = oleft - 1
  3062.                   objb.skip = true
  3063.                   break
  3064.                 end
  3065.               end--for
  3066.             --------------------------------------------------------
  3067.             -- A is activated later than B; for there to be no collision,
  3068.             -- A's accesses are okay since A overrides B, but B's last
  3069.             -- access needs to be earlier than A's activation time
  3070.             --------------------------------------------------------
  3071.             else
  3072.               if objb.last and objb.last >= obja.act then
  3073.                 oleft = oleft - 1
  3074.                 objb.skip = true
  3075.               end
  3076.             end
  3077.           end
  3078.           --------------------------------------------------------
  3079.           if oleft == 0 then break end
  3080.         end
  3081.       end--if first
  3082.       ------------------------------------------------------------------
  3083.     end--while
  3084.     ------------------------------------------------------------------
  3085.     -- after assigning all possible locals to one variable name, the
  3086.     -- unassigned locals/objects have the skip field reset and the table
  3087.     -- is compacted, to hopefully reduce iteration time
  3088.     ------------------------------------------------------------------
  3089.     local temp, j = {}, 1
  3090.     for i = 1, nobject do
  3091.       local obj = object[i]
  3092.       if not obj.done then
  3093.         obj.skip = false
  3094.         temp[j] = obj
  3095.         j = j + 1
  3096.       end
  3097.     end
  3098.     object = temp  -- new compacted object table
  3099.     nobject = #object  -- objects left to process
  3100.     ------------------------------------------------------------------
  3101.   end--while
  3102.   ------------------------------------------------------------------
  3103.   -- after assigning all locals with new variable names, we can
  3104.   -- patch in the new names, and reprocess to get 'after' stats
  3105.   ------------------------------------------------------------------
  3106.   for i = 1, #localinfo do  -- enumerate all locals
  3107.     local obj = localinfo[i]
  3108.     local xref = obj.xref
  3109.     if obj.newname then                 -- if got new name, patch it in
  3110.       for j = 1, obj.xcount do
  3111.         local p = xref[j]               -- xrefs indexes the token list
  3112.         seminfolist[p] = obj.newname
  3113.       end
  3114.       obj.name, obj.oldname             -- adjust names
  3115.         = obj.newname, obj.name
  3116.     else
  3117.       obj.oldname = obj.name            -- for cases like 'self'
  3118.     end
  3119.   end
  3120.   ------------------------------------------------------------------
  3121.   -- deal with statistics output
  3122.   ------------------------------------------------------------------
  3123.   if gotself then  -- add 'self' to end of list
  3124.     varlist[#varlist + 1] = "self"
  3125.   end
  3126.   local afteruniq = preprocess(localinfo)
  3127.   stats_summary(globaluniq, localuniq, afteruniq, option)
  3128. end
  3129.  
  3130.  
  3131. ----------------------------------------------------------------------
  3132. -- main entry point
  3133. ----------------------------------------------------------------------
  3134.  
  3135. function optimize(option, _toklist, _seminfolist, xinfo)
  3136.   -- set tables
  3137.   toklist, seminfolist                  -- from lexer
  3138.     = _toklist, _seminfolist
  3139.   tokpar, seminfopar, xrefpar           -- from parser
  3140.     = xinfo.toklist, xinfo.seminfolist, xinfo.xreflist
  3141.   globalinfo, localinfo, statinfo       -- from parser
  3142.     = xinfo.globalinfo, xinfo.localinfo, xinfo.statinfo
  3143.   ------------------------------------------------------------------
  3144.   -- optimize locals
  3145.   ------------------------------------------------------------------
  3146.   if option["opt-locals"] then
  3147.     optimize_locals(option)
  3148.   end
  3149.   ------------------------------------------------------------------
  3150.   -- other optimizations
  3151.   ------------------------------------------------------------------
  3152.   if option["opt-experimental"] then    -- experimental
  3153.     optimize_func1()
  3154.     -- WARNING globalinfo and localinfo now invalidated!
  3155.   end
  3156. end
  3157. --end of inserted module
  3158. end
  3159.  
  3160. -- preload function for module equiv
  3161. preload.equiv =
  3162. function()
  3163. --start of inserted module
  3164. module "equiv"
  3165.  
  3166. local string = base.require "string"
  3167. local loadstring = base.loadstring
  3168. local sub = string.sub
  3169. local match = string.match
  3170. local dump = string.dump
  3171. local byte = string.byte
  3172.  
  3173. --[[--------------------------------------------------------------------
  3174. -- variable and data initialization
  3175. ----------------------------------------------------------------------]]
  3176.  
  3177. local is_realtoken = {          -- significant (grammar) tokens
  3178.   TK_KEYWORD = true,
  3179.   TK_NAME = true,
  3180.   TK_NUMBER = true,
  3181.   TK_STRING = true,
  3182.   TK_LSTRING = true,
  3183.   TK_OP = true,
  3184.   TK_EOS = true,
  3185. }
  3186.  
  3187. local option, llex, warn
  3188.  
  3189. --[[--------------------------------------------------------------------
  3190. -- functions
  3191. ----------------------------------------------------------------------]]
  3192.  
  3193. ------------------------------------------------------------------------
  3194. -- initialization function
  3195. ------------------------------------------------------------------------
  3196.  
  3197. function init(_option, _llex, _warn)
  3198.   option = _option
  3199.   llex = _llex
  3200.   warn = _warn
  3201. end
  3202.  
  3203. ------------------------------------------------------------------------
  3204. -- function to build lists containing a 'normal' lexer stream
  3205. ------------------------------------------------------------------------
  3206.  
  3207. local function build_stream(s)
  3208.   llex.init(s)
  3209.   llex.llex()
  3210.   local stok, sseminfo -- source list (with whitespace elements)
  3211.     = llex.tok, llex.seminfo
  3212.   local tok, seminfo   -- processed list (real elements only)
  3213.     = {}, {}
  3214.   for i = 1, #stok do
  3215.     local t = stok[i]
  3216.     if is_realtoken[t] then
  3217.       tok[#tok + 1] = t
  3218.       seminfo[#seminfo + 1] = sseminfo[i]
  3219.     end
  3220.   end--for
  3221.   return tok, seminfo
  3222. end
  3223.  
  3224. ------------------------------------------------------------------------
  3225. -- test source (lexer stream) equivalence
  3226. ------------------------------------------------------------------------
  3227.  
  3228. function source(z, dat)
  3229.   --------------------------------------------------------------------
  3230.   -- function to return a dumped string for seminfo compares
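          -- e.g. (illustrative) dumpsem("0x10") == dumpsem("16") and
          -- dumpsem([["\65"]]) == dumpsem([["A"]]), so numbers and strings that
          -- the optimizer rewrote in a different but equivalent form still match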
  3231.   --------------------------------------------------------------------
  3232.   local function dumpsem(s)
  3233.     local sf = loadstring("return "..s, "z")
  3234.     if sf then
  3235.       return dump(sf)
  3236.     end
  3237.   end
  3238.   --------------------------------------------------------------------
  3239.   -- mark and optionally report non-equivalence
  3240.   --------------------------------------------------------------------
  3241.   local function bork(msg)
  3242.     if option.DETAILS then base.print("SRCEQUIV: "..msg) end
  3243.     warn.SRC_EQUIV = true
  3244.   end
  3245.   --------------------------------------------------------------------
  3246.   -- get lexer streams for both source strings, compare
  3247.   --------------------------------------------------------------------
  3248.   local tok1, seminfo1 = build_stream(z)        -- original
  3249.   local tok2, seminfo2 = build_stream(dat)      -- compressed
  3250.   --------------------------------------------------------------------
  3251.   -- compare shbang lines ignoring EOL
  3252.   --------------------------------------------------------------------
  3253.   local sh1 = match(z, "^(#[^\r\n]*)")
  3254.   local sh2 = match(dat, "^(#[^\r\n]*)")
  3255.   if sh1 or sh2 then
  3256.     if not sh1 or not sh2 or sh1 ~= sh2 then
  3257.       bork("shbang lines different")
  3258.     end
  3259.   end
  3260.   --------------------------------------------------------------------
  3261.   -- compare by simple count
  3262.   --------------------------------------------------------------------
  3263.   if #tok1 ~= #tok2 then
  3264.     bork("count "..#tok1.." "..#tok2)
  3265.     return
  3266.   end
  3267.   --------------------------------------------------------------------
  3268.   -- compare each element the best we can
  3269.   --------------------------------------------------------------------
  3270.   for i = 1, #tok1 do
  3271.     local t1, t2 = tok1[i], tok2[i]
  3272.     local s1, s2 = seminfo1[i], seminfo2[i]
  3273.     if t1 ~= t2 then  -- by type
  3274.       bork("type ["..i.."] "..t1.." "..t2)
  3275.       break
  3276.     end
  3277.     if t1 == "TK_KEYWORD" or t1 == "TK_NAME" or t1 == "TK_OP" then
  3278.       if t1 == "TK_NAME" and option["opt-locals"] then
  3279.         -- can't compare identifiers of locals that are optimized
  3280.       elseif s1 ~= s2 then  -- by semantic info (simple)
  3281.         bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
  3282.         break
  3283.       end
  3284.     elseif t1 == "TK_EOS" then
  3285.       -- no seminfo to compare
  3286.     else-- "TK_NUMBER" or "TK_STRING" or "TK_LSTRING"
  3287.       -- compare 'binary' form, so dump a function
  3288.       local s1b,s2b = dumpsem(s1), dumpsem(s2)
  3289.       if not s1b or not s2b or s1b ~= s2b then
  3290.         bork("seminfo ["..i.."] "..t1.." "..s1.." "..s2)
  3291.         break
  3292.       end
  3293.     end
  3294.   end--for
  3295.   --------------------------------------------------------------------
  3296.   -- successful comparison if end is reached with no borks
  3297.   --------------------------------------------------------------------
  3298. end
  3299.  
  3300. ------------------------------------------------------------------------
  3301. -- test binary chunk equivalence
  3302. ------------------------------------------------------------------------
  3303.  
  3304. function binary(z, dat)
  3305.   local TNIL     = 0
  3306.   local TBOOLEAN = 1
  3307.   local TNUMBER  = 3
  3308.   local TSTRING  = 4
  3309.   --------------------------------------------------------------------
  3310.   -- mark and optionally report non-equivalence
  3311.   --------------------------------------------------------------------
  3312.   local function bork(msg)
  3313.     if option.DETAILS then base.print("BINEQUIV: "..msg) end
  3314.     warn.BIN_EQUIV = true
  3315.   end
  3316.   --------------------------------------------------------------------
  3317.   -- function to remove shbang line so that loadstring runs
  3318.   --------------------------------------------------------------------
  3319.   local function zap_shbang(s)
  3320.     local shbang = match(s, "^(#[^\r\n]*\r?\n?)")
  3321.     if shbang then                      -- cut out shbang
  3322.       s = sub(s, #shbang + 1)
  3323.     end
  3324.     return s
  3325.   end
  3326.   --------------------------------------------------------------------
  3327.   -- attempt to compile, then dump to get binary chunk string
  3328.   --------------------------------------------------------------------
  3329.   local cz = loadstring(zap_shbang(z), "z")
  3330.   if not cz then
  3331.     bork("failed to compile original sources for binary chunk comparison")
  3332.     return
  3333.   end
  3334.   local cdat = loadstring(zap_shbang(dat), "z")
  3335.   if not cdat then
  3336.     bork("failed to compile compressed result for binary chunk comparison"); return
  3337.   end
  3338.   -- if loadstring() works, dump assuming string.dump() is error-free
  3339.   local c1 = { i = 1, dat = dump(cz) }
  3340.   c1.len = #c1.dat
  3341.   local c2 = { i = 1, dat = dump(cdat) }
  3342.   c2.len = #c2.dat
  3343.   --------------------------------------------------------------------
  3344.   -- support functions to handle binary chunk reading
  3345.   --------------------------------------------------------------------
  3346.   local endian,
  3347.         sz_int, sz_sizet,               -- sizes of data types
  3348.         sz_inst, sz_number,
  3349.         getint, getsizet
  3350.   --------------------------------------------------------------------
  3351.   local function ensure(c, sz)          -- check if bytes exist
  3352.     if c.i + sz - 1 > c.len then return end
  3353.     return true
  3354.   end
  3355.   --------------------------------------------------------------------
  3356.   local function skip(c, sz)            -- skip some bytes
  3357.     if not sz then sz = 1 end
  3358.     c.i = c.i + sz
  3359.   end
  3360.   --------------------------------------------------------------------
  3361.   local function getbyte(c)             -- return a byte value
  3362.     local i = c.i
  3363.     if i > c.len then return end
  3364.     local d = sub(c.dat, i, i)
  3365.     c.i = i + 1
  3366.     return byte(d)
  3367.   end
  3368.   --------------------------------------------------------------------
  3369.   local function getint_l(c)            -- return an int value (little-endian)
  3370.     local n, scale = 0, 1
  3371.     if not ensure(c, sz_int) then return end
  3372.     for j = 1, sz_int do
  3373.       n = n + scale * getbyte(c)
  3374.       scale = scale * 256
  3375.     end
  3376.     return n
  3377.   end
  3378.   --------------------------------------------------------------------
  3379.   local function getint_b(c)            -- return an int value (big-endian)
  3380.     local n = 0
  3381.     if not ensure(c, sz_int) then return end
  3382.     for j = 1, sz_int do
  3383.       n = n * 256 + getbyte(c)
  3384.     end
  3385.     return n
  3386.   end
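          -- e.g. (illustrative) with sz_int == 4, the byte sequence 16,0,0,0
          -- decodes to 16 via getint_l, while 0,0,0,16 decodes to 16 via getint_b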
  3387.   --------------------------------------------------------------------
  3388.   local function getsizet_l(c)          -- return a size_t value (little-endian)
  3389.     local n, scale = 0, 1
  3390.     if not ensure(c, sz_sizet) then return end
  3391.     for j = 1, sz_sizet do
  3392.       n = n + scale * getbyte(c)
  3393.       scale = scale * 256
  3394.     end
  3395.     return n
  3396.   end
  3397.   --------------------------------------------------------------------
  3398.   local function getsizet_b(c)          -- return a size_t value (big-endian)
  3399.     local n = 0
  3400.     if not ensure(c, sz_sizet) then return end
  3401.     for j = 1, sz_sizet do
  3402.       n = n * 256 + getbyte(c)
  3403.     end
  3404.     return n
  3405.   end
  3406.   --------------------------------------------------------------------
  3407.   local function getblock(c, sz)        -- return a block (as a string)
  3408.     local i = c.i
  3409.     local j = i + sz - 1
  3410.     if j > c.len then return end
  3411.     local d = sub(c.dat, i, j)
  3412.     c.i = i + sz
  3413.     return d
  3414.   end
  3415.   --------------------------------------------------------------------
  3416.   local function getstring(c)           -- return a string
  3417.     local n = getsizet(c)
  3418.     if not n then return end
  3419.     if n == 0 then return "" end
  3420.     return getblock(c, n)
  3421.   end
  3422.   --------------------------------------------------------------------
  3423.   local function goodbyte(c1, c2)       -- compare byte value
  3424.     local b1, b2 = getbyte(c1), getbyte(c2)
  3425.     if not b1 or not b2 or b1 ~= b2 then
  3426.       return
  3427.     end
  3428.     return b1
  3429.   end
  3430.   --------------------------------------------------------------------
  3431.   local function badbyte(c1, c2)        -- compare byte value
  3432.     local b = goodbyte(c1, c2)
  3433.     if not b then return true end
  3434.   end
  3435.   --------------------------------------------------------------------
  3436.   local function goodint(c1, c2)        -- compare int value
  3437.     local i1, i2 = getint(c1), getint(c2)
  3438.     if not i1 or not i2 or i1 ~= i2 then
  3439.       return
  3440.     end
  3441.     return i1
  3442.   end
  3443.   --------------------------------------------------------------------
  3444.   -- recursively-called function to compare function prototypes
  3445.   --------------------------------------------------------------------
  3446.   local function getfunc(c1, c2)
  3447.     -- source name (ignored)
  3448.     if not getstring(c1) or not getstring(c2) then
  3449.       bork("bad source name"); return
  3450.     end
  3451.     -- linedefined (ignored)
  3452.     if not getint(c1) or not getint(c2) then
  3453.       bork("bad linedefined"); return
  3454.     end
  3455.     -- lastlinedefined (ignored)
  3456.     if not getint(c1) or not getint(c2) then
  3457.       bork("bad lastlinedefined"); return
  3458.     end
  3459.     if not (ensure(c1, 4) and ensure(c2, 4)) then
  3460.       bork("prototype header broken")
  3461.     end
  3462.     -- nups (compared)
  3463.     if badbyte(c1, c2) then
  3464.       bork("bad nups"); return
  3465.     end
  3466.     -- numparams (compared)
  3467.     if badbyte(c1, c2) then
  3468.       bork("bad numparams"); return
  3469.     end
  3470.     -- is_vararg (compared)
  3471.     if badbyte(c1, c2) then
  3472.       bork("bad is_vararg"); return
  3473.     end
  3474.     -- maxstacksize (compared)
  3475.     if badbyte(c1, c2) then
  3476.       bork("bad maxstacksize"); return
  3477.     end
  3478.     -- code (compared)
  3479.     local ncode = goodint(c1, c2)
  3480.     if not ncode then
  3481.       bork("bad ncode"); return
  3482.     end
  3483.     local code1 = getblock(c1, ncode * sz_inst)
  3484.     local code2 = getblock(c2, ncode * sz_inst)
  3485.     if not code1 or not code2 or code1 ~= code2 then
  3486.       bork("bad code block"); return
  3487.     end
  3488.     -- constants (compared)
  3489.     local nconst = goodint(c1, c2)
  3490.     if not nconst then
  3491.       bork("bad nconst"); return
  3492.     end
  3493.     for i = 1, nconst do
  3494.       local ctype = goodbyte(c1, c2)
  3495.       if not ctype then
  3496.         bork("bad const type"); return
  3497.       end
  3498.       if ctype == TBOOLEAN then
  3499.         if badbyte(c1, c2) then
  3500.           bork("bad boolean value"); return
  3501.         end
  3502.       elseif ctype == TNUMBER then
  3503.         local num1 = getblock(c1, sz_number)
  3504.         local num2 = getblock(c2, sz_number)
  3505.         if not num1 or not num2 or num1 ~= num2 then
  3506.           bork("bad number value"); return
  3507.         end
  3508.       elseif ctype == TSTRING then
  3509.         local str1 = getstring(c1)
  3510.         local str2 = getstring(c2)
  3511.         if not str1 or not str2 or str1 ~= str2 then
  3512.           bork("bad string value"); return
  3513.         end
  3514.       end
  3515.     end
  3516.     -- prototypes (compared recursively)
  3517.     local nproto = goodint(c1, c2)
  3518.     if not nproto then
  3519.       bork("bad nproto"); return
  3520.     end
  3521.     for i = 1, nproto do
  3522.       if not getfunc(c1, c2) then
  3523.         bork("bad function prototype"); return
  3524.       end
  3525.     end
  3526.     -- debug information (ignored)
  3527.     -- lineinfo (ignored)
  3528.     local sizelineinfo1 = getint(c1)
  3529.     if not sizelineinfo1 then
  3530.       bork("bad sizelineinfo1"); return
  3531.     end
  3532.     local sizelineinfo2 = getint(c2)
  3533.     if not sizelineinfo2 then
  3534.       bork("bad sizelineinfo2"); return
  3535.     end
  3536.     if not getblock(c1, sizelineinfo1 * sz_int) then
  3537.       bork("bad lineinfo1"); return
  3538.     end
  3539.     if not getblock(c2, sizelineinfo2 * sz_int) then
  3540.       bork("bad lineinfo2"); return
  3541.     end
  3542.     -- locvars (ignored)
  3543.     local sizelocvars1 = getint(c1)
  3544.     if not sizelocvars1 then
  3545.       bork("bad sizelocvars1"); return
  3546.     end
  3547.     local sizelocvars2 = getint(c2)
  3548.     if not sizelocvars2 then
  3549.       bork("bad sizelocvars2"); return
  3550.     end
  3551.     for i = 1, sizelocvars1 do
  3552.       if not getstring(c1) or not getint(c1) or not getint(c1) then
  3553.         bork("bad locvars1"); return
  3554.       end
  3555.     end
  3556.     for i = 1, sizelocvars2 do
  3557.       if not getstring(c2) or not getint(c2) or not getint(c2) then
  3558.         bork("bad locvars2"); return
  3559.       end
  3560.     end
  3561.     -- upvalues (ignored)
  3562.     local sizeupvalues1 = getint(c1)
  3563.     if not sizeupvalues1 then
  3564.       bork("bad sizeupvalues1"); return
  3565.     end
  3566.     local sizeupvalues2 = getint(c2)
  3567.     if not sizeupvalues2 then
  3568.       bork("bad sizeupvalues2"); return
  3569.     end
  3570.     for i = 1, sizeupvalues1 do
  3571.       if not getstring(c1) then bork("bad upvalues1"); return end
  3572.     end
  3573.     for i = 1, sizeupvalues2 do
  3574.       if not getstring(c2) then bork("bad upvalues2"); return end
  3575.     end
  3576.     return true
  3577.   end
  3578.   --------------------------------------------------------------------
  3579.   -- parse binary chunks to verify equivalence
  3580.   -- * for headers, handle sizes to allow a degree of flexibility
  3581.   -- * assume the binary chunks are valid, since they were produced by
  3582.   --   loadstring() above and not obtained via external means
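          -- * the 12 header bytes handled below are, in Lua 5.1 layout:
          --   signature "\27Lua" (4), version, format, endianness flag,
          --   sizeof(int), sizeof(size_t), sizeof(Instruction), sizeof(lua_Number)
          --   and the integral flag; only the endian and size bytes are read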
  3583.   --------------------------------------------------------------------
  3584.   if not (ensure(c1, 12) and ensure(c2, 12)) then
  3585.     bork("header broken")
  3586.   end
  3587.   skip(c1, 6)                   -- skip signature(4), version, format
  3588.   endian    = getbyte(c1)       -- 1 = little endian
  3589.   sz_int    = getbyte(c1)       -- get data type sizes
  3590.   sz_sizet  = getbyte(c1)
  3591.   sz_inst   = getbyte(c1)
  3592.   sz_number = getbyte(c1)
  3593.   skip(c1)                      -- skip integral flag
  3594.   skip(c2, 12)                  -- skip other header (assume similar)
  3595.   if endian == 1 then           -- set for endian sensitive data we need
  3596.     getint   = getint_l
  3597.     getsizet = getsizet_l
  3598.   else
  3599.     getint   = getint_b
  3600.     getsizet = getsizet_b
  3601.   end
  3602.   getfunc(c1, c2)               -- get prototype at root
  3603.   if c1.i ~= c1.len + 1 then
  3604.     bork("inconsistent binary chunk1"); return
  3605.   elseif c2.i ~= c2.len + 1 then
  3606.     bork("inconsistent binary chunk2"); return
  3607.   end
  3608.   --------------------------------------------------------------------
  3609.   -- successful comparison if end is reached with no borks
  3610.   --------------------------------------------------------------------
  3611. end
  3612. --end of inserted module
  3613. end
  3614.  
  3615. -- preload function for module plugin/html
  3616. preload["plugin/html"] =
  3617. function()
  3618. --start of inserted module
  3619. module "plugin/html"
  3620.  
  3621. local string = base.require "string"
  3622. local table = base.require "table"
  3623. local io = base.require "io"
  3624.  
  3625. ------------------------------------------------------------------------
  3626. -- constants and configuration
  3627. ------------------------------------------------------------------------
  3628.  
  3629. local HTML_EXT = ".html"
  3630. local ENTITIES = {
  3631.   ["&"] = "&amp;", ["<"] = "&lt;", [">"] = "&gt;",
  3632.   ["'"] = "&apos;", ["\""] = "&quot;",
  3633. }
  3634.  
  3635. -- simple headers and footers
  3636. local HEADER = [[
  3637. <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
  3638. <html>
  3639. <head>
  3640. <title>%s</title>
  3641. <meta name="Generator" content="LuaSrcDiet">
  3642. <style type="text/css">
  3643. %s</style>
  3644. </head>
  3645. <body>
  3646. <pre class="code">
  3647. ]]
  3648. local FOOTER = [[
  3649. </pre>
  3650. </body>
  3651. </html>
  3652. ]]
  3653. -- for more, please see wikimain.css from the Lua wiki site
  3654. local STYLESHEET = [[
  3655. BODY {
  3656.     background: white;
  3657.     color: navy;
  3658. }
  3659. pre.code { color: black; }
  3660. span.comment { color: #00a000; }
  3661. span.string  { color: #009090; }
  3662. span.keyword { color: black; font-weight: bold; }
  3663. span.number { color: #993399; }
  3664. span.operator { }
  3665. span.name { }
  3666. span.global { color: #ff0000; font-weight: bold; }
  3667. span.local { color: #0000ff; font-weight: bold; }
  3668. ]]
  3669.  
  3670. ------------------------------------------------------------------------
  3671. -- option handling, plays nice with --quiet option
  3672. ------------------------------------------------------------------------
  3673.  
  3674. local option                    -- local reference to list of options
  3675. local srcfl, destfl             -- filenames
  3676. local toklist, seminfolist, toklnlist  -- token data
  3677.  
  3678. local function print(...)               -- handle quiet option
  3679.   if option.QUIET then return end
  3680.   base.print(...)
  3681. end
  3682.  
  3683. ------------------------------------------------------------------------
  3684. -- initialization
  3685. ------------------------------------------------------------------------
  3686.  
  3687. function init(_option, _srcfl, _destfl)
  3688.   option = _option
  3689.   srcfl = _srcfl
  3690.   local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
  3691.   local basename, extension = srcfl, ""
  3692.   if extb and extb > 1 then
  3693.     basename = string.sub(srcfl, 1, extb - 1)
  3694.     extension = string.sub(srcfl, extb, exte)
  3695.   end
  3696.   destfl = basename..HTML_EXT
  3697.   if option.OUTPUT_FILE then
  3698.     destfl = option.OUTPUT_FILE
  3699.   end
  3700.   if srcfl == destfl then
  3701.     base.error("output filename identical to input filename")
  3702.   end
  3703. end
  3704.  
  3705. ------------------------------------------------------------------------
  3706. -- message display, post-load processing
  3707. ------------------------------------------------------------------------
  3708.  
  3709. function post_load(z)
  3710.   print([[
  3711. HTML plugin module for LuaSrcDiet
  3712. ]])
  3713.   print("Exporting: "..srcfl.." -> "..destfl.."\n")
  3714. end
  3715.  
  3716. ------------------------------------------------------------------------
  3717. -- post-lexing processing, can work on lexer table output
  3718. ------------------------------------------------------------------------
  3719.  
  3720. function post_lex(_toklist, _seminfolist, _toklnlist)
  3721.   toklist, seminfolist, toklnlist
  3722.     = _toklist, _seminfolist, _toklnlist
  3723. end
  3724.  
  3725. ------------------------------------------------------------------------
  3726. -- escape the usual suspects for HTML/XML
  3727. ------------------------------------------------------------------------
  3728.  
  3729. local function do_entities(z)
  3730.   local i = 1
  3731.   while i <= #z do
  3732.     local c = string.sub(z, i, i)
  3733.     local d = ENTITIES[c]
  3734.     if d then
  3735.       c = d
  3736.       z = string.sub(z, 1, i - 1)..c..string.sub(z, i + 1)
  3737.     end
  3738.     i = i + #c
  3739.   end--while
  3740.   return z
  3741. end
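        -- e.g. (illustrative) do_entities('if a<b and s=="x" then') returns
        -- 'if a&lt;b and s==&quot;x&quot; then'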
  3742.  
  3743. ------------------------------------------------------------------------
  3744. -- save source code to file
  3745. ------------------------------------------------------------------------
  3746.  
  3747. local function save_file(fname, dat)
  3748.   local OUTF = io.open(fname, "wb")
  3749.   if not OUTF then base.error("cannot open \""..fname.."\" for writing") end
  3750.   local status = OUTF:write(dat)
  3751.   if not status then base.error("cannot write to \""..fname.."\"") end
  3752.   OUTF:close()
  3753. end
  3754.  
  3755. ------------------------------------------------------------------------
  3756. -- post-parsing processing, gives globalinfo, localinfo
  3757. ------------------------------------------------------------------------
  3758.  
  3759. function post_parse(globalinfo, localinfo)
  3760.   local html = {}
  3761.   local function add(s)         -- html helpers
  3762.     html[#html + 1] = s
  3763.   end
  3764.   local function span(class, s)
  3765.     add('<span class="'..class..'">'..s..'</span>')
  3766.   end
  3767.   ----------------------------------------------------------------------
  3768.   for i = 1, #globalinfo do     -- mark global identifiers as TK_GLOBAL
  3769.     local obj = globalinfo[i]
  3770.     local xref = obj.xref
  3771.     for j = 1, #xref do
  3772.       local p = xref[j]
  3773.       toklist[p] = "TK_GLOBAL"
  3774.     end
  3775.   end--for
  3776.   ----------------------------------------------------------------------
  3777.   for i = 1, #localinfo do      -- mark local identifiers as TK_LOCAL
  3778.     local obj = localinfo[i]
  3779.     local xref = obj.xref
  3780.     for j = 1, #xref do
  3781.       local p = xref[j]
  3782.       toklist[p] = "TK_LOCAL"
  3783.     end
  3784.   end--for
  3785.   ----------------------------------------------------------------------
  3786.   add(string.format(HEADER,     -- header and leading stuff
  3787.     do_entities(srcfl),
  3788.     STYLESHEET))
  3789.   for i = 1, #toklist do        -- enumerate token list
  3790.     local tok, info = toklist[i], seminfolist[i]
  3791.     if tok == "TK_KEYWORD" then
  3792.       span("keyword", info)
  3793.     elseif tok == "TK_STRING" or tok == "TK_LSTRING" then
  3794.       span("string", do_entities(info))
  3795.     elseif tok == "TK_COMMENT" or tok == "TK_LCOMMENT" then
  3796.       span("comment", do_entities(info))
  3797.     elseif tok == "TK_GLOBAL" then
  3798.       span("global", info)
  3799.     elseif tok == "TK_LOCAL" then
  3800.       span("local", info)
  3801.     elseif tok == "TK_NAME" then
  3802.       span("name", info)
  3803.     elseif tok == "TK_NUMBER" then
  3804.       span("number", info)
  3805.     elseif tok == "TK_OP" then
  3806.       span("operator", do_entities(info))
  3807.     elseif tok ~= "TK_EOS" then  -- TK_EOL, TK_SPACE
  3808.       add(info)
  3809.     end
  3810.   end--for
  3811.   add(FOOTER)
  3812.   save_file(destfl, table.concat(html))
  3813.   option.EXIT = true
  3814. end
  3815. --end of inserted module
  3816. end
  3817.  
  3818. -- preload function for module plugin/sloc
  3819. preload["plugin/sloc"] =
  3820. function()
  3821. --start of inserted module
  3822. module "plugin/sloc"
  3823.  
  3824. local string = base.require "string"
  3825. local table = base.require "table"
  3826.  
  3827. ------------------------------------------------------------------------
  3828. -- initialization
  3829. ------------------------------------------------------------------------
  3830.  
  3831. local option                    -- local reference to list of options
  3832. local srcfl                     -- source file name
  3833.  
  3834. function init(_option, _srcfl, _destfl)
  3835.   option = _option
  3836.   option.QUIET = true
  3837.   srcfl = _srcfl
  3838. end
  3839.  
  3840. ------------------------------------------------------------------------
  3841. -- splits a block into a table of lines (minus EOLs)
  3842. ------------------------------------------------------------------------
  3843.  
  3844. local function split(blk)
  3845.   local lines = {}
  3846.   local i, nblk = 1, #blk
  3847.   while i <= nblk do
  3848.     local p, q, r, s = string.find(blk, "([\r\n])([\r\n]?)", i)
  3849.     if not p then
  3850.       p = nblk + 1
  3851.     end
  3852.     lines[#lines + 1] = string.sub(blk, i, p - 1)
  3853.     i = p + 1
  3854.     if p < nblk and q > p and r ~= s then  -- handle Lua-style CRLF, LFCR
  3855.       i = i + 1
  3856.     end
  3857.   end
  3858.   return lines
  3859. end
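        -- e.g. (illustrative) split("a\r\nb\nc") --> { "a", "b", "c" }; a CRLF
        -- or LFCR pair is treated as a single line break, as in the Lua lexer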
  3860.  
  3861. ------------------------------------------------------------------------
  3862. -- post-lexing processing, can work on lexer table output
  3863. ------------------------------------------------------------------------
  3864.  
  3865. function post_lex(toklist, seminfolist, toklnlist)
  3866.   local lnow, sloc = 0, 0
  3867.   local function chk(ln)        -- if a new line, count it as an SLOC
  3868.     if ln > lnow then           -- new line # must be > old line #
  3869.       sloc = sloc + 1; lnow = ln
  3870.     end
  3871.   end
  3872.   for i = 1, #toklist do        -- enumerate over all tokens
  3873.     local tok, info, ln
  3874.       = toklist[i], seminfolist[i], toklnlist[i]
  3875.     --------------------------------------------------------------------
  3876.     if tok == "TK_KEYWORD" or tok == "TK_NAME" or       -- significant
  3877.        tok == "TK_NUMBER" or tok == "TK_OP" then
  3878.       chk(ln)
  3879.     --------------------------------------------------------------------
  3880.     -- Both TK_STRING and TK_LSTRING may be multi-line, hence, a loop
  3881.     -- is needed in order to mark off lines one-by-one. Since llex.lua
  3882.     -- currently returns the line number of the last part of the string,
  3883.     -- we must subtract in order to get the starting line number.
  3884.     --------------------------------------------------------------------
  3885.     elseif tok == "TK_STRING" then      -- possible multi-line
  3886.       local t = split(info)
  3887.       ln = ln - #t + 1
  3888.       for j = 1, #t do
  3889.         chk(ln); ln = ln + 1
  3890.       end
  3891.     --------------------------------------------------------------------
  3892.     elseif tok == "TK_LSTRING" then     -- possible multi-line
  3893.       local t = split(info)
  3894.       ln = ln - #t + 1
  3895.       for j = 1, #t do
  3896.         if t[j] ~= "" then chk(ln) end
  3897.         ln = ln + 1
  3898.       end
  3899.     --------------------------------------------------------------------
  3900.     -- other tokens are comments or whitespace and are ignored
  3901.     --------------------------------------------------------------------
  3902.     end
  3903.   end--for
  3904.   base.print(srcfl..": "..sloc) -- display result
  3905.   option.EXIT = true
  3906. end
  3907. --end of inserted module
  3908. end
  3909.  
  3910. -- support modules
  3911. local llex = require "llex"
  3912. local lparser = require "lparser"
  3913. local optlex = require "optlex"
  3914. local optparser = require "optparser"
  3915. local equiv = require "equiv"
  3916. local plugin
  3917.  
  3918. --[[--------------------------------------------------------------------
  3919. -- messages and textual data
  3920. ----------------------------------------------------------------------]]
  3921.  
  3922. local MSG_TITLE = [[
  3923. LuaSrcDiet: Puts your Lua 5.1 source code on a diet
  3924. Version 0.12.1 (20120407)  Copyright (c) 2012 Kein-Hong Man
  3925. The COPYRIGHT file describes the conditions under which this
  3926. software may be distributed.
  3927. ]]
  3928.  
  3929. local MSG_USAGE = [[
  3930. usage: LuaSrcDiet [options] [filenames]
  3931.  
  3932. example:
  3933.   >LuaSrcDiet myscript.lua -o myscript_.lua
  3934.  
  3935. options:
  3936.   -v, --version       prints version information
  3937.   -h, --help          prints usage information
  3938.   -o <file>           specify file name to write output
  3939.   -s <suffix>         suffix for output files (default '_')
  3940.   --keep <msg>        keep block comment with <msg> inside
  3941.   --plugin <module>   run <module> in plugin/ directory
  3942.   -                   stop handling arguments
  3943.  
  3944.   (optimization levels)
  3945.   --none              all optimizations off (normalizes EOLs only)
  3946.   --basic             lexer-based optimizations only
  3947.   --maximum           maximize reduction of source
  3948.  
  3949.   (informational)
  3950.   --quiet             process files quietly
  3951.   --read-only         read file and print token stats only
  3952.   --dump-lexer        dump raw tokens from lexer to stdout
  3953.   --dump-parser       dump variable tracking tables from parser
  3954.   --details           extra info (strings, numbers, locals)
  3955.  
  3956. features (to disable, insert 'no' prefix like --noopt-comments):
  3957. %s
  3958. default settings:
  3959. %s]]
  3960.  
  3961. ------------------------------------------------------------------------
  3962. -- optimization options, for ease of switching on and off
  3963. -- * positive to enable optimization, negative (no) to disable
  3964. -- * these options should follow --opt-* and --noopt-* style for now
  3965. ------------------------------------------------------------------------
  3966.  
  3967. local OPTION = [[
  3968. --opt-comments,'remove comments and block comments'
  3969. --opt-whitespace,'remove whitespace excluding EOLs'
  3970. --opt-emptylines,'remove empty lines'
  3971. --opt-eols,'all above, plus remove unnecessary EOLs'
  3972. --opt-strings,'optimize strings and long strings'
  3973. --opt-numbers,'optimize numbers'
  3974. --opt-locals,'optimize local variable names'
  3975. --opt-entropy,'tries to reduce symbol entropy of locals'
  3976. --opt-srcequiv,'insist on source (lexer stream) equivalence'
  3977. --opt-binequiv,'insist on binary chunk equivalence'
  3978. --opt-experimental,'apply experimental optimizations'
  3979. ]]
  3980.  
  3981. -- preset configuration
  3982. local DEFAULT_CONFIG = [[
  3983.   --opt-comments --opt-whitespace --opt-emptylines
  3984.   --opt-numbers --opt-locals
  3985.   --opt-srcequiv --opt-binequiv
  3986. ]]
  3987. -- override configurations
  3988. -- * MUST explicitly enable/disable everything for
  3989. --   total option replacement
  3990. local BASIC_CONFIG = [[
  3991.   --opt-comments --opt-whitespace --opt-emptylines
  3992.   --noopt-eols --noopt-strings --noopt-numbers
  3993.   --noopt-locals --noopt-entropy
  3994.   --opt-srcequiv --opt-binequiv
  3995. ]]
  3996. local MAXIMUM_CONFIG = [[
  3997.   --opt-comments --opt-whitespace --opt-emptylines
  3998.   --opt-eols --opt-strings --opt-numbers
  3999.   --opt-locals --opt-entropy
  4000.   --opt-srcequiv --opt-binequiv
  4001. ]]
  4002. local NONE_CONFIG = [[
  4003.   --noopt-comments --noopt-whitespace --noopt-emptylines
  4004.   --noopt-eols --noopt-strings --noopt-numbers
  4005.   --noopt-locals --noopt-entropy
  4006.   --opt-srcequiv --opt-binequiv
  4007. ]]
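-- illustrative note (not part of the original source): presets are applied
-- in command-line order, so later switches override earlier ones, e.g.
--   LuaSrcDiet --basic --opt-locals myscript.lua
-- starts from BASIC_CONFIG and then re-enables local variable renaming.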
  4008.  
  4009. local DEFAULT_SUFFIX = "_"      -- default suffix for file renaming
  4010. local PLUGIN_SUFFIX = "plugin/" -- relative location of plugins
  4011.  
  4012. --[[--------------------------------------------------------------------
  4013. -- startup and initialize option list handling
  4014. ----------------------------------------------------------------------]]
  4015.  
  4016. -- simple error message handler; change to error if traceback wanted
  4017. local function die(msg)
  4018.   print("LuaSrcDiet (error): "..msg); os.exit(1)
  4019. end
  4020. --die = error--DEBUG
  4021.  
  4022. if not string.find(_VERSION, "5.1", 1, 1) then  -- sanity check (plain substring find)
  4023.   die("requires Lua 5.1 to run")
  4024. end
  4025.  
  4026. ------------------------------------------------------------------------
  4027. -- prepare the text listing the optimizations and build the option lookup table
  4028. ------------------------------------------------------------------------
  4029.  
  4030. local MSG_OPTIONS = ""
  4031. do
  4032.   local WIDTH = 24
  4033.   local o = {}
  4034.   for op, desc in gmatch(OPTION, "%s*([^,]+),'([^']+)'") do
  4035.     local msg = "  "..op
  4036.     msg = msg..string.rep(" ", WIDTH - #msg)..desc.."\n"
  4037.     MSG_OPTIONS = MSG_OPTIONS..msg
  4038.     o[op] = true
  4039.     o["--no"..sub(op, 3)] = true
  4040.   end
  4041.   OPTION = o  -- replace OPTION with lookup table
  4042. end
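-- e.g. (illustrative): the first OPTION entry parses as
--   op   = "--opt-comments"
--   desc = "remove comments and block comments"
-- and registers both "--opt-comments" and "--noopt-comments" in the lookup.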
  4043.  
  4044. MSG_USAGE = string.format(MSG_USAGE, MSG_OPTIONS, DEFAULT_CONFIG)
  4045.  
  4046. if p_embedded then  -- embedded plugins
  4047.   local EMBED_INFO = "\nembedded plugins:\n"
  4048.   for i = 1, #p_embedded do
  4049.     local p = p_embedded[i]
  4050.     EMBED_INFO = EMBED_INFO.."  "..plugin_info[p].."\n"
  4051.   end
  4052.   MSG_USAGE = MSG_USAGE..EMBED_INFO
  4053. end
  4054.  
  4055. ------------------------------------------------------------------------
  4056. -- global variable initialization, option set handling
  4057. ------------------------------------------------------------------------
  4058.  
  4059. local suffix = DEFAULT_SUFFIX           -- file suffix
  4060. local option = {}                       -- program options
  4061. local stat_c, stat_l                    -- statistics tables
  4062.  
  4063. -- function to set option lookup table based on a text list of options
  4064. -- note: additional forced settings for --opt-eols are handled in optlex.lua
  4065. local function set_options(CONFIG)
  4066.   for op in gmatch(CONFIG, "(%-%-%S+)") do
  4067.     if sub(op, 3, 4) == "no" and        -- handle negative options
  4068.        OPTION["--"..sub(op, 5)] then
  4069.       option[sub(op, 5)] = false
  4070.     else
  4071.       option[sub(op, 3)] = true
  4072.     end
  4073.   end
  4074. end
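-- e.g. (illustrative):
--   set_options("--noopt-comments --opt-eols")
--   -- leaves option["opt-comments"] == false and option["opt-eols"] == true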
  4075.  
  4076. --[[--------------------------------------------------------------------
  4077. -- support functions
  4078. ----------------------------------------------------------------------]]
  4079.  
  4080. -- list of token types; parser-significant types come first, up to
  4081. -- TTYPE_GRAMMAR, the rest are not used by the parser; arranged for stats display
  4082. local TTYPES = {
  4083.   "TK_KEYWORD", "TK_NAME", "TK_NUMBER",         -- grammar
  4084.   "TK_STRING", "TK_LSTRING", "TK_OP",
  4085.   "TK_EOS",
  4086.   "TK_COMMENT", "TK_LCOMMENT",                  -- non-grammar
  4087.   "TK_EOL", "TK_SPACE",
  4088. }
  4089. local TTYPE_GRAMMAR = 7
  4090.  
  4091. local EOLTYPES = {                      -- EOL names for token dump
  4092.   ["\n"] = "LF", ["\r"] = "CR",
  4093.   ["\n\r"] = "LFCR", ["\r\n"] = "CRLF",
  4094. }
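-- note (descriptive, not in the original source): TTYPE_GRAMMAR = 7 means
-- the first seven TTYPES entries (TK_KEYWORD through TK_EOS) are the ones
-- totalled as grammar tokens by stat_calc below.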
  4095.  
  4096. ------------------------------------------------------------------------
  4097. -- read source code from file
  4098. ------------------------------------------------------------------------
  4099.  
  4100. local function load_file(fname)
  4101.   local INF = io.open(fname, "rb")
  4102.   if not INF then die('cannot open "'..fname..'" for reading') end
  4103.   local dat = INF:read("*a")
  4104.   if not dat then die('cannot read from "'..fname..'"') end
  4105.   INF:close()
  4106.   return dat
  4107. end
  4108.  
  4109. ------------------------------------------------------------------------
  4110. -- save source code to file
  4111. ------------------------------------------------------------------------
  4112.  
  4113. local function save_file(fname, dat)
  4114.   local OUTF = io.open(fname, "wb")
  4115.   if not OUTF then die('cannot open "'..fname..'" for writing') end
  4116.   local status = OUTF:write(dat)
  4117.   if not status then die('cannot write to "'..fname..'"') end
  4118.   OUTF:close()
  4119. end
  4120.  
  4121. ------------------------------------------------------------------------
  4122. -- functions to deal with statistics
  4123. ------------------------------------------------------------------------
  4124.  
  4125. -- initialize statistics table
  4126. local function stat_init()
  4127.   stat_c, stat_l = {}, {}
  4128.   for i = 1, #TTYPES do
  4129.     local ttype = TTYPES[i]
  4130.     stat_c[ttype], stat_l[ttype] = 0, 0
  4131.   end
  4132. end
  4133.  
  4134. -- add a token to statistics table
  4135. local function stat_add(tok, seminfo)
  4136.   stat_c[tok] = stat_c[tok] + 1
  4137.   stat_l[tok] = stat_l[tok] + #seminfo
  4138. end
  4139.  
  4140. -- do totals for statistics table, return average table
  4141. local function stat_calc()
  4142.   local function avg(c, l)                      -- safe average function
  4143.     if c == 0 then return 0 end
  4144.     return l / c
  4145.   end
  4146.   local stat_a = {}
  4147.   local c, l = 0, 0
  4148.   for i = 1, TTYPE_GRAMMAR do                   -- total grammar tokens
  4149.     local ttype = TTYPES[i]
  4150.     c = c + stat_c[ttype]; l = l + stat_l[ttype]
  4151.   end
  4152.   stat_c.TOTAL_TOK, stat_l.TOTAL_TOK = c, l
  4153.   stat_a.TOTAL_TOK = avg(c, l)
  4154.   c, l = 0, 0
  4155.   for i = 1, #TTYPES do                         -- total all tokens
  4156.     local ttype = TTYPES[i]
  4157.     c = c + stat_c[ttype]; l = l + stat_l[ttype]
  4158.     stat_a[ttype] = avg(stat_c[ttype], stat_l[ttype])
  4159.   end
  4160.   stat_c.TOTAL_ALL, stat_l.TOTAL_ALL = c, l
  4161.   stat_a.TOTAL_ALL = avg(c, l)
  4162.   return stat_a
  4163. end
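-- e.g. (illustrative): 10 TK_NAME tokens totalling 35 bytes give
-- stat_a.TK_NAME == 3.5; avg() guards against division by zero for token
-- types that never occur.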
  4164.  
  4165. --[[--------------------------------------------------------------------
  4166. -- main tasks
  4167. ----------------------------------------------------------------------]]
  4168.  
  4169. ------------------------------------------------------------------------
  4170. -- a simple token dumper, minimal translation of seminfo data
  4171. ------------------------------------------------------------------------
  4172.  
  4173. local function dump_tokens(srcfl)
  4174.   --------------------------------------------------------------------
  4175.   -- load file and process source input into tokens
  4176.   --------------------------------------------------------------------
  4177.   local z = load_file(srcfl)
  4178.   llex.init(z)
  4179.   llex.llex()
  4180.   local toklist, seminfolist = llex.tok, llex.seminfo
  4181.   --------------------------------------------------------------------
  4182.   -- display output
  4183.   --------------------------------------------------------------------
  4184.   for i = 1, #toklist do
  4185.     local tok, seminfo = toklist[i], seminfolist[i]
  4186.     if tok == "TK_OP" and string.byte(seminfo) < 32 then
  4187.       seminfo = "(".. string.byte(seminfo)..")"
  4188.     elseif tok == "TK_EOL" then
  4189.       seminfo = EOLTYPES[seminfo]
  4190.     else
  4191.       seminfo = "'"..seminfo.."'"
  4192.     end
  4193.     print(tok.." "..seminfo)
  4194.   end--for
  4195. end
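-- sample --dump-lexer output (illustrative, format follows the code above):
--   TK_KEYWORD 'local'
--   TK_SPACE ' '
--   TK_EOL LF
-- control characters lexed as TK_OP are printed as "(<byte>)" instead.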
  4196.  
  4197. ----------------------------------------------------------------------
  4198. -- parser dump; dump globalinfo and localinfo tables
  4199. ----------------------------------------------------------------------
  4200.  
  4201. local function dump_parser(srcfl)
  4202.   local print = print
  4203.   --------------------------------------------------------------------
  4204.   -- load file and process source input into tokens
  4205.   --------------------------------------------------------------------
  4206.   local z = load_file(srcfl)
  4207.   llex.init(z)
  4208.   llex.llex()
  4209.   local toklist, seminfolist, toklnlist
  4210.     = llex.tok, llex.seminfo, llex.tokln
  4211.   --------------------------------------------------------------------
  4212.   -- do parser optimization here
  4213.   --------------------------------------------------------------------
  4214.   lparser.init(toklist, seminfolist, toklnlist)
  4215.   local xinfo = lparser.parser()
  4216.   local globalinfo, localinfo =
  4217.     xinfo.globalinfo, xinfo.localinfo
  4218.   --------------------------------------------------------------------
  4219.   -- display output
  4220.   --------------------------------------------------------------------
  4221.   local hl = string.rep("-", 72)
  4222.   print("*** Local/Global Variable Tracker Tables ***")
  4223.   print(hl.."\n GLOBALS\n"..hl)
  4224.   -- global tables have a list of xref numbers only
  4225.   for i = 1, #globalinfo do
  4226.     local obj = globalinfo[i]
  4227.     local msg = "("..i..") '"..obj.name.."' -> "
  4228.     local xref = obj.xref
  4229.     for j = 1, #xref do msg = msg..xref[j].." " end
  4230.     print(msg)
  4231.   end
  4232.   -- local tables have xref numbers and a few other special
  4233.   -- numbers that are specially named: decl (declaration xref),
  4234.   -- act (activation xref), rem (removal xref)
  4235.   print(hl.."\n LOCALS (decl=declared act=activated rem=removed)\n"..hl)
  4236.   for i = 1, #localinfo do
  4237.     local obj = localinfo[i]
  4238.     local msg = "("..i..") '"..obj.name.."' decl:"..obj.decl..
  4239.                 " act:"..obj.act.." rem:"..obj.rem
  4240.     if obj.isself then
  4241.       msg = msg.." isself"
  4242.     end
  4243.     msg = msg.." -> "
  4244.     local xref = obj.xref
  4245.     for j = 1, #xref do msg = msg..xref[j].." " end
  4246.     print(msg)
  4247.   end
  4248.   print(hl.."\n")
  4249. end
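-- sample LOCALS line (illustrative):
--   (1) 'x' decl:5 act:6 rem:20 -> 8 12
-- i.e. local 'x' was declared at xref 5, activated at 6, removed at 20,
-- and referenced at xrefs 8 and 12.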
  4250.  
  4251. ------------------------------------------------------------------------
  4252. -- reads source file(s) and reports some statistics
  4253. ------------------------------------------------------------------------
  4254.  
  4255. local function read_only(srcfl)
  4256.   local print = print
  4257.   --------------------------------------------------------------------
  4258.   -- load file and process source input into tokens
  4259.   --------------------------------------------------------------------
  4260.   local z = load_file(srcfl)
  4261.   llex.init(z)
  4262.   llex.llex()
  4263.   local toklist, seminfolist = llex.tok, llex.seminfo
  4264.   print(MSG_TITLE)
  4265.   print("Statistics for: "..srcfl.."\n")
  4266.   --------------------------------------------------------------------
  4267.   -- collect statistics
  4268.   --------------------------------------------------------------------
  4269.   stat_init()
  4270.   for i = 1, #toklist do
  4271.     local tok, seminfo = toklist[i], seminfolist[i]
  4272.     stat_add(tok, seminfo)
  4273.   end--for
  4274.   local stat_a = stat_calc()
  4275.   --------------------------------------------------------------------
  4276.   -- display output
  4277.   --------------------------------------------------------------------
  4278.   local fmt = string.format
  4279.   local function figures(tt)
  4280.     return stat_c[tt], stat_l[tt], stat_a[tt]
  4281.   end
  4282.   local tabf1, tabf2 = "%-16s%8s%8s%10s", "%-16s%8d%8d%10.2f"
  4283.   local hl = string.rep("-", 42)
  4284.   print(fmt(tabf1, "Lexical",  "Input", "Input", "Input"))
  4285.   print(fmt(tabf1, "Elements", "Count", "Bytes", "Average"))
  4286.   print(hl)
  4287.   for i = 1, #TTYPES do
  4288.     local ttype = TTYPES[i]
  4289.     print(fmt(tabf2, ttype, figures(ttype)))
  4290.     if ttype == "TK_EOS" then print(hl) end
  4291.   end
  4292.   print(hl)
  4293.   print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
  4294.   print(hl)
  4295.   print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
  4296.   print(hl.."\n")
  4297. end
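-- note (descriptive): the table printed above lists, per token type, its
-- count, total bytes and average bytes for the input file only; unlike
-- process_file, --read-only writes nothing to disk.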
  4298.  
  4299. ------------------------------------------------------------------------
  4300. -- process source file(s), write output and report some statistics
  4301. ------------------------------------------------------------------------
  4302.  
  4303. local function process_file(srcfl, destfl)
  4304.   local function print(...)             -- handle quiet option
  4305.     if option.QUIET then return end
  4306.     _G.print(...)
  4307.   end
  4308.   if plugin and plugin.init then        -- plugin init
  4309.     option.EXIT = false
  4310.     plugin.init(option, srcfl, destfl)
  4311.     if option.EXIT then return end
  4312.   end
  4313.   print(MSG_TITLE)                      -- title message
  4314.   --------------------------------------------------------------------
  4315.   -- load file and process source input into tokens
  4316.   --------------------------------------------------------------------
  4317.   local z = load_file(srcfl)
  4318.   if plugin and plugin.post_load then   -- plugin post-load
  4319.     z = plugin.post_load(z) or z
  4320.     if option.EXIT then return end
  4321.   end
  4322.   llex.init(z)
  4323.   llex.llex()
  4324.   local toklist, seminfolist, toklnlist
  4325.     = llex.tok, llex.seminfo, llex.tokln
  4326.   if plugin and plugin.post_lex then    -- plugin post-lex
  4327.     plugin.post_lex(toklist, seminfolist, toklnlist)
  4328.     if option.EXIT then return end
  4329.   end
  4330.   --------------------------------------------------------------------
  4331.   -- collect 'before' statistics
  4332.   --------------------------------------------------------------------
  4333.   stat_init()
  4334.   for i = 1, #toklist do
  4335.     local tok, seminfo = toklist[i], seminfolist[i]
  4336.     stat_add(tok, seminfo)
  4337.   end--for
  4338.   local stat1_a = stat_calc()
  4339.   local stat1_c, stat1_l = stat_c, stat_l
  4340.   --------------------------------------------------------------------
  4341.   -- do parser optimization here
  4342.   --------------------------------------------------------------------
  4343.   optparser.print = print  -- hack
  4344.   lparser.init(toklist, seminfolist, toklnlist)
  4345.   local xinfo = lparser.parser()
  4346.   if plugin and plugin.post_parse then          -- plugin post-parse
  4347.     plugin.post_parse(xinfo.globalinfo, xinfo.localinfo)
  4348.     if option.EXIT then return end
  4349.   end
  4350.   optparser.optimize(option, toklist, seminfolist, xinfo)
  4351.   if plugin and plugin.post_optparse then       -- plugin post-optparse
  4352.     plugin.post_optparse()
  4353.     if option.EXIT then return end
  4354.   end
  4355.   --------------------------------------------------------------------
  4356.   -- do lexer optimization here, save output file
  4357.   --------------------------------------------------------------------
  4358.   local warn = optlex.warn  -- use this as a general warning lookup
  4359.   optlex.print = print  -- hack
  4360.   toklist, seminfolist, toklnlist
  4361.     = optlex.optimize(option, toklist, seminfolist, toklnlist)
  4362.   if plugin and plugin.post_optlex then         -- plugin post-optlex
  4363.     plugin.post_optlex(toklist, seminfolist, toklnlist)
  4364.     if option.EXIT then return end
  4365.   end
  4366.   local dat = table.concat(seminfolist)
  4367.   -- depending on the options selected, embedded EOLs in long strings and
  4368.   -- long comments may not have been normalized to \n, so tack on a warning
  4369.   if string.find(dat, "\r\n", 1, 1) or
  4370.      string.find(dat, "\n\r", 1, 1) then
  4371.     warn.MIXEDEOL = true
  4372.   end
  4373.   --------------------------------------------------------------------
  4374.   -- test source and binary chunk equivalence
  4375.   --------------------------------------------------------------------
  4376.   equiv.init(option, llex, warn)
  4377.   equiv.source(z, dat)
  4378.   equiv.binary(z, dat)
  4379.   local smsg = "before and after lexer streams are NOT equivalent!"
  4380.   local bmsg = "before and after binary chunks are NOT equivalent!"
  4381.   -- for reporting, die if option was selected, else just warn
  4382.   if warn.SRC_EQUIV then
  4383.     if option["opt-srcequiv"] then die(smsg) end
  4384.   else
  4385.     print("*** SRCEQUIV: token streams are sort of equivalent")
  4386.     if option["opt-locals"] then
  4387.       print("(but no identifier comparisons since --opt-locals enabled)")
  4388.     end
  4389.     print()
  4390.   end
  4391.   if warn.BIN_EQUIV then
  4392.     if option["opt-binequiv"] then die(bmsg) end
  4393.   else
  4394.     print("*** BINEQUIV: binary chunks are sort of equivalent")
  4395.     print()
  4396.   end
  4397.   --------------------------------------------------------------------
  4398.   -- save optimized source stream to output file
  4399.   --------------------------------------------------------------------
  4400.   save_file(destfl, dat)
  4401.   --------------------------------------------------------------------
  4402.   -- collect 'after' statistics
  4403.   --------------------------------------------------------------------
  4404.   stat_init()
  4405.   for i = 1, #toklist do
  4406.     local tok, seminfo = toklist[i], seminfolist[i]
  4407.     stat_add(tok, seminfo)
  4408.   end--for
  4409.   local stat_a = stat_calc()
  4410.   --------------------------------------------------------------------
  4411.   -- display output
  4412.   --------------------------------------------------------------------
  4413.   print("Statistics for: "..srcfl.." -> "..destfl.."\n")
  4414.   local fmt = string.format
  4415.   local function figures(tt)
  4416.     return stat1_c[tt], stat1_l[tt], stat1_a[tt],
  4417.            stat_c[tt],  stat_l[tt],  stat_a[tt]
  4418.   end
  4419.   local tabf1, tabf2 = "%-16s%8s%8s%10s%8s%8s%10s",
  4420.                        "%-16s%8d%8d%10.2f%8d%8d%10.2f"
  4421.   local hl = string.rep("-", 68)
  4422.   print("*** lexer-based optimizations summary ***\n"..hl)
  4423.   print(fmt(tabf1, "Lexical",
  4424.             "Input", "Input", "Input",
  4425.             "Output", "Output", "Output"))
  4426.   print(fmt(tabf1, "Elements",
  4427.             "Count", "Bytes", "Average",
  4428.             "Count", "Bytes", "Average"))
  4429.   print(hl)
  4430.   for i = 1, #TTYPES do
  4431.     local ttype = TTYPES[i]
  4432.     print(fmt(tabf2, ttype, figures(ttype)))
  4433.     if ttype == "TK_EOS" then print(hl) end
  4434.   end
  4435.   print(hl)
  4436.   print(fmt(tabf2, "Total Elements", figures("TOTAL_ALL")))
  4437.   print(hl)
  4438.   print(fmt(tabf2, "Total Tokens", figures("TOTAL_TOK")))
  4439.   print(hl)
  4440.   --------------------------------------------------------------------
  4441.   -- report warning flags from optimizing process
  4442.   --------------------------------------------------------------------
  4443.   if warn.LSTRING then
  4444.     print("* WARNING: "..warn.LSTRING)
  4445.   elseif warn.MIXEDEOL then
  4446.     print("* WARNING: ".."output still contains some CRLF or LFCR line endings")
  4447.   elseif warn.SRC_EQUIV then
  4448.     print("* WARNING: "..smsg)
  4449.   elseif warn.BIN_EQUIV then
  4450.     print("* WARNING: "..bmsg)
  4451.   end
  4452.   print()
  4453. end
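-- processing pipeline recap (descriptive, derived from the code above):
--   load_file -> llex -> lparser -> optparser.optimize -> optlex.optimize
--   -> equiv.source/equiv.binary checks -> save_file, with plugin hooks
--   (init, post_load, post_lex, post_parse, post_optparse, post_optlex)
--   able to short-circuit the run by setting option.EXIT.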
  4454.  
  4455. --[[--------------------------------------------------------------------
  4456. -- main functions
  4457. ----------------------------------------------------------------------]]
  4458.  
  4459. local arg = {...}  -- program arguments
  4460. local fspec = {}
  4461. set_options(DEFAULT_CONFIG)     -- set to default options at beginning
  4462.  
  4463. ------------------------------------------------------------------------
  4464. -- per-file handling, ship off to tasks
  4465. ------------------------------------------------------------------------
  4466.  
  4467. local function do_files(fspec)
  4468.   for i = 1, #fspec do
  4469.     local srcfl = fspec[i]
  4470.     local destfl
  4471.     ------------------------------------------------------------------
  4472.     -- find and replace extension for filenames
  4473.     ------------------------------------------------------------------
  4474.     local extb, exte = string.find(srcfl, "%.[^%.%\\%/]*$")
  4475.     local basename, extension = srcfl, ""
  4476.     if extb and extb > 1 then
  4477.       basename = sub(srcfl, 1, extb - 1)
  4478.       extension = sub(srcfl, extb, exte)
  4479.     end
  4480.     destfl = basename..suffix..extension
  4481.     if #fspec == 1 and option.OUTPUT_FILE then
  4482.       destfl = option.OUTPUT_FILE
  4483.     end
  4484.     if srcfl == destfl then
  4485.       die("output filename identical to input filename")
  4486.     end
  4487.     ------------------------------------------------------------------
  4488.     -- perform requested operations
  4489.     ------------------------------------------------------------------
  4490.     if option.DUMP_LEXER then
  4491.       dump_tokens(srcfl)
  4492.     elseif option.DUMP_PARSER then
  4493.       dump_parser(srcfl)
  4494.     elseif option.READ_ONLY then
  4495.       read_only(srcfl)
  4496.     else
  4497.       process_file(srcfl, destfl)
  4498.     end
  4499.   end--for
  4500. end
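-- e.g. (illustrative): with the default suffix "_",
--   "myscript.lua" -> "myscript_.lua"
--   "no_extension" -> "no_extension_"
-- unless -o <file> was given for a single input file.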
  4501.  
  4502. ------------------------------------------------------------------------
  4503. -- main function (entry point is after this definition)
  4504. ------------------------------------------------------------------------
  4505.  
  4506. local function main()
  4507.   local argn, i = #arg, 1
  4508.   if argn == 0 then
  4509.     option.HELP = true
  4510.   end
  4511.   --------------------------------------------------------------------
  4512.   -- handle arguments
  4513.   --------------------------------------------------------------------
  4514.   while i <= argn do
  4515.     local o, p = arg[i], arg[i + 1]
  4516.     local dash = match(o, "^%-%-?")
  4517.     if dash == "-" then                 -- single-dash options
  4518.       if o == "-h" then
  4519.         option.HELP = true; break
  4520.       elseif o == "-v" then
  4521.         option.VERSION = true; break
  4522.       elseif o == "-s" then
  4523.         if not p then die("-s option needs suffix specification") end
  4524.         suffix = p
  4525.         i = i + 1
  4526.       elseif o == "-o" then
  4527.         if not p then die("-o option needs a file name") end
  4528.         option.OUTPUT_FILE = p
  4529.         i = i + 1
  4530.       elseif o == "-" then
  4531.         break -- ignore rest of args
  4532.       else
  4533.         die("unrecognized option "..o)
  4534.       end
  4535.     elseif dash == "--" then            -- double-dash options
  4536.       if o == "--help" then
  4537.         option.HELP = true; break
  4538.       elseif o == "--version" then
  4539.         option.VERSION = true; break
  4540.       elseif o == "--keep" then
  4541.         if not p then die("--keep option needs a string to match") end
  4542.         option.KEEP = p
  4543.         i = i + 1
  4544.       elseif o == "--plugin" then
  4545.         if not p then die("--plugin option needs a module name") end
  4546.         if option.PLUGIN then die("only one plugin can be specified") end
  4547.         option.PLUGIN = p
  4548.         plugin = require(PLUGIN_SUFFIX..p)
  4549.         i = i + 1
  4550.       elseif o == "--quiet" then
  4551.         option.QUIET = true
  4552.       elseif o == "--read-only" then
  4553.         option.READ_ONLY = true
  4554.       elseif o == "--basic" then
  4555.         set_options(BASIC_CONFIG)
  4556.       elseif o == "--maximum" then
  4557.         set_options(MAXIMUM_CONFIG)
  4558.       elseif o == "--none" then
  4559.         set_options(NONE_CONFIG)
  4560.       elseif o == "--dump-lexer" then
  4561.         option.DUMP_LEXER = true
  4562.       elseif o == "--dump-parser" then
  4563.         option.DUMP_PARSER = true
  4564.       elseif o == "--details" then
  4565.         option.DETAILS = true
  4566.       elseif OPTION[o] then  -- lookup optimization options
  4567.         set_options(o)
  4568.       else
  4569.         die("unrecognized option "..o)
  4570.       end
  4571.     else
  4572.       fspec[#fspec + 1] = o             -- potential filename
  4573.     end
  4574.     i = i + 1
  4575.   end--while
  4576.   if option.HELP then
  4577.     print(MSG_TITLE..MSG_USAGE); return true
  4578.   elseif option.VERSION then
  4579.     print(MSG_TITLE); return true
  4580.   end
  4581.   if #fspec > 0 then
  4582.     if #fspec > 1 and option.OUTPUT_FILE then
  4583.       die("with -o, only one source file can be specified")
  4584.     end
  4585.     do_files(fspec)
  4586.     return true
  4587.   else
  4588.     die("nothing to do!")
  4589.   end
  4590. end
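-- e.g. (illustrative) command lines accepted by the argument loop above:
--   LuaSrcDiet myscript.lua -o myscript_.lua
--   LuaSrcDiet --maximum --quiet file1.lua file2.lua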
  4591.  
  4592. -- entry point -> main() -> do_files()
  4593. if not main() then
  4594.   die("Please run with option -h or --help for usage information")
  4595. end
  4596.  
  4597. -- end of script