Advertisement
StefanBashkir

Untitled

Feb 5th, 2016
107
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 17.11 KB | None | 0 0
  -- Parsed as per W3's CSS Standard
  -- Guidelines: https://www.w3.org/TR/css-syntax-3/
  --
  -- A string to number function was not coded in; tonumber() is used instead.
  --
  -- This chunk returns a tokenizer function. Call it with CSS source text and
  -- it returns an array of token tables (see CreateToken for the fields).
  -- The source is processed byte by byte; every byte >= 0x80 is treated as a
  -- "non-ASCII" name code point, which keeps UTF-8 sequences intact.

  -- UTF-8 encoding of U+FFFD, produced for invalid or truncated escapes.
  local REPLACEMENT_CHARACTER = "\239\191\189"

  -- Single-character tokens: source character -> token type.
  local PUNCTUATION_TOKENS = {
      ["("] = "<(-token>",
      [")"] = "<)-token>",
      [","] = "<comma-token>",
      [":"] = "<colon-token>",
      [";"] = "<semicolon-token>",
      ["["] = "<[-token>",
      ["]"] = "<]-token>",
      ["{"] = "<{-token>",
      ["}"] = "<}-token>",
  }

  -- Two-character attribute-match tokens: first character (followed by "=")
  -- -> token type.
  local MATCH_TOKENS = {
      ["*"] = "<substring-match-token>",
      ["^"] = "<prefix-match-token>",
      ["$"] = "<suffix-match-token>",
      ["~"] = "<include-match-token>",
      ["|"] = "<dash-match-token>",
  }

  return function(CSSSource)
      assert(type(CSSSource) == "string", "CSS source must be a string")

      local parserPosition = 1             -- index of the next unconsumed byte
      local lengthOfSource = #CSSSource
      local tokens = {}                    -- A collection of CSS-specific tokens

      -- Returns the byte at parserPosition + offset (offset defaults to 0),
      -- or "" when that index is past the end of the source.
      local function Peek(offset)
          local index = parserPosition + (offset or 0)
          return CSSSource:sub(index, index)
      end

      -- Builds an empty token of the given type.
      local function CreateToken(Type)
          local Token = {
              TokenType = Type;
              Value = "";
              Data = "";
              Flag = "";
              Unit = ""; -- used by <dimension-token>
              Repr = ""; -- used by <dimension-token>
          }
          return Token
      end

      -- Appends a finished token to the output list.
      local function EmitToken(Token)
          tokens[#tokens + 1] = Token
      end

      -- Emits a token whose Value is exactly Text and consumes #Text bytes.
      local function EmitSimpleToken(Type, Text)
          local token = CreateToken(Type)
          token.Value = Text
          EmitToken(token)
          parserPosition = parserPosition + #Text
      end

      local function IsNewline(point)
          return point == "\n" or point == "\r" or point == "\f"
      end

      local function IsWhitespace(point)
          return point:match("%s") ~= nil
      end

      local function IsDigit(point)
          return point:match("%d") ~= nil
      end

      -- A backslash starts a valid escape unless it is followed by a newline.
      local function AreCodePointsValidEscape(point1, point2)
          return point1 == "\\" and not IsNewline(point2)
      end

      -- Name-start code point: letter, underscore or non-ASCII byte.
      -- Digits are deliberately excluded.
      local function IsCodePointNameStart(point)
          return point:match("^[_A-Za-z\128-\255]$") ~= nil
      end

      -- Name code point: name-start code point, digit or hyphen.
      local function IsCodePointName(point)
          return point:match("^[%d%-_A-Za-z\128-\255]$") ~= nil
      end

      local function DoThreeCodePointsStartIdentifier(point1, point2, point3)
          if (point1 == "-") then
              -- "--" is accepted so that custom properties (--foo) tokenize
              -- as identifiers.
              return point2 == "-"
                  or IsCodePointNameStart(point2)
                  or AreCodePointsValidEscape(point2, point3)
          elseif (IsCodePointNameStart(point1)) then
              return true
          elseif (point1 == "\\") then
              return AreCodePointsValidEscape(point1, point2)
          end
          return false
      end

      local function DoThreeCodePointsStartNumber(point1, point2, point3)
          if (point1 == "+" or point1 == "-") then
              if (IsDigit(point2)) then
                  return true
              end
              return point2 == "." and IsDigit(point3)
          elseif (point1 == ".") then
              return IsDigit(point2)
          end
          return IsDigit(point1)
      end

      -- Encodes a Unicode code point (0 .. 0x10FFFF) as a UTF-8 byte string.
      local function EncodeUTF8(codePoint)
          if (codePoint < 0x80) then
              return string.char(codePoint)
          elseif (codePoint < 0x800) then
              return string.char(
                  0xC0 + math.floor(codePoint / 0x40),
                  0x80 + codePoint % 0x40)
          elseif (codePoint < 0x10000) then
              return string.char(
                  0xE0 + math.floor(codePoint / 0x1000),
                  0x80 + math.floor(codePoint / 0x40) % 0x40,
                  0x80 + codePoint % 0x40)
          end
          return string.char(
              0xF0 + math.floor(codePoint / 0x40000),
              0x80 + math.floor(codePoint / 0x1000) % 0x40,
              0x80 + math.floor(codePoint / 0x40) % 0x40,
              0x80 + codePoint % 0x40)
      end

      -- Consumes an escaped code point. The backslash must already have been
      -- consumed. Returns the text the escape stands for.
      local function ConsumeEscapedCodePoint()
          local point = Peek()
          if (point == "") then
              return REPLACEMENT_CHARACTER -- backslash at end of input
          end
          if (point:match("%x")) then
              -- Up to six hex digits, optionally followed by one whitespace.
              local hex = CSSSource:match("^%x%x?%x?%x?%x?%x?", parserPosition)
              parserPosition = parserPosition + #hex
              if (IsWhitespace(Peek())) then
                  parserPosition = parserPosition + 1
              end
              local codePoint = tonumber(hex, 16)
              if (codePoint == 0 or codePoint > 0x10FFFF
                  or (codePoint >= 0xD800 and codePoint <= 0xDFFF)) then
                  return REPLACEMENT_CHARACTER
              end
              return EncodeUTF8(codePoint)
          end
          parserPosition = parserPosition + 1
          return point
      end

      -- vvv Get number/int from string
      -- Consumes a number starting at parserPosition.
      -- Returns its source representation, its numeric value and its type
      -- ("integer", or "number" when a fraction or exponent is present).
      local function ConsumeNumber()
          local startPosition = parserPosition
          local typ = "integer"

          local function SkipDigits()
              while (IsDigit(Peek())) do
                  parserPosition = parserPosition + 1
              end
          end

          local sign = Peek()
          if (sign == "+" or sign == "-") then
              parserPosition = parserPosition + 1
          end
          SkipDigits()

          if (Peek() == "." and IsDigit(Peek(1))) then
              parserPosition = parserPosition + 2 -- consume . and digit
              typ = "number"
              SkipDigits()
          end

          local marker = Peek()
          if (marker == "e" or marker == "E") then
              local afterMarker = Peek(1)
              local exponentPrefixLength
              if ((afterMarker == "+" or afterMarker == "-") and IsDigit(Peek(2))) then
                  exponentPrefixLength = 3 -- e, sign, first digit
              elseif (IsDigit(afterMarker)) then
                  exponentPrefixLength = 2 -- e, first digit
              end
              if (exponentPrefixLength) then
                  parserPosition = parserPosition + exponentPrefixLength
                  typ = "number"
                  SkipDigits()
              end
          end

          local repr = CSSSource:sub(startPosition, parserPosition - 1)
          -- A leading "+" is stripped so tonumber() behaves the same on every
          -- Lua implementation.
          local value = tonumber((repr:gsub("^%+", "")))
          return repr, value, typ
      end

      -- Consumes the longest run of name code points and escapes starting at
      -- parserPosition and returns it (possibly the empty string).
      local function ConsumeName()
          local pieces = {}
          while (true) do
              local point = Peek()
              if (IsCodePointName(point)) then
                  pieces[#pieces + 1] = point
                  parserPosition = parserPosition + 1
              elseif (AreCodePointsValidEscape(point, Peek(1))) then
                  parserPosition = parserPosition + 1 -- consume the backslash
                  pieces[#pieces + 1] = ConsumeEscapedCodePoint()
              else
                  break
              end
          end
          return table.concat(pieces)
      end

      -- Consumes a number and emits a dimension, percentage or number token.
      local function ConsumeNumericToken()
          local repr, value, typ = ConsumeNumber()

          local token
          if (DoThreeCodePointsStartIdentifier(Peek(), Peek(1), Peek(2))) then
              token = CreateToken("<dimension-token>")
              token.Unit = ConsumeName()
          elseif (Peek() == "%") then
              parserPosition = parserPosition + 1 -- consume the percent sign
              token = CreateToken("<percentage-token>")
          else
              token = CreateToken("<number-token>")
          end
          token.Repr = repr
          token.Value = value
          token.Flag = typ
          EmitToken(token)
      end

      -- Consumes a quoted string; parserPosition must be on the opening quote.
      -- Returns a <string-token>, or an empty <bad-string-token> when an
      -- unescaped newline is hit (the newline itself is left unconsumed).
      -- The token is NOT emitted; the caller decides what to do with it.
      local function ConsumeString()
          local stringEndPoint = Peek() -- Tells when to end the string
          parserPosition = parserPosition + 1
          local stringToken = CreateToken("<string-token>")
          local pieces = {}
          while (true) do
              local point = Peek()
              if (point == "") then
                  -- End of input inside a string: keep what was collected.
                  break
              elseif (point == stringEndPoint) then
                  parserPosition = parserPosition + 1
                  break
              elseif (IsNewline(point)) then
                  -- Parse error. Should not expect new-line
                  return CreateToken("<bad-string-token>")
              elseif (point == "\\") then
                  parserPosition = parserPosition + 1
                  local escaped = Peek()
                  if (escaped == "") then
                      -- Backslash at end of input: nothing to add.
                  elseif (IsNewline(escaped)) then
                      -- Escaped newline is a line continuation.
                      parserPosition = parserPosition + 1
                      if (escaped == "\r" and Peek() == "\n") then
                          parserPosition = parserPosition + 1
                      end
                  else
                      pieces[#pieces + 1] = ConsumeEscapedCodePoint()
                  end
              else
                  pieces[#pieces + 1] = point
                  parserPosition = parserPosition + 1
              end
          end
          stringToken.Value = table.concat(pieces)
          return stringToken
      end

      -- Advances past any whitespace without emitting a token.
      local function SkipWhitespace()
          while (IsWhitespace(Peek())) do
              parserPosition = parserPosition + 1
          end
      end

      -- Consumes a run of whitespace and emits one <whitespace-token> whose
      -- Value is the run. Returns the token, or nil if there was none.
      local function ConsumeWhitespace()
          local run = CSSSource:match("^%s+", parserPosition)
          if (not run) then
              return nil
          end
          local token = CreateToken("<whitespace-token>")
          token.Value = run
          parserPosition = parserPosition + #run
          EmitToken(token)
          return token
      end

      -- Skips the rest of a malformed url(...) up to and including ")".
      local function ConsumeBadURLRemnants()
          while (true) do
              local point = Peek()
              if (point == "") then
                  return
              end
              parserPosition = parserPosition + 1
              if (point == ")") then
                  return
              elseif (AreCodePointsValidEscape(point, Peek())) then
                  -- Swallow the escape so an escaped ")" does not end the url.
                  ConsumeEscapedCodePoint()
              end
          end
      end

      -- Consumes the body of url(...); parserPosition must be just after "(".
      -- Returns a <url-token> or a <bad-url-token>; the caller emits it.
      local function ConsumeURL()
          local urlToken = CreateToken("<url-token>")

          local function BadURL()
              ConsumeBadURLRemnants()
              return CreateToken("<bad-url-token>")
          end

          SkipWhitespace()
          local point = Peek()
          if (point == "") then
              return urlToken
          end

          if (point == '"' or point == "'") then
              local stringToken = ConsumeString()
              if (stringToken.TokenType == "<bad-string-token>") then
                  return BadURL()
              end
              urlToken.Value = stringToken.Value
              SkipWhitespace()
              point = Peek()
              if (point == ")") then
                  parserPosition = parserPosition + 1
                  return urlToken
              elseif (point == "") then
                  return urlToken
              end
              return BadURL()
          end

          local pieces = {}
          while (true) do
              point = Peek()
              if (point == "") then
                  break
              elseif (point == ")") then
                  parserPosition = parserPosition + 1
                  break
              elseif (IsWhitespace(point)) then
                  -- Whitespace is only allowed right before the closing ")".
                  SkipWhitespace()
                  point = Peek()
                  if (point == ")") then
                      parserPosition = parserPosition + 1
                      break
                  elseif (point == "") then
                      break
                  end
                  return BadURL()
              elseif (point == '"' or point == "'" or point == "(" or point:match("%c")) then
                  -- Quotes, "(" and control characters are parse errors here.
                  return BadURL()
              elseif (point == "\\") then
                  if (not AreCodePointsValidEscape(point, Peek(1))) then
                      return BadURL()
                  end
                  parserPosition = parserPosition + 1
                  pieces[#pieces + 1] = ConsumeEscapedCodePoint()
              else
                  pieces[#pieces + 1] = point
                  parserPosition = parserPosition + 1
              end
          end
          urlToken.Value = table.concat(pieces)
          return urlToken
      end

      -- Consumes a name and emits a url, function or ident token.
      -- For url and function tokens the opening "(" is consumed as well.
      local function ConsumeIdentLikeToken()
          local name = ConsumeName()
          if (Peek() == "(") then
              parserPosition = parserPosition + 1 -- Consume (
              if (name:lower() == "url") then
                  EmitToken(ConsumeURL())
              else
                  local functionToken = CreateToken("<function-token>")
                  functionToken.Value = name
                  EmitToken(functionToken)
              end
          else
              local identToken = CreateToken("<ident-token>")
              identToken.Value = name
              EmitToken(identToken)
          end
      end



      -- Here begins the actual parser

      -- Consumes one token (or one comment) at parserPosition.
      -- Every branch advances parserPosition by at least one byte, so the
      -- driving loop below always terminates.
      local function ConsumeToken()
          local currentPoint = Peek()
          local nextCodePoint = Peek(1)
          local nextNextCodePoint = Peek(2)

          if (IsWhitespace(currentPoint)) then
              -- Consume as much whitespace as possible
              ConsumeWhitespace()
          elseif (currentPoint == '"' or currentPoint == "'") then
              EmitToken(ConsumeString())
          elseif (PUNCTUATION_TOKENS[currentPoint]) then
              EmitSimpleToken(PUNCTUATION_TOKENS[currentPoint], currentPoint)
          elseif (MATCH_TOKENS[currentPoint] and nextCodePoint == "=") then
              EmitSimpleToken(MATCH_TOKENS[currentPoint], currentPoint .. "=")
          elseif (currentPoint == "|" and nextCodePoint == "|") then
              EmitSimpleToken("<column-token>", "||")
          elseif (currentPoint == "#") then
              if (IsCodePointName(nextCodePoint) or AreCodePointsValidEscape(nextCodePoint, nextNextCodePoint)) then
                  local hashToken = CreateToken("<hash-token>")
                  if (DoThreeCodePointsStartIdentifier(nextCodePoint, nextNextCodePoint, Peek(3))) then
                      hashToken.Flag = "id"
                  end
                  parserPosition = parserPosition + 1 -- consume the #
                  hashToken.Value = ConsumeName()
                  EmitToken(hashToken)
              else
                  EmitSimpleToken("<delim-token>", currentPoint)
              end
          elseif (currentPoint == "/" and nextCodePoint == "*") then
              -- Comment: skip through the closing */ (or to the end of the
              -- input when the comment is never closed). No token is emitted.
              local _, commentEnd = CSSSource:find("*/", parserPosition + 2, true)
              parserPosition = (commentEnd or lengthOfSource) + 1
          elseif (currentPoint == "<" and nextCodePoint == "!" and nextNextCodePoint == "-" and Peek(3) == "-") then
              EmitSimpleToken("<CDO-token>", "<!--")
          elseif (currentPoint == "-" and nextCodePoint == "-" and nextNextCodePoint == ">") then
              -- Checked before identifiers because "--" can start one.
              EmitSimpleToken("<CDC-token>", "-->")
          elseif (currentPoint == "@" and DoThreeCodePointsStartIdentifier(nextCodePoint, nextNextCodePoint, Peek(3))) then
              parserPosition = parserPosition + 1 -- consume the @
              local atToken = CreateToken("<at-keyword-token>")
              atToken.Value = ConsumeName()
              EmitToken(atToken)
          elseif (DoThreeCodePointsStartNumber(currentPoint, nextCodePoint, nextNextCodePoint)) then
              -- Covers digits and the +, - and . prefixes.
              ConsumeNumericToken()
          elseif (DoThreeCodePointsStartIdentifier(currentPoint, nextCodePoint, nextNextCodePoint)) then
              -- Covers name-start code points, "-" prefixes and escapes.
              ConsumeIdentLikeToken()
          else
              -- Anything else is a single-character delimiter.
              EmitSimpleToken("<delim-token>", currentPoint)
          end
      end


      -- <= so that the final byte of the source is tokenized too.
      while (parserPosition <= lengthOfSource) do
          ConsumeToken()
      end

      print("CSS Tokenizing done.")

      return tokens
  end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement