Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
-- CSS tokenizer written against the W3C CSS Syntax Module Level 3.
-- Specification: https://www.w3.org/TR/css-syntax-3/
-- The specification's string-to-number conversion is not implemented here;
-- Lua's built-in tonumber() is used instead.
- return function(CSSSource)
-- Shared tokenizer state; every helper below reads and mutates these upvalues.
local tokens = {}                       -- tokens emitted so far, in source order
local lengthOfSource = CSSSource:len()  -- number of characters in the input
local parserPosition = 1                -- 1-based cursor into CSSSource
local currentToken                      -- token currently being built
local currentPoint                      -- character the tokenizer is currently looking at
-- Builds a fresh token record of the given type.
-- Every field other than TokenType starts out as the empty string.
--   Type: the token type label, e.g. "<ident-token>"
-- Returns the new token table.
local function CreateToken(Type)
    local fresh = {}
    fresh.TokenType = Type
    fresh.Value = ""
    fresh.Data = ""
    fresh.Flag = ""
    fresh.Unit = "" -- filled in for <dimension-token>
    fresh.Repr = "" -- filled in for <dimension-token>
    return fresh
end
-- Appends a finished token to the end of the output list.
local function EmitToken(Token)
    tokens[#tokens + 1] = Token
end
-- Checks whether two code points form a valid escape: a backslash that is
-- not immediately followed by a newline.
-- Returns true when they do, and nothing otherwise.
local function AreCodePointsValidEscape(point1, point2)
    if point1 ~= "\\" or point2 == "\n" then
        return
    end
    return true
end
-- Tells whether a single character may begin a CSS name.
-- Per CSS Syntax Level 3 a name-start code point is an ASCII letter, an
-- underscore, or any non-ASCII code point. Digits are deliberately excluded:
-- they may continue a name but never start one.
-- The source is processed byte by byte, so every byte >= 0x80 (any part of a
-- UTF-8 multi-byte sequence) counts as non-ASCII.
--   point: a one-character string ("" at end of input)
-- Returns true or false.
local function IsCodePointNameStart(point)
    return point:match("^[_A-Za-z\128-\255]") ~= nil
end
-- Tells whether a single character may appear inside a CSS name.
-- Per CSS Syntax Level 3 a name code point is a name-start code point
-- (ASCII letter, underscore, non-ASCII), a digit, or a hyphen.
-- The source is processed byte by byte, so every byte >= 0x80 counts as
-- non-ASCII.
--   point: a one-character string ("" at end of input)
-- Returns true or false.
local function IsCodePointName(point)
    return point:match("^[0-9A-Za-z_%-\128-\255]") ~= nil
end
-- Decides whether three consecutive code points would begin an identifier:
--   * "-" followed by a name-start code point or by a valid escape,
--   * a name-start code point,
--   * a backslash that opens a valid escape.
-- Returns true when they do, and nothing otherwise.
local function DoThreeCodePointsStartIdentifier(point1, point2, point3)
    local startsIdentifier
    if point1 == "-" then
        startsIdentifier = IsCodePointNameStart(point2)
            or AreCodePointsValidEscape(point2, point3)
    elseif IsCodePointNameStart(point1) then
        startsIdentifier = true
    elseif point1 == "\\" then
        startsIdentifier = AreCodePointsValidEscape(point1, point2)
    end
    if startsIdentifier then
        return true
    end
end
-- Decides whether three consecutive code points would begin a number:
--   * a sign followed by a digit, or by "." and a digit,
--   * "." followed by a digit,
--   * a digit.
-- Returns true on a match. Returns false when a sign or "." is followed by
-- something that rules a number out, and nothing in the remaining cases.
local function DoThreeCodePointsStartNumber(point1, point2, point3)
    local function isDigit(point)
        return point:match("%d") ~= nil
    end

    if point1 == "." then
        return isDigit(point2)
    end

    if point1 ~= "+" and point1 ~= "-" then
        -- Not a sign and not a dot: only a leading digit qualifies.
        if isDigit(point1) then
            return true
        end
        return
    end

    -- point1 is a sign from here on.
    if isDigit(point2) then
        return true
    end
    if point2 ~= "." then
        return false
    end
    if isDigit(point3) then
        return true
    end
end
-- Reads a numeric literal from the stream at the cursor.
-- WARNING: works on the stream itself; it moves parserPosition and leaves
-- currentPoint on the first character after the number.
-- Accepted shape: optional sign, digits, optional ".digits", optional
-- exponent ("e"/"E", optional sign, digits).
-- Returns three values: the literal text, tonumber() of that text, and
-- "integer" or "number" (the latter when a fraction or exponent was read).
local function ConsumeNumber()
    local pieces = {}
    local kind = "integer"

    -- Character `offset` positions ahead of the cursor ("" past the end).
    local function peek(offset)
        local at = parserPosition + offset
        return CSSSource:sub(at, at)
    end

    -- Moves `count` characters from the stream into the literal and
    -- re-reads currentPoint at the new cursor.
    local function take(count)
        pieces[#pieces + 1] = CSSSource:sub(parserPosition, parserPosition + count - 1)
        parserPosition = parserPosition + count
        currentPoint = peek(0)
    end

    local function takeDigits()
        while currentPoint:match("%d") do
            take(1)
        end
    end

    currentPoint = peek(0)

    -- Optional sign, then the integer part.
    if currentPoint == "+" or currentPoint == "-" then
        take(1)
    end
    takeDigits()

    -- Fraction: only when the dot is directly followed by a digit.
    if currentPoint == "." and peek(1):match("%d") then
        take(2)
        kind = "number"
        takeDigits()
    end

    -- Exponent: "e" + digit, or "e" + sign + digit. Anything else is left
    -- in the stream (it may be the start of a unit such as "em").
    if currentPoint == "e" or currentPoint == "E" then
        local afterE = peek(1)
        local width
        if afterE == "+" or afterE == "-" then
            if peek(2):match("%d") then
                width = 3
            end
        elseif afterE:match("%d") then
            width = 2
        end
        if width then
            take(width)
            kind = "number"
            takeDigits()
        end
    end

    local text = table.concat(pieces)
    return text, tonumber(text), kind
end
-- Reads the longest run of name code points starting at the cursor.
-- WARNING: works on the stream itself; it moves parserPosition and leaves
-- currentPoint on the first character after the name.
-- currentPoint is re-read from the stream first, so callers that advanced
-- parserPosition without refreshing it still get the right name. The loop is
-- bounded by the end of input rather than by `parserPosition < length`, so a
-- name that ends the source keeps its last character.
-- Escape sequences are not decoded; a backslash ends the name.
-- Returns the name (possibly "").
local function ConsumeName()
    local nameStart = parserPosition
    currentPoint = CSSSource:sub(parserPosition, parserPosition)
    while currentPoint ~= "" and IsCodePointName(currentPoint) do
        parserPosition = parserPosition + 1
        currentPoint = CSSSource:sub(parserPosition, parserPosition)
    end
    return CSSSource:sub(nameStart, parserPosition - 1)
end
-- Reads a number and emits it as a dimension, percentage or plain number.
-- WARNING: works on the stream itself; it moves parserPosition.
--   * number followed by an identifier -> <dimension-token> (Unit = identifier)
--   * number followed by "%"           -> <percentage-token> ("%" is consumed)
--   * anything else                    -> <number-token>
-- Every emitted token carries Repr (literal text), Value (tonumber of the
-- text) and Flag ("integer" or "number").
local function ConsumeNumericToken()
    local repr, value, typ = ConsumeNumber()

    -- Look at the three characters that follow the number. The first one is
    -- the character at the cursor itself; the look-ahead starts one past it.
    currentPoint = CSSSource:sub(parserPosition, parserPosition)
    local nextCodePoint = CSSSource:sub(parserPosition + 1, parserPosition + 1)
    local nextNextCodePoint = CSSSource:sub(parserPosition + 2, parserPosition + 2)

    local numericToken -- local on purpose: must not touch the shared 'currentToken'
    if DoThreeCodePointsStartIdentifier(currentPoint, nextCodePoint, nextNextCodePoint) then
        numericToken = CreateToken("<dimension-token>")
        numericToken.Unit = ConsumeName()
    elseif currentPoint == "%" then
        -- Consume the percent sign so it is not tokenized a second time.
        parserPosition = parserPosition + 1
        currentPoint = CSSSource:sub(parserPosition, parserPosition)
        numericToken = CreateToken("<percentage-token>")
    else
        numericToken = CreateToken("<number-token>")
    end

    numericToken.Repr = repr
    numericToken.Value = value
    numericToken.Flag = typ
    EmitToken(numericToken)
end
-- Reads a quoted string whose opening quote is at the cursor.
-- WARNING: works on the stream itself; it moves parserPosition.
--   * closing quote     -> emits <string-token>, cursor ends after the quote
--   * end of input      -> parse error, but the <string-token> read so far
--                          is still emitted
--   * unescaped newline -> emits <bad-string-token>; the cursor stays on the
--                          newline so it is tokenized again as whitespace
--   * backslash         -> "\" + newline is a line continuation (both
--                          dropped); "\" at end of input is ignored;
--                          otherwise the following character is taken
--                          literally, so an escaped quote does not end the
--                          string. Hexadecimal escapes are NOT decoded.
-- The token is both emitted and returned.
local function ConsumeString()
    currentPoint = CSSSource:sub(parserPosition, parserPosition)
    local stringEndPoint = currentPoint == "'" and "'" or '"' -- quote that closes the string
    local stringToken = CreateToken("<string-token>")
    local pieces = {}

    while true do
        parserPosition = parserPosition + 1
        currentPoint = CSSSource:sub(parserPosition, parserPosition)

        if currentPoint == "" then
            -- Ran off the end of the input: keep what was read.
            break
        elseif currentPoint == stringEndPoint then
            -- Step over the closing quote.
            parserPosition = parserPosition + 1
            currentPoint = CSSSource:sub(parserPosition, parserPosition)
            break
        elseif currentPoint == "\n" then
            -- Replace the partial string with a bad-string token and leave
            -- the newline in the stream.
            stringToken = CreateToken("<bad-string-token>")
            EmitToken(stringToken)
            return stringToken
        elseif currentPoint == "\\" then
            local escaped = CSSSource:sub(parserPosition + 1, parserPosition + 1)
            if escaped == "\n" then
                parserPosition = parserPosition + 1 -- line continuation
            elseif escaped ~= "" then
                pieces[#pieces + 1] = escaped
                parserPosition = parserPosition + 1
            end
        else
            pieces[#pieces + 1] = currentPoint
        end
    end

    stringToken.Value = table.concat(pieces)
    EmitToken(stringToken)
    return stringToken
end
-- Collapses a run of whitespace at the cursor into one <whitespace-token>.
-- WARNING: works on the stream itself; it moves parserPosition and expects
-- currentPoint to already hold the character at the cursor.
-- The token's Value is the whitespace that was consumed.
-- Returns the emitted token, or nothing when currentPoint is not whitespace.
local function ConsumeWhitespace()
    if not currentPoint:match("%s") then
        return
    end

    local blank = CreateToken("<whitespace-token>")
    repeat
        blank.Value = blank.Value .. currentPoint
        parserPosition = parserPosition + 1
        currentPoint = CSSSource:sub(parserPosition, parserPosition)
    until not currentPoint:match("%s")

    table.insert(tokens, blank)
    return blank
end
-- Reads the contents of url( ... ); the cursor must be just past the "(".
-- WARNING: works on the stream itself; it moves parserPosition. On return
-- the cursor is past the closing ")" (or at end of input).
-- Accepts either a quoted string or a bare URL, each optionally surrounded
-- by whitespace (which is skipped, not emitted).
-- Always returns a token and never emits one itself (the caller emits it):
--   * <url-token> with Value set to the URL text
--   * <bad-url-token> when the contents are malformed; the stream is then
--     skipped up to and including the next unescaped ")"
-- A backslash in a bare URL takes the following character literally;
-- hexadecimal escapes are NOT decoded.
local function ConsumeURL()
    local urlToken = CreateToken("<url-token>")

    -- Character `offset` positions ahead of the cursor ("" past the end).
    local function peek(offset)
        local at = parserPosition + offset
        return CSSSource:sub(at, at)
    end

    -- Moves the cursor and keeps currentPoint in sync with it.
    local function advance(count)
        parserPosition = parserPosition + count
        currentPoint = peek(0)
    end

    local function skipWhitespace()
        while currentPoint:match("%s") do
            advance(1)
        end
    end

    -- Error recovery: discard everything up to the closing ")" or EOF.
    local function consumeBadUrlRemnants()
        while currentPoint ~= "" do
            if currentPoint == ")" then
                advance(1)
                break
            elseif AreCodePointsValidEscape(currentPoint, peek(1)) then
                advance(2) -- an escaped ")" must not end the URL
            else
                advance(1)
            end
        end
        return CreateToken("<bad-url-token>")
    end

    advance(0) -- callers may have moved the cursor without refreshing currentPoint
    skipWhitespace()

    -- Quoted form: url("...") or url('...')
    if currentPoint == '"' or currentPoint == "'" then
        local stringToken = ConsumeString()
        -- ConsumeString emits its token; inside url() the string is part of
        -- the URL token, so take it back out of the output.
        if tokens[#tokens] == stringToken then
            table.remove(tokens)
        end
        advance(0)
        if stringToken.TokenType == "<bad-string-token>" then
            return consumeBadUrlRemnants()
        end
        urlToken.Value = stringToken.Value
        skipWhitespace()
        if currentPoint == ")" then
            advance(1)
            return urlToken
        elseif currentPoint == "" then
            return urlToken
        end
        return consumeBadUrlRemnants()
    end

    -- Bare form: url(...)
    local pieces = {}
    while currentPoint ~= "" do
        if currentPoint == ")" then
            advance(1)
            break
        elseif currentPoint:match("%s") then
            -- Whitespace is only legal directly before the closing ")".
            skipWhitespace()
            if currentPoint == ")" then
                advance(1)
                break
            elseif currentPoint ~= "" then
                return consumeBadUrlRemnants()
            end
        elseif currentPoint == "(" or currentPoint == '"' or currentPoint == "'" then
            return consumeBadUrlRemnants()
        elseif currentPoint == "\\" then
            local escaped = peek(1)
            if escaped == "" or escaped == "\n" then
                return consumeBadUrlRemnants()
            end
            pieces[#pieces + 1] = escaped
            advance(2)
        else
            pieces[#pieces + 1] = currentPoint
            advance(1)
        end
    end

    urlToken.Value = table.concat(pieces)
    return urlToken
end
-- Reads a name and emits the token it turns out to be.
-- WARNING: works on the stream itself; it moves parserPosition.
--   * name not followed by "(" -> <ident-token>  (this includes a bare "url")
--   * "url(" in any letter case -> whatever ConsumeURL returns
--   * any other name + "("     -> <function-token>
-- The "(" belongs to the function / url token and is consumed here, so no
-- separate <(-token> follows it.
local function ConsumeIdentLikeToken()
    local name = ConsumeName()
    currentPoint = CSSSource:sub(parserPosition, parserPosition)

    if currentPoint ~= "(" then
        local identToken = CreateToken("<ident-token>")
        identToken.Value = name
        EmitToken(identToken)
        return
    end

    -- Consume "(" and keep currentPoint in sync for the consumers below.
    parserPosition = parserPosition + 1
    currentPoint = CSSSource:sub(parserPosition, parserPosition)

    if name:lower() == "url" then
        local urlToken = ConsumeURL()
        if urlToken then -- never append a missing token to the stream
            EmitToken(urlToken)
        end
        return
    end

    local functionToken = CreateToken("<function-token>")
    functionToken.Value = name
    EmitToken(functionToken)
end
-- Here begins the actual tokenizer.

-- Single-character tokens emitted without a Value.
local BARE_TOKEN_TYPES = {
    ["("] = "<(-token>",
    [")"] = "<)-token>",
    [","] = "<comma-token>",
}

-- Single-character tokens whose Value is the character itself.
local VALUED_TOKEN_TYPES = {
    [":"] = "<colon-token>",
    [";"] = "<semicolon-token>",
    ["["] = "<[-token>",
    ["]"] = "<]-token>",
    ["{"] = "<{-token>",
    ["}"] = "<}-token>",
}

-- Two-character attribute-match tokens, keyed by the character before "=".
local MATCH_TOKEN_TYPES = {
    ["*"] = "<substring-match-token>",
    ["^"] = "<prefix-match-token>",
    ["$"] = "<suffix-match-token>",
    ["~"] = "<include-match-token>",
    ["|"] = "<dash-match-token>",
}

-- Consumes exactly one token starting at parserPosition and emits it.
-- WARNING: works on the stream itself; it moves parserPosition.
-- Comments are skipped and the token after them is consumed instead.
-- At end of input it does nothing. Otherwise the cursor always advances by
-- at least one character: anything not recognised is emitted as a
-- <delim-token>, so the driver loop can never stall.
-- Escape sequences are not decoded by this tokenizer; a backslash is
-- emitted as a <delim-token>.
local function ConsumeToken()
    -- Character `offset` positions ahead of the cursor ("" past the end).
    local function peek(offset)
        local at = parserPosition + offset
        return CSSSource:sub(at, at)
    end

    -- Emits a token of `tokenType` and steps over `width` characters.
    -- `value`, when given, becomes the token's Value.
    local function emit(tokenType, width, value)
        local token = CreateToken(tokenType)
        if value ~= nil then
            token.Value = value
        end
        EmitToken(token)
        parserPosition = parserPosition + width
    end

    currentPoint = peek(0)
    if currentPoint == "" then
        return -- end of input
    end

    local startPosition = parserPosition
    local point = currentPoint -- the sub-consumers overwrite currentPoint
    local next1, next2, next3 = peek(1), peek(2), peek(3)

    if point:match("%s") then
        -- Consume as much whitespace as possible
        ConsumeWhitespace()
    elseif point == '"' or point == "'" then
        ConsumeString()
    elseif point == "#" then
        if IsCodePointName(next1) then
            local hashToken = CreateToken("<hash-token>")
            if DoThreeCodePointsStartIdentifier(next1, next2, next3) then
                hashToken.Flag = "id"
            end
            parserPosition = parserPosition + 1 -- step over "#"
            local nameStart = parserPosition
            while IsCodePointName(peek(0)) do
                parserPosition = parserPosition + 1
            end
            hashToken.Value = CSSSource:sub(nameStart, parserPosition - 1)
            EmitToken(hashToken)
        else
            emit("<delim-token>", 1, point)
        end
    elseif BARE_TOKEN_TYPES[point] then
        emit(BARE_TOKEN_TYPES[point], 1)
    elseif VALUED_TOKEN_TYPES[point] then
        emit(VALUED_TOKEN_TYPES[point], 1, point)
    elseif MATCH_TOKEN_TYPES[point] and next1 == "=" then
        emit(MATCH_TOKEN_TYPES[point], 2)
    elseif point == "|" and next1 == "|" then
        emit("<column-token>", 2)
    elseif point == "+" or point == "." then
        if DoThreeCodePointsStartNumber(point, next1, next2) then
            ConsumeNumericToken()
        else
            emit("<delim-token>", 1, point)
        end
    elseif point == "-" then
        if DoThreeCodePointsStartNumber(point, next1, next2) then
            ConsumeNumericToken()
        elseif next1 == "-" and next2 == ">" then
            emit("<CDC-token>", 3)
        elseif DoThreeCodePointsStartIdentifier(point, next1, next2) then
            ConsumeIdentLikeToken()
        else
            emit("<delim-token>", 1, point)
        end
    elseif point == "/" and next1 == "*" then
        -- Skip the comment, including its closing "*/". The search starts
        -- after the opening "/*" so that "/*/" does not close itself. An
        -- unterminated comment swallows the rest of the input.
        local _, closeEnd = CSSSource:find("*/", parserPosition + 2, true)
        parserPosition = closeEnd and (closeEnd + 1) or (lengthOfSource + 1)
        return ConsumeToken()
    elseif point == "<" and next1 == "!" and next2 == "-" and next3 == "-" then
        emit("<CDO-token>", 4)
    elseif point == "@" and DoThreeCodePointsStartIdentifier(next1, next2, next3) then
        parserPosition = parserPosition + 1 -- step over "@"
        currentPoint = peek(0) -- ConsumeName reads currentPoint
        local atToken = CreateToken("<at-keyword-token>")
        atToken.Value = ConsumeName()
        EmitToken(atToken)
    elseif point:match("%d") then
        ConsumeNumericToken()
    elseif IsCodePointNameStart(point) then
        ConsumeIdentLikeToken()
    else
        emit("<delim-token>", 1, point)
    end

    -- Safety net: if a sub-consumer returned without moving the cursor,
    -- emit the character as a delimiter so tokenizing always terminates.
    if parserPosition <= startPosition then
        parserPosition = startPosition
        emit("<delim-token>", 1, point)
    end
end
-- Drive the tokenizer until every character, including the last one, has
-- been consumed. No per-token wait() throttle: tokenizing is pure
-- computation and does not need to yield.
while parserPosition <= lengthOfSource do
    local positionBefore = parserPosition
    currentPoint = CSSSource:sub(parserPosition, parserPosition)
    ConsumeToken()
    if parserPosition <= positionBefore then
        -- ConsumeToken made no progress on this character. Emit it as a
        -- delimiter and step over it so the loop is guaranteed to finish.
        local delimToken = CreateToken("<delim-token>")
        delimToken.Value = CSSSource:sub(positionBefore, positionBefore)
        EmitToken(delimToken)
        parserPosition = positionBefore + 1
    end
end
print("CSS Tokenizing done.")
return tokens
- end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement