Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
--- Turn a string or an array into a set-like lookup table.
-- For a string, every individual character becomes a key; for an array
-- table, every element becomes a key. Each key is mapped to `true`.
-- Any other kind of `src` contributes no keys.
-- @param src  string or array table supplying the keys
-- @param list optional table to add the keys to (it is modified in place)
-- @return the table that received the keys: `list`, or a fresh table
function lookupify(src, list)
    local set = list or {}
    local kind = type(src)
    if kind == 'table' then
        local count = #src
        local index = 1
        while index <= count do
            set[src[index]] = true
            index = index + 1
        end
    elseif kind == 'string' then
        -- '.' matches every character, including newlines.
        for ch in src:gmatch('.') do
            set[ch] = true
        end
    end
    return set
end
-- Raw alphabets from which the character-class tables are derived.
local base_ident = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_'
local base_digits = '0123456789'
local base_operators = '+-*/^%#'

-- Character classes, each a lookup table keyed by a single character.
-- Some classes also carry a named sub-class table alongside the character
-- keys (for example `chars.ident.start`).
local chars = {}
chars.whitespace = lookupify(' \n\t\r')
-- Characters allowed directly after a backslash inside a quoted string.
chars.validEscapes = lookupify('abfnrtv"\'\\')
-- Identifier characters; `start` holds those allowed in first position.
chars.ident = lookupify(base_ident .. base_digits, {
    start = lookupify(base_ident),
})
-- Decimal digits; `hex` holds the hexadecimal digit set.
chars.digits = lookupify(base_digits, {
    hex = lookupify(base_digits .. 'abcdefABCDEF'),
})
-- Punctuation; `equality` and `operators` single out the operator-like
-- characters.
chars.symbols = lookupify(base_operators .. ',{}[]();.:', {
    equality = lookupify('~=><'),
    operators = lookupify(base_operators),
})

-- Reserved words, split into structural keywords and literal values.
local keywords = {}
keywords.structure = lookupify({
    'and', 'break', 'do', 'else', 'elseif', 'end', 'for', 'function',
    'goto', 'if', 'in', 'local', 'not', 'or', 'repeat', 'return', 'then',
    'until', 'while',
})
keywords.values = lookupify({ 'true', 'false', 'nil' })

-- Identifiers that are reported with the 'builtin' token type.
local builtin = {}
builtin.structure = lookupify({
    'pairs', 'print', 'Color3', 'Enum', 'UDim', 'UDim2', 'Instance', 'game',
})
--- Split Lua source text into tokens, grouped by source line.
--
-- Consecutive tokens of the same type on one line are merged into a single
-- token. Newline characters delimit lines and are not kept as tokens.
--
-- Each token is a table with the fields:
--   type     - one of 'whitespace', 'comment', 'string_start', 'string',
--              'escape', 'string_end', 'keyword', 'value', 'builtin',
--              'ident', 'number', 'symbol', 'operator', 'vararg',
--              'label_start', 'label', 'label_end', 'unidentified'
--   data     - the token's text
--   posFirst - position of the first character, measured from the absolute
--              position minus the length of all text on earlier lines
--   posLast  - position of the last character, measured the same way
--   line     - zero-based index of the line the token is on
--
-- @param text string containing the source to tokenize
-- @return array of lines, each line being an array of token tables
return function(text)
    local pos = 1       -- index of the next unread character
    local start = 1     -- index where the token currently being built begins
    local buffer = {}   -- tokens collected for the current line
    local lines = {}    -- finished line buffers

    local currentLineLength = 0  -- characters emitted on the current line
    local lineoffset = 0         -- characters emitted on all earlier lines
    local waitingOnLine = false  -- a newline was seen; bump `line` on next token
    local line = 0

    -- Return the character `delta` positions from `pos` ('' past the end).
    local function look(delta)
        delta = pos + (delta or 0)
        return text:sub(delta, delta)
    end

    -- Consume and return the next character ('' at end of input).
    local function get()
        pos = pos + 1
        return look(-1)
    end

    -- With `pos` just after a '[', try to read the rest of a long-bracket
    -- opener (`=`* followed by '['). On success, consume it and return the
    -- number of '=' signs; otherwise leave `pos` untouched and return nil.
    local function getDataLevel()
        local num = 0
        while look(num) == '=' do
            num = num + 1
        end
        if look(num) == '[' then
            pos = pos + num + 1
            return num
        end
    end

    -- Text consumed since the last token was pushed.
    local function getCurrentTokenText()
        return text:sub(start, pos - 1)
    end

    -- Emit a token of the given kind. `data` defaults to the text consumed
    -- since the previous token. If the last token on the line has the same
    -- kind, the text is appended to it instead. Empty tokens are not stored.
    local function pushToken(kind, data)
        data = data or getCurrentTokenText()
        local tk = buffer[#buffer]
        if not tk or tk.type ~= kind then
            if waitingOnLine and kind ~= "newline" then
                line = line + 1
                waitingOnLine = false
            end
            tk = {
                type = kind,
                data = data,
                posFirst = start - lineoffset,
                posLast = pos - 1 - lineoffset,
                line = line
            }
            if tk.data ~= '' then
                buffer[#buffer + 1] = tk
            end
        else
            tk.data = tk.data .. data
            tk.posLast = tk.posLast + data:len()
        end
        currentLineLength = currentLineLength + data:len()
        start = pos
        return tk
    end

    -- Finish the current line: store its buffer, consume the newline
    -- character and start a new, empty buffer.
    local function newline()
        lines[#lines + 1] = buffer
        waitingOnLine = true
        buffer = {}
        get()
        -- Pushed only to account for the newline's length; it is dropped
        -- again immediately so it never appears in the output.
        pushToken('newline')
        buffer[1] = nil
        lineoffset = lineoffset + currentLineLength
        currentLineLength = 0
    end

    -- Consume the body of a long bracket of the given level, emitting one
    -- token of `kind` per line crossed. On success `pos` is left on the first
    -- ']' of the closing bracket and true is returned. If the input ends
    -- first, `pos` is left just past the last character and false is returned.
    local function getData(level, kind)
        while true do
            local char = get()
            if char == '' then
                -- Undo the step past end-of-input so positions stay in range.
                pos = pos - 1
                return false
            elseif char == '\n' then
                pos = pos - 1
                pushToken(kind)
                newline()
            elseif char == ']' then
                local valid = true
                for _ = 1, level do
                    if look() == '=' then
                        pos = pos + 1
                    else
                        valid = false
                        break
                    end
                end
                if valid and look() == ']' then
                    pos = pos - level - 1
                    return true
                end
            end
        end
    end

    -- Consume the remainder of a single-line comment, emit it, and handle
    -- the terminating newline if there is one.
    local function finishLineComment()
        while true do
            local char = get()
            if char == '' or char == '\n' then
                pos = pos - 1
                pushToken('comment')
                if char == '\n' then
                    newline()
                end
                return
            end
        end
    end

    -- Consume a run of whitespace (possibly spanning several lines) and emit
    -- it as 'whitespace' tokens.
    local function chompWhitespace()
        while true do
            local char = look()
            if char == '\n' then
                pushToken('whitespace')
                newline()
            elseif chars.whitespace[char] then
                pos = pos + 1
            else
                break
            end
        end
        pushToken('whitespace')
    end

    while true do
        chompWhitespace()
        local char = get()
        if char == '' then
            break
        elseif char == '-' and look() == '-' then
            -- Comment: either a long-bracket comment or a line comment.
            pos = pos + 1
            local level
            if look() == '[' then
                pos = pos + 1
                level = getDataLevel()
            end
            if level then
                if getData(level, 'comment') then
                    -- Step over the closing bracket: ']' '='*level ']'.
                    pos = pos + level + 2
                end
            else
                finishLineComment()
            end
            pushToken('comment')
        elseif char == '\'' or char == '"' then
            -- Quoted string; escapes are emitted as separate tokens.
            pushToken('string_start')
            while true do
                local char2 = get()
                if char2 == '\\' then
                    pos = pos - 1
                    pushToken('string')
                    get()
                    local char3 = get()
                    if chars.digits[char3] then
                        -- Decimal escape: up to three digits in total.
                        for _ = 1, 2 do
                            if chars.digits[look()] then
                                pos = pos + 1
                            end
                        end
                    elseif char3 == 'x' then
                        -- Hex escape needs exactly two hex digits.
                        if chars.digits.hex[look()] and chars.digits.hex[look(1)] then
                            pos = pos + 2
                        else
                            pushToken('unidentified')
                        end
                    elseif char3 == '\n' then
                        -- Backslash followed by a line break.
                        pos = pos - 1
                        pushToken('escape')
                        newline()
                    elseif not chars.validEscapes[char3] then
                        pushToken('unidentified')
                    end
                    pushToken('escape')
                elseif char2 == '\n' then
                    -- Unescaped line break ends the string.
                    pos = pos - 1
                    pushToken('string')
                    newline()
                    break
                elseif char2 == char or char2 == '' then
                    -- Matching closing quote, or end of input.
                    pos = pos - 1
                    pushToken('string')
                    get()
                    break
                end
            end
            pushToken('string_end')
        elseif chars.ident.start[char] then
            while chars.ident[look()] do
                pos = pos + 1
            end
            local word = getCurrentTokenText()
            if keywords.structure[word] then
                pushToken('keyword')
            elseif keywords.values[word] then
                pushToken('value')
            elseif builtin.structure[word] then
                pushToken('builtin')
            else
                pushToken('ident')
            end
        elseif chars.digits[char] or (char == '.' and chars.digits[look()]) then
            -- Lua accepts both '0x' and '0X' as the hexadecimal prefix.
            if char == '0' and look():lower() == 'x' then
                pos = pos + 1
                while chars.digits.hex[look()] do
                    pos = pos + 1
                end
            else
                while chars.digits[look()] do
                    pos = pos + 1
                end
                if look() == '.' then
                    pos = pos + 1
                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
                if look():lower() == 'e' then
                    pos = pos + 1
                    -- The exponent may carry an explicit '+' or '-' sign.
                    local sign = look()
                    if sign == '-' or sign == '+' then
                        pos = pos + 1
                    end
                    while chars.digits[look()] do
                        pos = pos + 1
                    end
                end
            end
            pushToken('number')
        elseif char == '[' then
            local level = getDataLevel()
            if level then
                -- Long-bracket string.
                pushToken('string_start')
                local closed = getData(level, 'string')
                pushToken('string')
                if closed then
                    pos = pos + level + 2
                end
                pushToken('string_end')
            else
                pushToken('symbol')
            end
        elseif char == '.' then
            if look() == '.' then
                pos = pos + 1
                if look() == '.' then
                    pos = pos + 1
                end
            end
            if getCurrentTokenText():len() == 3 then
                pushToken('vararg')
            else
                pushToken('symbol')
            end
        elseif char == ':' and look() == ':' then
            -- Label: '::' name '::'
            get()
            pushToken('label_start')
            chompWhitespace()
            if chars.ident.start[look()] then
                get()
                while chars.ident[look()] do
                    get()
                end
                pushToken('label')
                chompWhitespace()
                if look() == ':' and look(1) == ':' then
                    get()
                    get()
                    pushToken('label_end')
                end
            end
        elseif chars.symbols.equality[char] then
            if look() == '=' then
                pos = pos + 1
            end
            pushToken('operator')
        elseif chars.symbols[char] then
            if chars.symbols.operators[char] then
                pushToken('operator')
            else
                pushToken('symbol')
            end
        else
            pushToken('unidentified')
        end
    end
    lines[#lines + 1] = buffer
    return lines
end
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement