Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- (C) Tommy aka 16Tomatons
- -- MIT license
-- Module table: the public functions and settings are attached to it.
local url = {}

-- Ordered list of clean-up rules. Each entry pairs a Lua pattern (`rx`) with
-- the gsub replacement (`rp`) to apply for it.
url.cleaning = {
  { rx = '%s.*', rp = '' }, -- cut the URL at the first whitespace character
}

-- Squeeze runs of a repeated separator down to a single character.
-- '/' is squeezed too, which also flattens the '//' that follows a scheme.
for _, sep in ipairs({ '&', '=', '@', ':', '#', '/' }) do
  url.cleaning[#url.cleaning + 1] = { rx = sep .. '+', rp = sep }
end

-- '?' is a magic pattern character, so its rule is written out escaped.
url.cleaning[#url.cleaning + 1] = { rx = '%?+', rp = '?' }

-- Has to come after the '/' rule: when the text starts with a scheme, put
-- back the second '/' that was squeezed away ('foo:/' becomes 'foo://').
-- The pattern has no capture; %1 is relied on to expand to the whole match.
url.cleaning[#url.cleaning + 1] = { rx = '^%w+:/', rp = '%1/' }
-- Clean a dirty URL by applying every rule of url.cleaning, first to last.
--   u: the raw URL string
-- Returns the cleaned string and the total number of substitutions made
-- across all rules.
function url.clear(u)
  local total = 0
  -- ipairs rather than pairs: the rules depend on their order (the last one
  -- repairs what the '/' rule collapsed), and pairs gives no order guarantee.
  for _, rule in ipairs(url.cleaning) do
    local count
    u, count = u:gsub(rule.rx, rule.rp)
    total = total + count
  end
  return u, total
end
-- Cut the first match of a pattern out of a string, much like table.remove
-- hands back the element it erased.
--   str:   the string to search
--   regex: a Lua pattern; its first capture is the value handed back
-- Returns the first capture (or the whole matched text when the pattern has
-- no capture) and the string with the entire match removed. When nothing
-- matches, returns nil and the string unchanged.
-- a, b = url.extract('qwerty', 'w(er)') --> er, qty
function url.extract(str, regex)
  local first, last, captured = str:find(regex)
  if not first then
    return nil, str
  end
  if captured == nil then
    -- Pattern without a capture: report the matched text itself, so that a
    -- successful match can never be mistaken for "not found".
    captured = str:sub(first, last)
  end
  return captured, str:sub(1, first - 1) .. str:sub(last + 1)
end
-- Split a string on a literal delimiter.
--   str:  the string to split
--   delm: the delimiter text, taken literally (defaults to a single space)
-- Returns an array of the pieces between delimiters; empty pieces are kept,
-- so splitting '' yields { '' }.
function url.split(str, delm)
  delm = delm or ' '
  -- Escape punctuation so that delimiters such as '.', '?', '+' or '%' are
  -- matched as plain text instead of being read as pattern magic.
  local literal = delm:gsub('%p', '%%%0')
  local res = {}
  -- Appending one delimiter lets the lazy capture close the last piece too.
  for piece in (str .. delm):gmatch('(.-)' .. literal) do
    res[#res + 1] = piece
  end
  return res
end
-- Turn a query string ('a=1&b=2&flag') into a lookup table.
--   query: the query text without the leading '?'
-- Returns a table where 'key=value' items become res[key] = value (split at
-- the first '=', value kept as a string) and bare items become
-- res[item] = true. Values are stored as written; nothing is decoded.
function url.parse_query(query)
  local res = {}
  for _, item in ipairs(url.split(query, '&')) do
    -- Skip empty items (empty query, stray '&') so that no bogus
    -- [''] = true flag is created.
    if item ~= '' then
      local k, v = item:match('(.-)=(.*)')
      if k then -- key-value
        res[k] = v
      else -- flag, like /page?flag1&flag2
        res[item] = true
      end
    end
  end
  return res
end
-- Parse a (possibly dirty) URL into its components.
--   u: the URL string; it is run through url.clear() first
-- Returns a table with these fields:
--   scheme     text before '://', or nil
--   authority  nil, or a table with username/password (only when an '@' part
--              exists) and hostname/port; password and port are '' when the
--              ':' part is missing
--   path       path without its trailing slash; '/' when empty or absent
--   ppath      path segments, i.e. url.split() of the path minus its first '/'
--   query      raw query text, or nil when absent or empty
--   pquery     url.parse_query(query), set together with query
--   fragment   text after '#', or nil
-- Whatever stands before the first '/', '?' or '#' (after the scheme) is
-- always taken as the authority, so scheme-less relative paths are not
-- recognised as paths.
function url.parse(u)
  local res = {}
  local scheme, userinfo, hostport, path, query

  -- foo://username:password@www.example.com.net:8080/hello/world.html/?arg1=val1&arg2=val2#frag
  local rest = url.clear(u)

  -- (foo):// | username:password@www.example.com.net:8080/hello/...
  scheme, rest = url.extract(rest, '^(%w+)://')
  res.scheme = scheme

  -- (username:password)@ | www.example.com.net:8080/hello/...
  -- '?' and '#' are excluded so an '@' inside the query or fragment of a
  -- path-less URL is not mistaken for the userinfo separator.
  userinfo, rest = url.extract(rest, '^([^/?#]+)@')
  if userinfo then
    -- (username):(password)
    local user, pass = userinfo:match('([^:]*):?(.*)')
    res.authority = { username = user, password = pass }
  end

  -- (www.example.com.net:8080) | /hello/world.html/?arg1=val1&arg2=val2#frag
  -- Stop at '?' and '#' too, otherwise 'host?x=1' keeps the query in the host.
  hostport, rest = url.extract(rest, '^([^/?#]+)')
  if hostport then
    res.authority = res.authority or {}
    -- (www.example.com.net):(8080)
    local host, port = hostport:match('([^:]*):?(.*)')
    res.authority.hostname = host
    res.authority.port = port
  end

  -- (/hello/world.html/) | ?arg1=val1&arg2=val2#frag
  -- '*' instead of '+' lets a bare '/' match, so '/?query' no longer loses
  -- its query; '#' is excluded so '/path#frag' keeps its fragment.
  path, rest = url.extract(rest, '^(/[^?#]*)')
  if path == nil or path == '/' then
    res.path = '/' -- default path
  else
    res.path = (path:gsub('/$', '')) -- drop the trailing slash
  end
  res.ppath = url.split(res.path:sub(2), '/')

  -- ?(arg1=val1&arg2=val2) | #frag
  -- An empty query ('?#frag') is consumed but not stored, so the fragment
  -- behind it is still reachable.
  query, rest = url.extract(rest, '^%?([^#]*)')
  if query and query ~= '' then
    res.query = query
    res.pquery = url.parse_query(query)
  end

  -- #(frag)
  res.fragment = (url.extract(rest, '^#(.+)'))
  return res
end
-- Loaded with arguments (e.g. through require): just hand out the module.
if ... then
  return url
end

-- Run directly as a script: demonstrate the parser on a deliberately messy URL.
local sample = 'foo:::///////username:::password@@@www.example.com.net::8080///hello//world.html/???arg1====val1&&&arg2=val2#frag qwe'
print('URL: [' .. sample .. ']')

-- The third-party 'inspect' module is only needed for this demo output.
local inspect = require 'inspect'
print('Parsed: ' .. inspect(url.parse(sample)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement