Advertisement
Snusmumriken

Lua URL parser/cleaner

Jun 25th, 2018
316
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Lua 3.27 KB | None | 0 0
  1. -- (C) Tommy aka 16Tomatons
  2. -- MIT license
  3.  
  local url = {}

  -- Ordered clean-up rules consumed by url.clear. Every entry carries a Lua
  -- pattern (rx) and the replacement (rp) that are handed to string.gsub.
  -- The scheme rule has to stay last: the '/+' rule before it also collapses
  -- the '//' that follows a scheme, and the last rule puts it back.
  do
      -- tiny constructor so the list below reads as a plain table of pairs
      local function rule(rx, rp)
          return {rx = rx, rp = rp}
      end

      url.cleaning = {
          rule('%s.*',   ''),     -- drop everything from the first whitespace on
          rule('&+',     '&'),    -- collapse repeated '&'
          rule('=+',     '='),    -- collapse repeated '='
          rule('@+',     '@'),    -- collapse repeated '@'
          rule(':+',     ':'),    -- collapse repeated ':'
          rule('#+',     '#'),    -- collapse repeated '#'
          rule('/+',     '/'),    -- collapse repeated '/'
          rule('%?+',    '?'),    -- collapse repeated '?'
          -- restore the second '/' after a scheme; the pattern has no explicit
          -- capture, so %1 stands for the whole match
          rule('^%w+:/', '%1/'),
      }
  end
  16.  
  --- Normalise a dirty URL by applying every rule of url.cleaning in order.
  -- @tparam string u raw URL text
  -- @treturn string the cleaned URL
  -- @treturn number total number of substitutions made by all rules
  function url.clear(u)
      local total = 0
      -- ipairs, not pairs: the rules are order-dependent (the scheme rule must
      -- run after the '/+' rule) and pairs gives no ordering guarantee.
      for _, rule in ipairs(url.cleaning) do
          local count
          u, count = u:gsub(rule.rx, rule.rp)
          total = total + count
      end
      return u, total
  end
  25.  
  --- Cut the first match of a pattern out of a string, much like table.remove
  -- hands back the item it erased.
  --   a, b = url.extract('qwerty', 'w(er)') --> er, qty
  -- @tparam string str subject string
  -- @tparam string regex Lua pattern; the whole matched span is removed
  -- @treturn ?string the first capture of the match, or the matched text
  --   itself when the pattern has no capture; nil when nothing matched
  -- @treturn string str with the matched span removed (str unchanged when
  --   nothing matched)
  function url.extract(str, regex)
      local first, last, matched = str:find(regex)
      if not first then
          return nil, str
      end
      if matched == nil then
          -- pattern without a capture: report the erased text itself, so a
          -- successful match is never confused with "not found"
          matched = str:sub(first, last)
      end
      return matched, str:sub(1, first - 1) .. str:sub(last + 1)
  end
  36.  
  --- Split a string on a literal delimiter.
  -- The delimiter is matched as plain text, so characters that are magic in
  -- Lua patterns ('.', '%', '$', '[', ...) are safe to use.
  -- @tparam string str text to split
  -- @tparam[opt=' '] string delm delimiter text
  -- @treturn table array of pieces; it always holds at least one element
  --   (splitting '' yields {''}) and keeps empty pieces between adjacent
  --   delimiters
  function url.split(str, delm)
      delm = delm or ' '
      if delm == '' then
          -- an empty delimiter matches everywhere; treat it as "no split"
          return {str}
      end
      local res, from = {}, 1
      while true do
          local s, e = str:find(delm, from, true) -- plain find, no patterns
          if not s then
              break
          end
          res[#res + 1] = str:sub(from, s - 1)
          from = e + 1
      end
      res[#res + 1] = str:sub(from) -- tail after the last delimiter
      return res
  end
  46.  
  --- Turn a query string ('a=1&b=2&flag') into a lookup table.
  -- Values are stored as-is; no percent-decoding is performed.
  -- @tparam string query query text without the leading '?'
  -- @treturn table map of key -> value string; a segment without '=' is a
  --   flag and maps to true (/page?flag1&flag2). Empty segments and segments
  --   with an empty key are ignored.
  function url.parse_query(query)
      local res = {}
      -- '[^&]+' walks the '&'-separated segments and skips empty ones, so a
      -- trailing or doubled '&' cannot create a bogus '' entry
      for segment in query:gmatch('[^&]+') do
          local k, v = segment:match('(.-)=(.*)')
          if k == nil then      -- flag, like /page?flag1&flag2
              res[segment] = true
          elseif k ~= '' then   -- key-value
              res[k] = v
          end
      end
      return res
  end
  59.  
  --- Clean a URL with url.clear and break it into its components.
  -- @tparam string u URL text, possibly dirty (duplicated separators,
  --   trailing junk after whitespace)
  -- @treturn table result with the fields
  --   scheme    string or nil
  --   authority table or nil: username, password (only when 'user@' is
  --             present), hostname, port (only when a host is present);
  --             password and port are '' when the ':' part is missing
  --   path      string, without trailing slash; '/' when there is no path
  --   ppath     array of path segments (url.split of path without its
  --             leading '/')
  --   query     string or nil, text between '?' and '#'
  --   pquery    table or nil, url.parse_query(query)
  --   fragment  string or nil, text after '#'
  function url.parse(u)
      local res = {}

      -- foo://username:password@www.example.com.net:8080/hello/world.html/?arg1=val1&arg2=val2#frag
      local rest = url.clear(u)

      -- (foo):// | username:password@www.example.com.net:8080/...
      local scheme
      scheme, rest = url.extract(rest, '^(%w+)://')
      res.scheme = scheme

      -- (username:password)@ | www.example.com.net:8080/...
      -- '?' and '#' are excluded so an '@' inside the query or fragment of a
      -- path-less URL is not mistaken for the userinfo separator
      local userinfo
      userinfo, rest = url.extract(rest, '^([^/?#]+)@')
      if userinfo then
          -- (username):(password) -- split on the first ':'
          local user, pass = userinfo:match('([^:]*):?(.*)')
          res.authority = {username = user, password = pass}
      end

      -- (www.example.com.net:8080) | /hello/world.html/?arg1=val1&arg2=val2#frag
      -- stops at '?' and '#' too, so 'host?x=1' keeps its query
      local hostport
      hostport, rest = url.extract(rest, '^([^/?#]+)')
      if hostport then
          -- (www.example.com.net):(8080) -- split on the first ':'
          local host, port = hostport:match('([^:]*):?(.*)')
          res.authority = res.authority or {}
          res.authority.hostname = host
          res.authority.port     = port
      end

      -- (/hello/world.html/) | ?arg1=val1&arg2=val2#frag
      -- '*' lets a bare '/' count as a path, and '#' ends the path so a
      -- fragment directly after it is not swallowed
      local path
      path, rest = url.extract(rest, '^(/[^?#]*)')
      path = path or '/' -- '/' is the default path
      if #path > 1 then
          path = (path:gsub('/$', '')) -- remove the trailing slash
      end
      res.path  = path
      res.ppath = url.split(path:sub(2), '/')

      -- ?(arg1=val1&arg2=val2) | #frag
      -- '?' is escaped explicitly; '*' consumes an empty query ('?#frag') so
      -- the fragment behind it is still reachable
      local query
      query, rest = url.extract(rest, '^%?([^#]*)')
      if query and query ~= '' then
          res.query  = query
          res.pquery = url.parse_query(query)
      end

      -- #(frag)
      res.fragment = (url.extract(rest, '^#(.+)'))

      return res
  end
  110.  
  -- When the chunk receives arguments (require() passes the module name),
  -- hand back the module table and skip the demo below.
  if ... then
      return url
  end

  -- Demo: parse a deliberately messy URL and dump the result.
  local dirty_url = 'foo:::///////username:::password@@@www.example.com.net::8080///hello//world.html/???arg1====val1&&&arg2=val2#frag qwe'

  -- bad, bad, dirty url!
  print('URL: [' .. dirty_url .. ']')

  -- pretty-print the parsed table; 'inspect' is an external module and is
  -- loaded only here, after the raw URL has been printed
  local inspect = require 'inspect'
  print('Parsed: ' .. inspect(url.parse(dirty_url)))
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement