Z1maV1

UrlParsingTools

May 6th, 2024
191
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.64 KB | None | 0 0
  1. -- net/url.lua - a robust url parser and builder
  2. --
  3. -- Bertrand Mansion, 2011-2021; License MIT
  4. -- @module net.url
  5. -- @alias M
  6.  
  -- Module table: every public function and option of the library is attached
  -- to it, and it is what the file returns.
  local M = { version = "1.1.0" }
  9.  
  --- url options
  -- - `separator` is `&` by default but could be anything like `&amp;` or `;`
  -- - `cumulative_parameters` is false by default. If true, query parameters
  --   with the same name will be stored in a table.
  -- - `legal_in_path` is a set of characters that will not be url encoded in
  --   path components
  -- - `legal_in_query` is a set of characters that will not be url encoded in
  --   query values. Parameter names only keep a small set of characters (-_.)
  --   unencoded.
  -- - `query_plus_is_space` is true by default, so a plus sign in a query value
  --   is decoded as a space, not as a literal plus
  -- @todo Add option to limit the size of the argument table
  -- @todo Add option to limit the depth of the argument table
  -- @todo Add option to process dots in parameter names, ie. `param.filter=1`
  do
    -- turns a string of characters into a lookup set: { [char] = true, ... }
    local function charset(chars)
      local set = {}
      for c in chars:gmatch(".") do
        set[c] = true
      end
      return set
    end

    M.options = {
      separator = '&',
      cumulative_parameters = false,
      legal_in_path = charset(":-_.!~*'()@&=$,;"),
      legal_in_query = charset(":-_.,!~*';()@$"),
      query_plus_is_space = true,
    }
  end
  37.  
  --- default port of known and common schemes
  -- as documented in <a href="http://www.iana.org/assignments/uri-schemes.html">IANA URI scheme list</a>.
  -- Keys are lowercase scheme names, values are port numbers; `build` uses the
  -- table to omit a port that is the default for the url scheme.
  M.services = {
    acap     = 674,
    afs      = 1483,
    cap      = 1026,
    dict     = 2628,
    ftp      = 21,
    gopher   = 70,
    http     = 80,
    https    = 443,
    iax      = 4569,
    icap     = 1344,
    imap     = 143,
    ipp      = 631,
    jms      = 5673,
    ldap     = 389,
    mtqp     = 1038,
    mupdate  = 3905,
    news     = 2009,
    nfs      = 2049,
    nntp     = 119,
    prospero = 191,
    rsync    = 873,
    rtsp     = 554,
    sip      = 5060,
    snmp     = 161,
    telnet   = 23,
    tftp     = 69,
    vemmi    = 575,
    videotex = 516,
  }
  70.  
  -- Replaces every %XX escape (a percent sign followed by two hex digits) in
  -- `str` with the byte it stands for. A `%` that is not followed by two hex
  -- digits is left untouched. Returns the decoded string only.
  local function decode(str)
    local function byte_from_hex(hex)
      return string.char(tonumber(hex, 16))
    end
    local decoded = str:gsub("%%(%x%x)", byte_from_hex)
    return decoded
  end
  76.  
  -- Percent-encodes `str`. Characters matched by %w are always kept; any other
  -- character is kept only when it is a key of the `legal` set, otherwise it
  -- is replaced by %XX with uppercase hex digits.
  local function encode(str, legal)
    local function escape(ch)
      if not legal[ch] then
        return ("%%%02X"):format(ch:byte())
      end
      return ch
    end
    local encoded = str:gsub("([^%w])", escape)
    return encoded
  end
  85.  
  -- Decodes a query-string key or value. When M.options.query_plus_is_space is
  -- set, every `+` is turned into a space first; %XX escapes are then decoded.
  local function decodeValue(str)
    local text = str
    if M.options.query_plus_is_space then
      text = text:gsub('+', ' ')
    end
    return decode(text)
  end
  93.  
  -- `__concat` handler for url tables: builds whichever operand is the url and
  -- joins it with the other one. When `a` is a table only `a` is built, so a
  -- table on the right-hand side is concatenated through its own metamethod.
  local function concat(a, b)
    if type(a) ~= 'table' then
      return a .. b:build()
    end
    return a:build() .. b
  end
  101.  
  -- Appends one segment to the url path (also reachable through the `/`
  -- operator of parsed urls). Leading slashes of `path` are dropped, the rest
  -- is encoded with the `legal_in_path` set and added after a single `/`.
  -- Non-string arguments are ignored. Returns the url itself for chaining.
  function M:addSegment(path)
    if type(path) ~= 'string' then
      return self
    end
    local segment = path:gsub("^/+", "")
    self.path = self.path .. '/' .. encode(segment, M.options.legal_in_path)
    return self
  end
  108.  
  --- builds the url
  -- Assembles scheme, authority (user, password, host, port), path, query and
  -- fragment into a string. The port is left out only when it is the default
  -- port registered in `M.services` for the url scheme; a url with a host and
  -- a port but no scheme keeps its port.
  -- @return a string representing the built url
  function M:build()
    local url = ''
    if self.path then
      url = url .. tostring(self.path)
    end
    if self.query then
      local qstring = tostring(self.query)
      if qstring ~= "" then
        url = url .. '?' .. qstring
      end
    end
    if self.host then
      local authority = self.host
      -- without a scheme there is no default port to compare against, so the
      -- port must be kept (it used to be dropped, e.g. for `//host:8080/x`)
      if self.port and (not self.scheme or M.services[self.scheme] ~= self.port) then
        authority = authority .. ':' .. self.port
      end
      if self.user and self.user ~= "" then
        local userinfo = self.user
        if self.password then
          userinfo = userinfo .. ':' .. self.password
        end
        authority = userinfo .. '@' .. authority
      end
      if url ~= "" then
        -- exactly one slash between the authority and the path/query part
        url = '//' .. authority .. '/' .. url:gsub('^/+', '')
      else
        url = '//' .. authority
      end
    end
    if self.scheme then
      url = self.scheme .. ':' .. url
    end
    if self.fragment then
      url = url .. '#' .. self.fragment
    end
    return url
  end
  154.  
  --- builds the querystring
  -- Keys are emitted in a stable order: numeric keys first, in numeric order
  -- (so list items keep their position even past the 9th element), then all
  -- other keys ordered by their string form. Nested tables are rendered with
  -- brackets (`key[sub]=value`), or as repeated `key=value` pairs for numeric
  -- sub-keys when `M.options.cumulative_parameters` is set. Empty values are
  -- rendered as the bare name.
  -- @param tab The key/value parameters
  -- @param sep The separator to use (optional)
  -- @param key The parent key if the value is multi-dimensional (optional)
  -- @return a string representing the built querystring
  function M.buildQuery(tab, sep, key)
    local query = {}
    if not sep then
      sep = M.options.separator or '&'
    end
    local keys = {}
    for k in pairs(tab) do
      keys[#keys+1] = k
    end

    -- string form used to order non-numeric keys: digits followed by a dot
    -- are zero-padded so that e.g. "2.x" sorts before "10.x"
    local function padnum(n, rest) return ("%03d"..rest):format(tonumber(n)) end
    local function sortable(k)
      return (tostring(k):gsub("(%d+)(%.)", padnum))
    end
    table.sort(keys, function (a, b)
      local a_is_num, b_is_num = type(a) == 'number', type(b) == 'number'
      if a_is_num and b_is_num then
        -- comparing the string forms would give 1, 10, 11, 2, ...
        return a < b
      end
      if a_is_num ~= b_is_num then
        return a_is_num
      end
      return sortable(a) < sortable(b)
    end)

    -- characters kept unencoded in parameter names
    local legal_in_name = {["-"] = true, ["_"] = true, ["."] = true}
    for _,name in ipairs(keys) do
      local value = tab[name]
      name = encode(tostring(name), legal_in_name)
      if key then
        if M.options.cumulative_parameters and string.find(name, '^%d+$') then
          -- list item of a cumulative parameter: repeat the parent name
          name = tostring(key)
        else
          name = string.format('%s[%s]', tostring(key), tostring(name))
        end
      end
      if type(value) == 'table' then
        query[#query+1] = M.buildQuery(value, sep, name)
      else
        local encoded = encode(tostring(value), M.options.legal_in_query)
        if encoded ~= "" then
          query[#query+1] = string.format('%s=%s', name, encoded)
        else
          query[#query+1] = name
        end
      end
    end
    return table.concat(query, sep)
  end
  196.  
  --- Parses the querystring to a table
  -- This function can parse multidimensional pairs and is mostly compatible
  -- with PHP usage of brackets in key names like ?param[key]=value
  --
  -- Names and values are decoded (see `query_plus_is_space`), including the
  -- values stored under bracketed keys. A repeated plain parameter keeps its
  -- first value, unless `M.options.cumulative_parameters` is set, in which
  -- case all values are collected in a list. Spaces in parameter names are
  -- replaced by underscores.
  -- @param str The querystring to parse
  -- @param sep The separator between key/value pairs, defaults to `&`
  -- @todo limit the max number of parameters with M.options.max_parameters
  -- @return a table representing the query key/value pairs; converting it to a
  -- string with `tostring` rebuilds the querystring
  function M.parseQuery(str, sep)
    if not sep then
      sep = M.options.separator or '&'
    end
    -- Escape the separator so it is taken literally inside the character
    -- classes below. (Formatting it with %q added double quotes to the class,
    -- which made `"` act as a separator too.)
    local sep_class = sep:gsub('%W', '%%%0')
    local pair_pattern = string.format('([^%s=]+)(=*[^%s=]*)', sep_class, sep_class)

    local values = {}
    -- names whose value is a list built from repeated plain parameters
    local cumulated = {}
    for key, val in str:gmatch(pair_pattern) do
      local name = decodeValue(key)
      local keys = {}
      name = name:gsub('%[([^%]]*)%]', function(v)
        -- extract keys between balanced brackets
        if string.find(v, "^-?%d+$") then
          v = tonumber(v)
        else
          v = decodeValue(v)
        end
        table.insert(keys, v)
        return "="
      end)
      name = name:gsub('=+.*$', "")
      name = name:gsub('%s', "_") -- remove spaces in parameter name
      local raw = val:gsub('^=+', "")

      local slot = values[name]
      if #keys > 0 then
        -- bracketed parameter: the top-level entry must be a table
        if type(slot) ~= 'table' then
          values[name] = {}
        end
      elseif slot == nil then
        values[name] = decodeValue(raw)
      elseif M.options.cumulative_parameters and cumulated[name] then
        -- third and later occurrences extend the list instead of replacing it
        slot[#slot+1] = decodeValue(raw)
      elseif type(slot) == 'table' then
        -- a plain value replaces an earlier bracketed one
        values[name] = decodeValue(raw)
      elseif M.options.cumulative_parameters and type(slot) == 'string' then
        values[name] = { slot, decodeValue(raw) }
        cumulated[name] = true
      end

      local t = values[name]
      for i, k in ipairs(keys) do
        local index = k
        if index == "" then
          -- empty brackets append to the list
          index = #t+1
        end
        if i == #keys then
          t[index] = decodeValue(raw)
        else
          -- descend, replacing a scalar that sits where a table is needed
          -- (the value used to be written to a detached table and lost)
          if type(t[index]) ~= 'table' then
            t[index] = {}
          end
          t = t[index]
        end
      end
    end
    setmetatable(values, { __tostring = M.buildQuery })
    return values
  end
  261.  
  --- set the url query
  -- The parsed key/value table is stored in `self.query`.
  -- @param query Can be a string to parse or a table of key/value pairs
  -- @return the querystring that was parsed: `query` itself when it is a
  -- string, or the string built from it when it is a table
  function M:setQuery(query)
    local querystring = type(query) == 'table' and M.buildQuery(query) or query
    self.query = M.parseQuery(querystring)
    return querystring
  end
  273.  
  --- set the authority part of the url
  -- The authority is parsed to find the user, password, port and host if available.
  -- `self.authority` keeps the raw string. `self.host` is only set when the
  -- host is a valid IPv4 address, a bracketed IPv6 literal or a well-formed
  -- domain name; user and password are only set when the user part consists of
  -- alphanumerics, `+` and `.`.
  -- @param authority The string representing the authority
  -- @return a string with what remains after the authority was parsed
  -- (the authority without its userinfo and port)
  function M:setAuthority(authority)
    self.authority = authority
    self.port = nil
    self.host = nil
    self.userinfo = nil
    self.user = nil
    self.password = nil

    authority = authority:gsub('^([^@]*)@', function(v)
      self.userinfo = v
      return ''
    end)

    authority = authority:gsub(':(%d+)$', function(v)
      self.port = tonumber(v)
      return ''
    end)

    -- Returns `str` when it is a valid IP literal, false when it looks like
    -- one but has an out-of-range part, nil when it is not an IP literal.
    local function getIP(str)
      -- ipv4
      local v4 = { str:match("^(%d+)%.(%d+)%.(%d+)%.(%d+)$") }
      if #v4 == 4 then
        for _, v in pairs(v4) do
          if tonumber(v) > 255 then
            return false
          end
        end
        return str
      end
      -- ipv6: only a bracketed literal made of hex digits, colons and dots
      -- (embedded ipv4), optionally followed by a %25 zone id, qualifies.
      -- Without this check any string with a single `::` (e.g. `a::b`)
      -- was accepted as a host.
      local inner = str:match("^%[(.+)%]$")
      if not inner then
        return nil
      end
      if not (inner:match("^[%x:%.]+$")
          or inner:match("^[%x:%.]+%%25[%w%-%._~]+$")) then
        return nil
      end
      -- full form: eight colon-separated groups
      local groups = { str:match("^%["..(("([a-fA-F0-9]*):"):rep(8):gsub(":$","%%]$"))) }
      -- compressed form: exactly one `::`
      local compressed = str:match('::') and not str:gsub("::", "", 1):match('::')
      if #groups == 8 or compressed then
        for _,v in pairs(groups) do
          if #v > 0 and tonumber(v, 16) > 65535 then
            return false
          end
        end
        return str
      end
      return nil
    end

    local ip = getIP(authority)
    if ip then
      self.host = ip
    elseif type(ip) == 'nil' then
      -- domain
      if authority ~= '' and not self.host then
        local host = authority:lower()
        if string.match(host, '^[%d%a%-%.]+$') ~= nil and
          string.sub(host, 1, 1) ~= '.' and
          string.sub(host, -1) ~= '.' and
          string.find(host, '%.%.') == nil then
          self.host = host
        end
      end
    end

    if self.userinfo then
      local userinfo = self.userinfo
      -- everything after the last colon is the password
      userinfo = userinfo:gsub(':([^:]*)$', function(v)
        self.password = v
        return ''
      end)
      if string.find(userinfo, "^[%w%+%.]+$") then
        self.user = userinfo
      else
        -- incorrect userinfo
        self.userinfo = nil
        self.user = nil
        self.password = nil
      end
    end

    return authority
  end
  355.  
  --- Parse the url into the designated parts.
  -- Depending on the url, the following parts can be available:
  -- scheme, userinfo, user, password, authority, host, port, path,
  -- query, fragment
  -- The parts are cut off the url in this order: fragment, scheme, query,
  -- authority; what remains is the path, whose segments are decoded and
  -- re-encoded with the `legal_in_path` set.
  -- @param url Url string (any other value is converted with `tostring`,
  -- nil and false are treated as an empty url)
  -- @return a table with the different parts and a few other functions
  function M.parse(url)
    local comp = {}
    -- start from an empty authority and an empty query table
    M.setAuthority(comp, "")
    M.setQuery(comp, "")

    local rest = tostring(url or '')

    rest = rest:gsub('#(.*)$', function(fragment)
      comp.fragment = fragment
      return ''
    end)
    rest = rest:gsub('^([%w][%w%+%-%.]*)%:', function(scheme)
      comp.scheme = scheme:lower()
      return ''
    end)
    rest = rest:gsub('%?(.*)', function(querystring)
      M.setQuery(comp, querystring)
      return ''
    end)
    rest = rest:gsub('^//([^/]*)', function(authority)
      M.setAuthority(comp, authority)
      return ''
    end)

    local legal = M.options.legal_in_path
    local path = rest:gsub("([^/]+)", function(segment)
      return encode(decode(segment), legal)
    end)
    comp.path = path

    return setmetatable(comp, {
      __index = M,
      __tostring = M.build,
      __concat = concat,
      __div = M.addSegment
    })
  end
  395.  
  --- removes dots and slashes in urls when possible
  -- This function will also remove multiple slashes.
  -- `.` segments are dropped and each `..` segment removes the segment before
  -- it (a `..` with nothing before it is dropped). A leading slash is kept. A
  -- trailing slash is kept only when at least one segment remains, so the
  -- result never ends in a doubled slash and a relative path that reduces to
  -- nothing stays relative (`a/../` gives an empty string).
  -- @param path The string representing the path to clean
  -- @return a string of the path without unnecessary dots and segments
  function M.removeDotSegments(path)
    if string.len(path) == 0 then
      return ""
    end
    local startslash = string.sub(path, 1, 1) == "/"
    -- a path that is just "/" has a leading slash, not a trailing one
    local endslash = (string.len(path) > 1 or not startslash)
      and string.sub(path, -1) == "/"

    local kept = {}
    for segment in path:gmatch('[^/]+') do
      if segment == '..' then
        -- drop the previous segment; harmless when none is left
        kept[#kept] = nil
      elseif segment ~= '.' then
        kept[#kept+1] = segment
      end
    end

    local ret = table.concat(kept, '/')
    if startslash then
      ret = '/'..ret
    end
    -- with no segment left, adding the trailing slash used to produce "//"
    -- for absolute paths and "/" for relative ones
    if endslash and #kept > 0 then
      ret = ret..'/'
    end
    return ret
  end
  443.  
  -- Resolves `relative_path` against `base_path` and removes `.` and `..`
  -- segments where possible.
  -- * When `relative_path` starts with a slash it replaces the base: only its
  --   leading slashes and leading `./` and `../` segments are dropped, so
  --   names that merely start with a dot (`/.well-known`) are preserved.
  -- * Otherwise the last segment of `base_path` is replaced by
  --   `relative_path` (an empty `relative_path` keeps the base as is) and the
  --   dot segments are reduced.
  -- The result starts with exactly one slash when `base_path` is absolute and
  -- stays relative otherwise.
  local function reducePath(base_path, relative_path)
    if string.sub(relative_path, 1, 1) == "/" then
      local rest, previous = relative_path, nil
      repeat
        previous = rest
        rest = rest:gsub('^/+', ''):gsub('^%.%.?/', '')
      until rest == previous
      if rest == '.' or rest == '..' then
        rest = ''
      end
      return '/' .. rest
    end

    local absolute = string.sub(base_path, 1, 1) == "/"
    local path = base_path
    if relative_path ~= "" then
      -- drop the last segment of the base, keeping its trailing slash
      path = path:gsub("[^/]*$", "")
    end
    path = path .. relative_path

    -- remove "./" segments (but keep segments that only end with a dot)
    path = path:gsub("([^/]*%./)", function (s)
      if s ~= "./" then return s else return "" end
    end)
    path = string.gsub(path, "/%.$", "/")

    -- collapse "segment/../" pairs until nothing changes
    local reduced
    while reduced ~= path do
      reduced = path
      path = string.gsub(reduced, "([^/]*/%.%./)", function (s)
        if s ~= "../../" then return "" else return s end
      end)
    end
    -- a trailing "segment/.." or "segment/."
    path = string.gsub(path, "([^/]*/%.%.?)$", function (s)
      if s ~= "../.." then return "" else return s end
    end)
    -- ".." segments that would climb above the start of the path are dropped
    reduced = nil
    while reduced ~= path do
      reduced = path
      path = string.gsub(reduced, '^/?%.%./', '')
    end

    if absolute then
      -- exactly one leading slash (extra ones used to be prepended here)
      return '/' .. (path:gsub('^/+', ''))
    end
    return path
  end
  475.  
  --- resolves a url against this one, which acts as the base
  -- A url that has its own scheme is returned as is. Otherwise it receives the
  -- scheme of the base; when it has no authority either, it also receives the
  -- authority of the base and its path is resolved against the base path (an
  -- empty path takes the base path, and the base query too if it has none).
  -- When `other` is a table it is modified in place.
  -- @param other A string or a table representing a url
  -- @return the resolved url table
  function M:resolve(other)
    if type(self) == "string" then
      self = M.parse(self)
    end
    if type(other) == "string" then
      other = M.parse(other)
    end

    if other.scheme then
      return other
    end
    other.scheme = self.scheme

    local has_authority = other.authority and other.authority ~= ""
    if has_authority then
      return other
    end
    other:setAuthority(self.authority)

    if other.path and other.path ~= "" then
      other.path = reducePath(self.path, other.path)
      return other
    end

    other.path = self.path
    local query = other.query
    if not query or not next(query) then
      other.query = self.query
    end
    return other
  end
  505.  
  --- normalize a url path following some common normalization rules
  -- described on <a href="http://en.wikipedia.org/wiki/URL_normalization">The URL normalization page of Wikipedia</a>
  -- Dot segments are reduced and runs of slashes are collapsed to one slash.
  -- Can be called on a url table (modified in place) or on a url string
  -- (parsed first).
  -- @return the url table with its path normalized
  function M:normalize()
    local url = self
    if type(url) == 'string' then
      url = M.parse(url)
    end
    if url.path then
      local reduced = reducePath(url.path, "")
      -- normalize multiple slashes
      url.path = string.gsub(reduced, "//+", "/")
    end
    return url
  end
  522.  
  523. return M
Add Comment
Please, Sign In to add comment