Advertisement
Guest User

URI Grammar fragment with Arpeggio

a guest
Nov 14th, 2016
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 2.06 KB | None | 0 0
  1. # -*- coding: utf-8 -*-
  2. from __future__ import print_function
  3. from __future__ import unicode_literals, print_function
  4. try:
  5.    text=unicode
  6. except:
  7.    text=str
  8.  
  9. from arpeggio import Optional, ZeroOrMore, OneOrMore, EOF, \
  10.     ParserPython, PTNodeVisitor, visit_parse_tree
  11. from arpeggio import RegExMatch as _
  12.  
  13. def authority():
  14.     return Optional(userinfo, '@'), host, Optional(':', port)
  15.  
  16. def userinfo():
  17.     return ZeroOrMore([ unreserved, pct_encoded, sub_delims, ':'])
  18.  
  19. def host():
  20.     return [ ip_literal, ipv4address, reg_name ]
  21.  
  22. def port():
  23.     return _('\d*')
  24.  
  25. def ip_literal():
  26.     return '[', [ ipv6address, ipvfuture ], ']'
  27.  
  28. def ipvfuture():
  29.     return 'v', OneOrMore(hexdig), '.', OneOrMore((unreserved, sub_delims, ':'))
  30.  
  31. def ipv6address():
  32.     return [ (h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', ls32),
  33.              ('::', h16, ':', h16, ':', h16, ':', h16, ':', h16, ':', ls32),
  34.              (Optional(h16), '::', h16, ':', h16, ':', h16, ':', h16, ':', ls32) ]
  35.  
  36. def ipv4address():
  37.     return dec_octect, '.', dec_octect, '.', dec_octect, '.', dec_octect
  38.  
  39. def ls32():
  40.     return [ (h16, ':', h16), ipv4address ]
  41.  
  42. def h16():
  43.     return [ (hexdig,) * i for i in range(1, 5) ]
  44.  
  45. def dec_octect():
  46.     return [ _('25[0-5]'),
  47.              _('2[0-4]\d'),
  48.              _('1\d\d'),
  49.              _('[1-9]\d'),
  50.              _('\d') ]
  51.  
  52. def reg_name():
  53.     return ZeroOrMore([ unreserved, pct_encoded, sub_delims])
  54.  
  55. def hexdig():
  56.     return _('[0-9a-fA-F]')
  57.  
  58. def pchar():
  59.     return [ unreserved, pct_encoded, sub_delims, ':', '@' ]
  60.  
  61. def unreserved():
  62.     # return [ alpha, digit, '-', '.', '_', '~' ]
  63.     return _('[A-Za-z0-9._~-]')
  64.  
  65. def pct_encoded():
  66.     # return '%', hexdig, hexdig
  67.     return _('%[0-9A-Fa-f][0-9A-Fa-f]')
  68.  
  69. def sub_delims():
  70.     # return [ '!', '$', '&', "'", '(', ')', '*', '+', ',', ';', '=' ]
  71.     return _('[!$&\'()*+,;=]')
  72.  
  73. def alpha():
  74.     return _('[A-Za-z]')
  75.  
  76. def digit():
  77.     return _('\d')
  78.  
  79. parser = ParserPython(authority)
  80. parser.parse('//') # it does not match authority rule
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement