Advertisement
Guest User

Untitled

a guest
Aug 1st, 2015
229
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.73 KB | None | 0 0
  1. import sys
  2.  
  3.  
  4. #
  5. # ParserInput
  6. #
  7. # This class represents the input data and the current
  8. # position in the data.
  9. #
  10. # Brief note about 'max_position':
  11. #
  12. # When running a complex parser, the current position moves forward
  13. # when a particular sub-parser matches and moves backward when
  14. # it doesn't match. The 'max_position' field tracks the farthest position
  15. # over the entire execution of a complex parser.
  16. # When the complex parser fails, the farthest position is a good indicator
  17. # of the location of the error.
  18. #
  19. # The purpose of the 'max_position' field is to report
  20. # the error location when the main parser doesn't match.
  21. #
  22.  
  23. class ParserInput(object) :
  24. def __init__(self, data, position) :
  25. self._data = data
  26. self._position = position
  27. self._max_position = position
  28.  
  29. def data(self) :
  30. return self._data
  31.  
  32. def position(self) :
  33. return self._position
  34.  
  35. def remaining(self) :
  36. return len(self._data) - self._position
  37.  
  38. def read(self) :
  39. i = self._position
  40. return self._data[i:i+1]
  41.  
  42. def inc_position(self) :
  43. self._position += 1
  44. self._max_position = max(self._max_position, self._position)
  45.  
  46. def set_position(self, position) :
  47. self._position = position
  48. self._max_position = max(self._max_position, self._position)
  49.  
  50. def max_position(self) :
  51. return self._max_position
  52.  
  53.  
  54. #
  55. # Match, NoMatch
  56. #
  57. # A parser object returns either a Match or NoMatch object.
  58. # The Match object contains a 'value' property that holds
  59. # the result of parsing.
  60. #
  61.  
  62. class Match(object) :
  63. def __init__(self, value) :
  64. self.value = value
  65.  
  66. def __repr__(self) :
  67. return "Match(%s)" % (self.value,)
  68.  
  69. class NoMatch(object) :
  70. def __repr__(self) :
  71. return "NoMatch()"
  72.  
  73. def is_match(result) :
  74. return isinstance(result, Match)
  75.  
  76.  
  77. #
  78. # Parser
  79. #
  80. # This is the base-class representing parser objects.
  81. # The 'parse' method is the method that does the parsing.
  82. # It accepts a ParserInput object and returns either a Match
  83. # or NoMatch object.
  84. #
  85. # In the case of a match, the parsed result is returned
  86. # in the 'value' property of the Match object and the
  87. # parser input position is advanced by the length of the input
  88. # that was parsed.
  89. #
  90. # In the case of no-match, the parser input position must
  91. # remain unchanged from what it was when the 'parse' method
  92. # was called. If the input position was advanced during
  93. # intermediate stages, then it should be restored back to
  94. # what it was when the 'parse' method was called.
  95. #
  96. # Note on the 'match' method and 'modifier' attribute:
  97. #
  98. # The 'modifier' attribute can be set to a function that
  99. # modifies the 'value' property of the Match object in the event
  100. # of a successful match. The 'match' method is a helper method
  101. # that applies the 'modifier' if it is present.
  102. #
  103.  
  104. class Parser(object) :
  105. def match(self, value) :
  106. modifier = getattr(self, "modifier", None)
  107. if modifier is not None :
  108. value = self.modifier(value)
  109. return Match(value)
  110.  
  111. def parse(self, parser_input) :
  112. raise NotImplementedError
  113.  
  114.  
  115. #
  116. # Char
  117. #
  118. # The Char parser parses a single character matching the character
  119. # provided in the constructor if it is found at the current position
  120. # and fails if it's not found or if there are no more characters left.
  121. #
  122.  
  123. class Char(Parser) :
  124. def __init__(self, char) :
  125. self._char = char
  126.  
  127. def parse(self, parser_input) :
  128. if parser_input.remaining() == 0 :
  129. return NoMatch()
  130. c = parser_input.read()
  131. if c != self._char :
  132. return NoMatch()
  133. parser_input.inc_position()
  134. return self.match(c)
  135.  
  136.  
  137. #
  138. # CharSet
  139. #
  140. # The CharSet parser parses a single character matching any one
  141. # of the characters passed in the constructor. The input to the
  142. # constructor can be a string, a list of characters, or a set of
  143. # characters.
  144. #
  145.  
  146. class CharSet(Parser) :
  147. def __init__(self, chars) :
  148. self._chars = chars
  149.  
  150. def parse(self, parser_input) :
  151. if parser_input.remaining() == 0 :
  152. return NoMatch()
  153. c = parser_input.read()
  154. if c not in self._chars :
  155. return NoMatch()
  156. parser_input.inc_position()
  157. return self.match(c)
  158.  
  159.  
  160. #
  161. # ZeroOrMore
  162. #
  163. # ZeroOrMore takes another parser as input and applies it repeatedly
  164. # until it doesn't match. All the results of the repeated application
  165. # are collected in a list and returned as the result of parsing.
  166. #
  167. # This parser never fails to match because it simply returns an
  168. # empty list as a match if the input parser doesn't match even once.
  169. #
  170.  
  171. class ZeroOrMore(Parser) :
  172. def __init__(self, parser) :
  173. self._parser = parser
  174.  
  175. def parse(self, parser_input) :
  176. values = []
  177. while True :
  178. result = self._parser.parse(parser_input)
  179. if not is_match(result) :
  180. break
  181. values.append(result.value)
  182. return self.match(values)
  183.  
  184.  
  185. #
  186. # OneOrMore
  187. #
  188. # OneOrMore takes another parser as input and applies it repeatedly
  189. # until it doesn't match. All the results of the repeated application
  190. # are collected in a list and returned as the result of parsing.
  191. #
  192. # This parser matches only if the input parser matches atleast once.
  193. #
  194.  
  195. class OneOrMore(Parser) :
  196. def __init__(self, parser) :
  197. self._parser = parser
  198.  
  199. def parse(self, parser_input) :
  200. values = []
  201. while True :
  202. result = self._parser.parse(parser_input)
  203. if not is_match(result) :
  204. break
  205. values.append(result.value)
  206. if len(values) == 0 :
  207. return NoMatch()
  208. return self.match(values)
  209.  
  210.  
  211. #
  212. # Sequence
  213. #
  214. # The Sequence parser takes N parsers as input and applies them
  215. # one after another. Sequence succeeds in matching if all the input
  216. # parsers succeed and doesn't match even if one of input parsers don't
  217. # match. In the event of a successful match, the results of the input
  218. # parsers are collected in a list and returned as the result of parsing.
  219. #
  220.  
  221. class Sequence(Parser) :
  222. def __init__(self, *parsers) :
  223. self._parsers = parsers
  224.  
  225. def parse(self, parser_input) :
  226. values = []
  227. saved = parser_input.position()
  228. for parser in self._parsers :
  229. result = parser.parse(parser_input)
  230. if not is_match(result) :
  231. parser_input.set_position(saved)
  232. return NoMatch()
  233. values.append(result.value)
  234. return self.match(values)
  235.  
  236.  
  237. #
  238. # Choice
  239. #
  240. # The Choice parser takes N parsers as input. It applies the first one
  241. # and if it succeeds then the result is returned as a successful match.
  242. # If the first parser doesn't match, then the second parser is attempted.
  243. # This continues until one of the parsers matches. The result of the
  244. # first matching parser is returned as the match result of
  245. # the Choice parser. The Choice parser doesn't match if all the
  246. # input parsers fail to match.
  247. #
  248.  
  249. class Choice(Parser) :
  250. def __init__(self, *parsers) :
  251. self._parsers = parsers
  252.  
  253. def parse(self, parser_input) :
  254. for parser in self._parsers :
  255. result = parser.parse(parser_input)
  256. if is_match(result) :
  257. return self.match(result.value)
  258. return NoMatch()
  259.  
  260.  
  261. #
  262. # Optional
  263. #
  264. # The Optional parser takes a parser as input and always succeeds
  265. # in matching. If the input parser succeeds in matching, then its
  266. # result is returned as the result of a successful match. If the
  267. # input parser doesn't match, then None is returned as a successful
  268. # match.
  269. #
  270.  
  271. class Optional(Parser) :
  272. def __init__(self, parser) :
  273. self._parser = parser
  274.  
  275. def parse(self, parser_input) :
  276. result = self._parser.parse(parser_input)
  277. if not is_match(result) :
  278. return self.match(None)
  279. return self.match(result.value)
  280.  
  281.  
  282. #
  283. # Wrapper
  284. #
  285. # Wrapper is a simple direct wrapper around another parser.
  286. # The input parser is available in the 'parser' property.
  287. # It can be leftout during construction and can be set directly
  288. # set later. This allows for forward declaring a parser that needs
  289. # to call itself recursively.
  290. #
  291.  
  292. class Wrapper(Parser) :
  293. def __init__(self, parser=None) :
  294. self.parser = parser
  295.  
  296. def parse(self, parser_input) :
  297. result = self.parser.parse(parser_input)
  298. if not is_match(result) :
  299. return result
  300. return self.match(result.value)
  301.  
  302.  
  303. #
  304. # EOF
  305. #
  306. # The EOF parser succeeds in matching if the input posiition
  307. # is at the end. It fails to match otherwise.
  308. #
  309.  
  310. class EOF(Parser) :
  311. def parse(self, parser_input) :
  312. if parser_input.remaining() == 0 :
  313. return self.match(None)
  314. else :
  315. return NoMatch()
  316.  
  317.  
  318. #
  319. # Example 1
  320. #
  321. # Parse one or more digits
  322. #
  323.  
  324. def example1() :
  325. digit = CharSet("0123456789")
  326. digits = OneOrMore(digit)
  327. return digits
  328.  
  329.  
  330. #
  331. # Example 2
  332. #
  333. # Parse one or more digits and convert to integer
  334. #
  335.  
  336. def example2() :
  337.  
  338. # convert single digit to integer
  339. def char_to_digit(c) :
  340. return ord(c) - ord("0")
  341.  
  342. # convert list of digit values to integer
  343. def digits_to_number(a) :
  344. result = 0
  345. for x in a :
  346. result = result*10 + x
  347. return result
  348.  
  349. digit = CharSet("0123456789")
  350. digit.modifier = char_to_digit
  351. digits = OneOrMore(digit)
  352. digits.modifier = digits_to_number
  353. return digits
  354.  
  355.  
  356. #
  357. # Example 3
  358. #
  359. # An expression parser.
  360. #
  361.  
  362. def example3() :
  363.  
  364. # Helpers
  365.  
  366. def char_to_digit(c) :
  367. return ord(c) - ord("0")
  368.  
  369. def digits_to_number(a) :
  370. result = 0
  371. for x in a :
  372. result = result*10 + x
  373. return result
  374.  
  375. def bracket_inner(a) :
  376. return a[1]
  377.  
  378. def apply_sign(a) :
  379. sign = a[0]
  380. if sign is not None :
  381. return [sign, a[1]]
  382. else :
  383. return a[1]
  384.  
  385. def make_operation_tree(a) :
  386. value = a[0]
  387. for operator, value2 in a[1] :
  388. value = [operator, value, value2]
  389. return value
  390.  
  391.  
  392. # parsers
  393.  
  394. whitespace = ZeroOrMore(Char(" "))
  395.  
  396. def eat_space(parser) :
  397. parser2 = Sequence(whitespace, parser)
  398. parser2.modifier = lambda a : a[1]
  399. return Wrapper(parser2)
  400.  
  401. digit = CharSet("0123456789")
  402. digit.modifier = char_to_digit
  403.  
  404. number = eat_space(OneOrMore(digit))
  405. number.modifier = digits_to_number
  406.  
  407. expr = Wrapper()
  408.  
  409. left_parenthesis = eat_space(Char("("))
  410. right_parenthesis = eat_space(Char(")"))
  411. bracketed = Sequence(left_parenthesis, expr, right_parenthesis)
  412. bracketed.modifier = bracket_inner
  413.  
  414. sign = eat_space(CharSet("+-"))
  415. atomic = Sequence(Optional(sign), Choice(number, bracketed))
  416. atomic.modifier = apply_sign
  417.  
  418. term_operator = eat_space(CharSet("*/"))
  419. term_operation = Sequence(term_operator, atomic)
  420. term = Sequence(atomic, ZeroOrMore(term_operation))
  421. term.modifier = make_operation_tree
  422.  
  423. expr_operator = eat_space(CharSet("+-"))
  424. expr_operation = Sequence(expr_operator, term)
  425.  
  426. expr.parser = Sequence(term, ZeroOrMore(expr_operation))
  427. expr.modifier = make_operation_tree
  428.  
  429. return expr
  430.  
  431.  
  432. #
  433. # main
  434. #
  435.  
  436. main_parser = None
  437.  
  438. #
  439. # Uncomment one of the following lines.
  440. #
  441. # main_parser = example1()
  442. # main_parser = example2()
  443. # main_parser = example3()
  444.  
  445. def main() :
  446. if main_parser is None :
  447. print "Please uncomment one of the example initializer lines"
  448. return
  449.  
  450. if len(sys.argv) != 2 :
  451. print "%s <input>" % (sys.argv[0],)
  452. return
  453. s = sys.argv[1]
  454. parser_input = ParserInput(s, 0)
  455. result = main_parser.parse(parser_input)
  456. if not is_match(result) :
  457. print "ERROR: %s" % (s,)
  458. i = len("ERROR: ") + parser_input.max_position()
  459. print "%s^" % ("-"*i,)
  460. else :
  461. print "RESULT:", result.value
  462. i = parser_input.position()
  463. data = parser_input.data()
  464. if i < len(data) :
  465. print "UNPARSED LEFTOVER:", data[i:]
  466.  
  467. if __name__ == "__main__" :
  468. main()
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement