Advertisement
iPlain

Untitled

May 21st, 2017
68
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 22.03 KB | None | 0 0
  1. import re
  2. import sys
  3.  
  4. # Restrictions:
  5. # Integer constants must be short.
  6. # Stack size must not exceed 1024.
  7. # Integer is the only type.
  8. # Logical operators cannot be nested.
  9.  
  10. class Scanner:
  11. '''The interface comprises the methods lookahead and consume.
  12. Other methods should not be called from outside of this class.'''
  13.  
  14. def __init__(self, input_file):
  15. '''Reads the whole input_file to input_string, which remains constant.
  16. current_char_index counts how many characters of input_string have
  17. been consumed.
  18. current_token holds the most recently found token and the
  19. corresponding part of input_string.'''
  20. # source code of the program to be compiled
  21. self.input_string = input_file.read()
  22. # index where the unprocessed part of input_string starts
  23. self.current_char_index = 0
  24. # a pair (most recently read token, matched substring of input_string)
  25. self.current_token = self.get_token()
  26.  
  27. def skip_white_space(self):
  28. '''Consumes all characters in input_string up to the next
  29. non-white-space character.'''
  30. white_space = True
  31. while white_space == True:
  32. # Check if already at EOF
  33. if self.current_char_index >= len(self.input_string):
  34. return
  35. if self.input_string[self.current_char_index] in ' \t\n\r\x0b\x0c':
  36. self.current_char_index += 1
  37. else:
  38. white_space = False
  39.  
  40.  
  41. def no_token(self):
  42. '''Stop execution if the input cannot be matched to a token.'''
  43. print('lexical error: no token found at the start of ' +
  44. self.input_string[self.current_char_index:])
  45. exit()
  46.  
  47. def get_token(self):
  48. '''Returns the next token and the part of input_string it matched.
  49. The returned token is None if there is no next token.
  50. The characters up to the end of the token are consumed.
  51. TODO:
  52. Raise an exception by calling no_token() if the input contains
  53. extra non-white-space characters that do not match any token.'''
  54. self.skip_white_space()
  55. # find the longest prefix of input_string that matches a token
  56. token, longest = None, ''
  57. for (t, r) in Token.token_regexp:
  58. match = re.match(r, self.input_string[self.current_char_index:])
  59. if match and match.end() > len(longest):
  60. token, longest = t, match.group()
  61. # consume the token by moving the index to the end of the matched part
  62. if token is None and self.current_char_index < len(self.input_string):
  63. self.no_token()
  64. self.current_char_index += len(longest)
  65. return (token, longest)
  66.  
  67. def lookahead(self):
  68. '''Returns the next token without consuming it.
  69. Returns None if there is no next token.'''
  70. return self.current_token[0]
  71.  
  72. def unexpected_token(self, found_token, expected_tokens):
  73. '''Stop execution because an unexpected token was found.
  74. found_token contains just the token, not its value.
  75. expected_tokens is a sequence of tokens.'''
  76. print('syntax error: token in ' + repr(sorted(expected_tokens)) +
  77. ' expected but ' + repr(found_token) + ' found')
  78. exit()
  79.  
  80. def consume(self, *expected_tokens):
  81. '''Returns the next token and consumes it, if it is in
  82. expected_tokens. Calls unexpected_token(...) otherwise.
  83. If the token is a number or an identifier, not just the
  84. token but a pair of the token and its value is returned.'''
  85. t = self.current_token
  86. self.current_token = self.get_token()
  87. if t[0] not in expected_tokens:
  88. self.unexpected_token(t[0], expected_tokens)
  89.  
  90. if t[0] in [Token.NUM, Token.ID]:
  91. return(t[0], t[1])
  92. else:
  93. return(t[0])
  94.  
  95.  
  96. #raise Exception('consume not implemented')
  97.  
  98. class Token:
  99. # The following enumerates all tokens.
  100. DO = 'DO'
  101. ELSE = 'ELSE'
  102. END = 'END'
  103. IF = 'IF'
  104. THEN = 'THEN'
  105. WHILE = 'WHILE'
  106. SEM = 'SEM'
  107. BEC = 'BEC'
  108. LESS = 'LESS'
  109. EQ = 'EQ'
  110. GRTR = 'GRTR'
  111. LEQ = 'LEQ'
  112. NEQ = 'NEQ'
  113. GEQ = 'GEQ'
  114. ADD = 'ADD'
  115. SUB = 'SUB'
  116. MUL = 'MUL'
  117. DIV = 'DIV'
  118. LPAR = 'LPAR'
  119. RPAR = 'RPAR'
  120. NUM = 'NUM'
  121. ID = 'ID'
  122. READ = 'READ'
  123. WRITE = 'WRITE'
  124. NOT = 'NOT'
  125. AND = 'AND'
  126. OR = 'OR'
  127.  
  128. # The following list gives the regular expression to match a token.
  129. # The order in the list matters for mimicking Flex behaviour.
  130. # Longer matches are preferred over shorter ones.
  131. # For same-length matches, the first in the list is preferred.
  132. token_regexp = [
  133. (READ, 'read'), # placed at top to be the preferred, added
  134. (WRITE, 'write'), # placed at top to be the preferred, added
  135. (NOT, 'not'), #added
  136. (AND, 'and'), #added
  137. (OR, 'or'), #added
  138. (DO, 'do'),
  139. (ELSE, 'else'),
  140. (END, 'end'),
  141. (IF, 'if'),
  142. (THEN, 'then'),
  143. (WHILE, 'while'),
  144. (SEM, ';'),
  145. (BEC, ':='),
  146. (LESS, '<'),
  147. (EQ, '='),
  148. (GRTR, '>'),
  149. (LEQ, '<='),
  150. (NEQ, '!='), # added
  151. (GEQ, '>='),
  152. (ADD, '\\+'), # + is special in regular expressions, added
  153. (SUB, '-'), # added
  154. (MUL, '\\*'), # * is special in regular expressions, added
  155. (DIV, '\\/'), # / is special in regular expressions, added
  156. (LPAR, '\\('), # ( is special in regular expressions
  157. (RPAR, '\\)'), # ) is special in regular expressions
  158. (NUM, '[0-9]+'), # added
  159. (ID, '[a-z]+'),
  160. ]
  161.  
  162. class Symbol_Table:
  163. '''A symbol table maps identifiers to locations.'''
  164. def __init__(self):
  165. self.symbol_table = {}
  166. def size(self):
  167. '''Returns the number of entries in the symbol table.'''
  168. return len(self.symbol_table)
  169. def location(self, identifier):
  170. '''Returns the location of an identifier. If the identifier is not in
  171. the symbol table, it is entered with a new location. Locations are
  172. numbered sequentially starting with 0.'''
  173. if identifier in self.symbol_table:
  174. return self.symbol_table[identifier]
  175. index = len(self.symbol_table)
  176. self.symbol_table[identifier] = index
  177. return index
  178.  
  179. class Label:
  180. def __init__(self):
  181. self.current_label = 0
  182. def next(self):
  183. '''Returns a new, unique label.'''
  184. self.current_label += 1
  185. return 'l' + str(self.current_label)
  186.  
  187. def indent(s, level):
  188. return ' '*level + s + '\n'
  189.  
  190. # Each of the following classes is a kind of node in the abstract syntax tree.
  191. # indented(level) returns a string that shows the tree levels by indentation.
  192. # code() returns a string with JVM bytecode implementing the tree fragment.
  193. # true_code/false_code(label) jumps to label if the condition is/is not true.
  194. # Execution of the generated code leaves the value of expressions on the stack.
  195.  
  196. class Program_AST:
  197. def __init__(self, program):
  198. self.program = program
  199. def __repr__(self):
  200. return repr(self.program)
  201. def indented(self, level):
  202. return self.program.indented(level)
  203. def code(self):
  204. program = self.program.code()
  205. local = symbol_table.size()
  206. java_scanner = symbol_table.location('Java Scanner')
  207. return '.class public Program\n' + \
  208. '.super java/lang/Object\n' + \
  209. '.method public <init>()V\n' + \
  210. 'aload_0\n' + \
  211. 'invokenonvirtual java/lang/Object/<init>()V\n' + \
  212. 'return\n' + \
  213. '.end method\n' + \
  214. '.method public static main([Ljava/lang/String;)V\n' + \
  215. '.limit locals ' + str(local) + '\n' + \
  216. '.limit stack 1024\n' + \
  217. 'new java/util/Scanner\n' + \
  218. 'dup\n' + \
  219. 'getstatic java/lang/System.in Ljava/io/InputStream;\n' + \
  220. 'invokespecial java/util/Scanner.<init>(Ljava/io/InputStream;)V\n' + \
  221. 'astore ' + str(java_scanner) + '\n' + \
  222. program + \
  223. 'return\n' + \
  224. '.end method\n'
  225.  
  226. class Statements_AST:
  227. def __init__(self, statements):
  228. self.statements = statements
  229. def __repr__(self):
  230. result = repr(self.statements[0])
  231. for st in self.statements[1:]:
  232. result += '; ' + repr(st)
  233. return result
  234. def indented(self, level):
  235. result = indent('Statements', level)
  236. for st in self.statements:
  237. result += st.indented(level+1)
  238. return result
  239. def code(self):
  240. result = ''
  241. for st in self.statements:
  242. result += st.code()
  243. return result
  244.  
  245. class If_AST:
  246. def __init__(self, condition, then):
  247. self.condition = condition
  248. self.then = then
  249. def __repr__(self):
  250. return 'if ' + repr(self.condition) + ' then ' + \
  251. repr(self.then) + ' end'
  252. def indented(self, level):
  253. return indent('If', level) + \
  254. self.condition.indented(level+1) + \
  255. self.then.indented(level+1)
  256. def code(self):
  257. l1 = label_generator.next()
  258. l2 = label_generator.next()
  259. return self.condition.code(l1) + \
  260. 'goto ' + l2 + '\n' + \
  261. l1 + ':\n' + \
  262. self.then.code() + \
  263. l2 + ':\n'
  264.  
  265. class If_Else_AST:
  266. def __init__(self, condition, then, elsethen):
  267. self.condition = condition
  268. self.then = then
  269. self.elsethen = elsethen
  270. def __repr__(self):
  271. return 'if ' + repr(self.condition) + ' then ' + \
  272. repr(self.then) + ' else ' + repr(self.elsethen) + ' end'
  273. def indented(self, level):
  274. return indent('If-Else', level) + \
  275. self.condition.indented(level+1) + \
  276. self.then.indented(level+1) + \
  277. self.elsethen.indented(level+1)
  278. def code(self):
  279. l1 = label_generator.next()
  280. l2 = label_generator.next()
  281. l3 = label_generator.next()
  282. return self.condition.code(l1) + \
  283. 'goto ' + l2 + '\n' + \
  284. l1 + ':\n' + \
  285. self.then.code() + \
  286. 'goto ' + l3 + '\n' + \
  287. l2 + ':\n' + \
  288. self.elsethen.code() + \
  289. l3 + ':\n'
  290.  
  291. class While_AST:
  292. def __init__(self, condition, body):
  293. self.condition = condition
  294. self.body = body
  295. def __repr__(self):
  296. return 'while ' + repr(self.condition) + ' do ' + \
  297. repr(self.body) + ' end'
  298. def indented(self, level):
  299. return indent('While', level) + \
  300. self.condition.indented(level+1) + \
  301. self.body.indented(level+1)
  302. def code(self):
  303. l1 = label_generator.next()
  304. l2 = label_generator.next()
  305. l3 = label_generator.next()
  306. return l1 + ':\n' + \
  307. self.condition.code(l3) + \
  308. 'goto ' + l2 + '\n' + \
  309. l3 + ':\n' + \
  310. self.body.code() + \
  311. 'goto ' + l1 + '\n' + \
  312. l2 + ':\n'
  313.  
  314. class Assign_AST:
  315. def __init__(self, identifier, expression):
  316. self.identifier = identifier
  317. self.expression = expression
  318. def __repr__(self):
  319. return repr(self.identifier) + ':=' + repr(self.expression)
  320. def indented(self, level):
  321. return indent('Assign', level) + \
  322. self.identifier.indented(level+1) + \
  323. self.expression.indented(level+1)
  324. def code(self):
  325. loc = symbol_table.location(self.identifier.identifier)
  326. return self.expression.code() + \
  327. 'istore ' + str(loc) + '\n'
  328.  
  329. class Write_AST:
  330. def __init__(self, expression):
  331. self.expression = expression
  332. def __repr__(self):
  333. return 'write ' + repr(self.expression)
  334. def indented(self, level):
  335. return indent('Write', level) + self.expression.indented(level+1)
  336. def code(self):
  337. return 'getstatic java/lang/System/out Ljava/io/PrintStream;\n' + \
  338. self.expression.code() + \
  339. 'invokestatic java/lang/String/valueOf(I)Ljava/lang/String;\n' + \
  340. 'invokevirtual java/io/PrintStream/println(Ljava/lang/String;)V\n'
  341.  
  342. class Read_AST:
  343. def __init__(self, identifier):
  344. self.identifier = identifier
  345. def __repr__(self):
  346. return 'read ' + repr(self.identifier)
  347. def indented(self, level):
  348. return indent('Read', level) + self.identifier.indented(level+1)
  349. def code(self):
  350. java_scanner = symbol_table.location('Java Scanner')
  351. loc = symbol_table.location(self.identifier.identifier)
  352. return 'aload ' + str(java_scanner) + '\n' + \
  353. 'invokevirtual java/util/Scanner.nextInt()I\n' + \
  354. 'istore ' + str(loc) + '\n'
  355.  
  356. class BooleanExpression_AST():
  357. def __init__(self, left, right):
  358. self.left = left
  359. self.right = right
  360. def __repr__(self):
  361. return '(' + repr(self.left) + ' or ' + repr(self.right) + ')'
  362. def indented(self, level):
  363. return self.left.indented(level+1) + \
  364. self.right.indented(level+1)
  365. def code(self, t):
  366. return self.left.code(t) + \
  367. self.right.code(t)
  368.  
  369. class BooleanTerm_AST():
  370. def __init__(self, left, right):
  371. self.left = left
  372. self.right = right
  373. def __repr__(self):
  374. return '(' + repr(self.left) + ' and ' + repr(self.right) + ')'
  375. def indented(self, level):
  376. return self.left.indented(level+1) + \
  377. self.right.indented(level+1)
  378. def code(self, t):
  379. l1 = label_generator.next()
  380. return self.left.false_code(l1) + \
  381. self.right.false_code(l1) + \
  382. 'goto ' + t + '\n' + \
  383. l1 + ':\n'
  384.  
  385. class Comparison_AST:
  386. def __init__(self, left, op, right):
  387. self.left = left
  388. self.op = op
  389. self.right = right
  390. def __repr__(self):
  391. op = { Token.LESS:'<', Token.EQ:'=', Token.GRTR:'>',
  392. Token.LEQ:'<=', Token.NEQ:'!=', Token.GEQ:'>=' }
  393. return repr(self.left) + op[self.op] + repr(self.right)
  394. def indented(self, level):
  395. return indent(self.op, level) + \
  396. self.left.indented(level+1) + \
  397. self.right.indented(level+1)
  398. def true_code(self, label):
  399. op = { Token.LESS:'if_icmplt', Token.EQ:'if_icmpeq',
  400. Token.GRTR:'if_icmpgt', Token.LEQ:'if_icmple',
  401. Token.NEQ:'if_icmpne', Token.GEQ:'if_icmpge' }
  402. return self.left.code() + \
  403. self.right.code() + \
  404. op[self.op] + ' ' + label + '\n'
  405. def false_code(self, label):
  406. # Negate each comparison because of jump to "false" label.
  407. op = { Token.LESS:'if_icmpge', Token.EQ:'if_icmpne',
  408. Token.GRTR:'if_icmple', Token.LEQ:'if_icmpgt',
  409. Token.NEQ:'if_icmpeq', Token.GEQ:'if_icmplt' }
  410. return self.left.code() + \
  411. self.right.code() + \
  412. op[self.op] + ' ' + label + '\n'
  413. def code(self, t):
  414. return self.true_code(t)
  415.  
  416. class Expression_AST:
  417. def __init__(self, left, op, right):
  418. self.left = left
  419. self.op = op
  420. self.right = right
  421. def __repr__(self):
  422. op = { Token.ADD:'+', Token.SUB:'-', Token.MUL:'*', Token.DIV:'/' }
  423. return '(' + repr(self.left) + op[self.op] + repr(self.right) + ')'
  424. def indented(self, level):
  425. return indent(self.op, level) + \
  426. self.left.indented(level+1) + \
  427. self.right.indented(level+1)
  428. def code(self):
  429. op = { Token.ADD:'iadd', Token.SUB:'isub',
  430. Token.MUL:'imul', Token.DIV:'idiv' }
  431. return self.left.code() + \
  432. self.right.code() + \
  433. op[self.op] + '\n'
  434.  
  435. class Number_AST:
  436. def __init__(self, number):
  437. self.number = number
  438. def __repr__(self):
  439. return self.number
  440. def indented(self, level):
  441. return indent(self.number, level)
  442. def code(self): # works only for short numbers
  443. return 'sipush ' + self.number + '\n'
  444.  
  445. class Identifier_AST:
  446. def __init__(self, identifier):
  447. self.identifier = identifier
  448. def __repr__(self):
  449. return self.identifier
  450. def indented(self, level):
  451. return indent(self.identifier, level)
  452. def code(self):
  453. loc = symbol_table.location(self.identifier)
  454. return 'iload ' + str(loc) + '\n'
  455.  
  456. # The following methods comprise the recursive-descent parser.
  457.  
  458. def program():
  459. sts = statements()
  460. return Program_AST(sts)
  461.  
  462. def statements():
  463. result = [statement()]
  464. while scanner.lookahead() == Token.SEM:
  465. scanner.consume(Token.SEM)
  466. st = statement()
  467. result.append(st)
  468. return Statements_AST(result)
  469.  
  470. def statement():
  471. if scanner.lookahead() == Token.IF:
  472. return if_statement()
  473. elif scanner.lookahead() == Token.WHILE:
  474. return while_statement()
  475. elif scanner.lookahead() == Token.ID:
  476. return assignment()
  477. elif scanner.lookahead() == Token.WRITE:
  478. return write()
  479. elif scanner.lookahead() == Token.READ:
  480. return read()
  481. else: # error
  482. return scanner.consume(Token.IF, Token.WHILE, Token.ID, Token.WRITE, Token.READ)
  483.  
  484. def if_statement():
  485. scanner.consume(Token.IF)
  486. condition = booleanexpression()
  487. scanner.consume(Token.THEN)
  488. then = statements()
  489. if scanner.lookahead() == Token.ELSE:
  490. scanner.consume(Token.ELSE)
  491. elsethen = statements()
  492. scanner.consume(Token.END)
  493. return If_Else_AST(condition, then, elsethen)
  494. scanner.consume(Token.END)
  495. return If_AST(condition, then)
  496.  
  497. def while_statement():
  498. scanner.consume(Token.WHILE)
  499. condition = booleanexpression()
  500. scanner.consume(Token.DO)
  501. body = statements()
  502. scanner.consume(Token.END)
  503. return While_AST(condition, body)
  504.  
  505. def assignment():
  506. ident = identifier()
  507. scanner.consume(Token.BEC)
  508. expr = expression()
  509. return Assign_AST(ident, expr)
  510.  
  511. def write():
  512. scanner.consume(Token.WRITE)
  513. expr = expression()
  514. return Write_AST(expr)
  515.  
  516. def read():
  517. scanner.consume(Token.READ)
  518. ident = identifier()
  519. return Read_AST(ident)
  520.  
  521. def booleanexpression():
  522. result = booleanterm()
  523. while scanner.lookahead() is Token.OR:
  524. scanner.consume(Token.OR)
  525. tree = booleanterm()
  526. result = BooleanExpression_AST(result, tree)
  527. return result
  528.  
  529. def booleanterm():
  530. result = booleanfactor()
  531. while scanner.lookahead() is Token.AND:
  532. scanner.consume(Token.AND)
  533. tree = booleanfactor()
  534. result = BooleanTerm_AST(result, tree)
  535. return result
  536.  
  537. def booleanfactor():
  538. neg = False
  539. while scanner.lookahead() is Token.NOT:
  540. scanner.consume(Token.NOT)
  541. neg = not neg
  542. left = expression()
  543. op = scanner.consume(Token.LESS, Token.EQ, Token.GRTR,
  544. Token.LEQ, Token.NEQ, Token.GEQ)
  545. right = expression()
  546. if neg:
  547. negs = { Token.LESS:Token.GEQ, Token.EQ:Token.NEQ,
  548. Token.GRTR:Token.LEQ, Token.LEQ:Token.GRTR,
  549. Token.NEQ:Token.EQ, Token.GEQ:Token.LESS }
  550. op = negs[op]
  551. return Comparison_AST(left, op, right)
  552.  
  553.  
  554. def comparison():
  555. left = expression()
  556. op = scanner.consume(Token.LESS, Token.EQ, Token.GRTR,
  557. Token.LEQ, Token.NEQ, Token.GEQ)
  558. right = expression()
  559. return Comparison_AST(left, op, right)
  560.  
  561. def expression():
  562. result = term()
  563. while scanner.lookahead() in [Token.ADD, Token.SUB]:
  564. op = scanner.consume(Token.ADD, Token.SUB)
  565. tree = term()
  566. result = Expression_AST(result, op, tree)
  567. return result
  568.  
  569. def term():
  570. result = factor()
  571. while scanner.lookahead() in [Token.MUL, Token.DIV]:
  572. op = scanner.consume(Token.MUL, Token.DIV)
  573. tree = factor()
  574. result = Expression_AST(result, op, tree)
  575. return result
  576.  
  577. def factor():
  578. if scanner.lookahead() == Token.LPAR:
  579. scanner.consume(Token.LPAR)
  580. result = expression()
  581. scanner.consume(Token.RPAR)
  582. return result
  583. elif scanner.lookahead() == Token.NUM:
  584. value = scanner.consume(Token.NUM)[1]
  585. return Number_AST(value)
  586. elif scanner.lookahead() == Token.ID:
  587. return identifier()
  588. else: # error
  589. return scanner.consume(Token.LPAR, Token.NUM, Token.ID)
  590.  
  591. def identifier():
  592. value = scanner.consume(Token.ID)[1]
  593. return Identifier_AST(value)
  594.  
  595. # Initialise scanner, symbol table and label generator.
  596.  
  597. scanner = Scanner(sys.stdin)
  598. symbol_table = Symbol_Table()
  599. symbol_table.location('Java Scanner') # fix a location for the Java Scanner
  600. label_generator = Label()
  601.  
  602. # Uncomment the following to test the scanner without the parser.
  603. # Show all tokens in the input.
  604. #
  605. # token = scanner.lookahead()
  606. # while token != None:
  607. # if token in [Token.NUM, Token.ID]:
  608. # token, value = scanner.consume(token)
  609. # print(token, value)
  610. # else:
  611. # print(scanner.consume(token))
  612. # token = scanner.lookahead()
  613. # exit()
  614.  
  615. # Call the parser.
  616.  
  617. ast = program()
  618. if scanner.lookahead() != None:
  619. print('syntax error: end of input expected but token ' +
  620. repr(scanner.lookahead()) + ' found')
  621. exit()
  622.  
  623. # Uncomment the following to test the parser without the code generator.
  624. # Show the syntax tree with levels indicated by indentation.
  625. #
  626. # print(ast.indented(0), end='')
  627. # exit()
  628.  
  629. # Call the code generator.
  630.  
  631. # Translate the abstract syntax tree to JVM bytecode.
  632. # It can be assembled to a class file by Jasmin: http://jasmin.sourceforge.net/
  633.  
  634. print(ast.code(), end='')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement