Advertisement
Guest User

Untitled

a guest
Aug 2nd, 2015
286
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.86 KB | None | 0 0
  1. /**
  2. * = LL parser =
  3. *
  4. * by Dmitry Soshnikov <dmitry.soshnikov@gmail.com>
  5. * MIT Style license
  6. *
  7. * Often one can see manually written LL parsers implemented as
  8. * recursive descent. Approach in this diff is a classical parse table
  9. * state machine.
  10. *
  11. * LL parser consists of:
  12. *
  13. * 1. input buffer (source code)
  14. * 2. stack
  15. * 3. parsing table (state machine)
  16. *
  17. * Parsing table:
  18. *
  19. * Table is used to get the next production number to apply, based on current
  20. * symbol from the buffer, and the symbol (terminal or non-terminal)
  21. * on top of the stack.
  22. *
  23. * - Rows in the table are non-terminals
  24. * - Columns are terminals
  25. *
  26. * Parsing algorithm:
  27. *
  28. * - if the top of the stack is *terminal*, and matches current symbol in
  29. * buffer, then just discard it from the stack, and move cursor further.
  30. * (if doesn't match -- parse error).
  31. *
  32. * - Else (it must be a non-terminal), replace it with an
  33. * alternative production, corresponding to the production number.
  34. * (if no production -- parse error).
  35. *
  36. * $ - is a special symbol used to mark bottom of the stack
  37. * and end of the buffer.
  38. *
  39. * S - is a start symbol.
  40. *
  41. * At the beginning stack is:
  42. *
  43. * [S, $]
  44. *
  45. * Example:
  46. *
  47. * Grammar:
  48. *
  49. * 1. S -> F
  50. * 2. S -> (S + F)
  51. * 3. F -> a
  52. *
  53. * Input:
  54. *
  55. * (a + a)
  56. *
  57. * Parse table:
  58. *
  59. * +------------------+
  60. * | ( ) a + $ |
  61. * +------------------+
  62. * | S 2 - 1 - - |
  63. * | F - - 3 - - |
  64. * +------------------+
  65. *
  66. * The production rules which are applied to parse `(a + a)` are: 2, 1, 3, 3:
  67. *
  68. * S -> ( S + F ) -> ( F + F ) -> ( a + F ) -> ( a + a )
  69. *
  70. * We see that each time the *left most* non-terminal is replaced. Hence, the
  71. * name of the parser: LL - scan source from Left to right, and apply the
  72. * Left most derivation.
  73. */
  74.  
  75.  
  76. /**
  77. * Our grammar representation. Key is a production number from
  78. * the grammar, the value is: 0 - LHS, 1 - RHS of the production.
  79. */
  80. var grammar = {
  81. 1: ['S', 'F'], // 1. S -> F
  82. 2: ['S', '(S + F)'], // 2. S -> (S + F)
  83. 3: ['F', 'a'], // 3. F -> a
  84. };
  85.  
  86. /**
  87. * Initial stack: bottom is the "end of the stack" ($),
  88. * and the start symbol ('S' in our case) is there too.
  89. */
  90. var stack = ['S', '$'];
  91.  
  92. function parse(source) {
  93. return parseFromTable(source, buildTable(grammar, source));
  94. }
  95.  
  96. function printGrammar(grammar) {
  97. console.log('Grammar:\n');
  98. for (var k in grammar) {
  99. console.log(' ' + k + '.', grammar[k][0], '->', grammar[k][1]);
  100. }
  101. console.log('');
  102. }
  103.  
  104. /**
  105. * Builds a state-machine table where table[non-terminal][terminal]
  106. * coordinates determine which next production rule to apply.
  107. */
  108. function buildTable(grammar, source) {
  109. // For now we assume a correct table was already built from
  110. // the grammar and source for us. We'll cover how to build it
  111. // automatically in the next lessons (see "first" and "follow"
  112. // sets topic). We encode only valid rules here and skip all other
  113. // (they return `undefined` meaning a parse error).
  114. //
  115. // +------------------+
  116. // | ( ) a + $ |
  117. // +------------------+
  118. // | S 2 - 1 - - |
  119. // | F - - 3 - - |
  120. // +------------------+
  121. //
  122. return {
  123. 'S': {'(': 2, 'a': 1},
  124. 'F': {'a': 3}
  125. };
  126. }
  127.  
  128. var productionNumbers = [];
  129.  
  130. /**
  131. * Parses a source using parse table.
  132. * Doesn't build a parse tree yet, but just checks a source
  133. * string for acceptance (prints production rules appled in case
  134. * of successful parse, or throws on parse errors).
  135. */
  136. function parseFromTable(source, table) {
  137. printGrammar(grammar);
  138. console.log('Source:', source);
  139. source = source.replace(/\s+/g, '');
  140. for (var cursor = 0; cursor < source.length;) {
  141. var current = source[cursor];
  142. var top = stack.shift();
  143. // Terminal is on the stack, just advance.
  144. if (isTerminal(top, table) && top === current) {
  145. // We already shifted the symbol from the stack,
  146. // so just advance the cursor.
  147. cursor++;
  148. continue;
  149. }
  150. // Else, it's a non-terminal, do derivation (replace it
  151. // in the stack with corresponding production).
  152. stack.unshift.apply(stack, getProduction(table, top, current));
  153. }
  154. console.log('Accepted. Productions:', productionNumbers.join(', '), '\n');
  155. }
  156.  
  157. function isTerminal(symbol, table) {
  158. return !table.hasOwnProperty(symbol);
  159. }
  160.  
  161. function getProduction(table, top, current) {
  162. var nextProductionNumber = table[top][current];
  163.  
  164. if (!nextProductionNumber) {
  165. throw Error('Parse error, unexpected token: ' + current);
  166. }
  167.  
  168. var nextProduction = grammar[nextProductionNumber];
  169.  
  170. productionNumbers.push(nextProductionNumber);
  171.  
  172. // Return an array of symbols from a production, e.g.
  173. // '(', 'S', '+', 'F', ')' for '(S + F)', since
  174. // each symbol should be pushed onto the stack.
  175. return nextProduction[1].split(/\s*/);
  176. }
  177.  
  178. // Test:
  179. parse('(a + a)');
  180.  
  181. // Output:
  182.  
  183. // Grammar:
  184. //
  185. // 1. S -> F
  186. // 2. S -> (S + F)
  187. // 3. F -> a
  188. //
  189. // Source: (a + a)
  190. // Accepted. Productions: 2, 1, 3, 3
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement