Advertisement
Guest User

Untitled

a guest
Jul 26th, 2016
129
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 3.44 KB | None | 0 0
  1. # A lexical analyzer with recursive descent parser system for EBNF grammar.
  2. # It parses rules correctly only if every token in the input is delimited by whitespace,
  3. # e.g. A = B [ R , C ] ;
  4.  
  # Classify a lexeme as one of the EBNF operator/punctuation tokens.
  #
  # ch - the lexeme to classify
  #
  # Returns the token name for ( ) [ ] { } | , = ; and "ERROR" for
  # anything else.
  proc lookup { ch } {
      switch -exact -- $ch {
          "("  { return "LEFT_PAREN" }
          ")"  { return "RIGHT_PAREN" }
          "\[" { return "LEFT_BRACKET" }
          "\]" { return "RIGHT_BRACKET" }
          "\{" { return "LEFT_BRACE" }
          "\}" { return "RIGHT_BRACE" }
          "|"  { return "ALTERNATE_OP" }
          ","  { return "CONCATENATE_OP" }
          "="  { return "EQUALS_OP" }
          ";"  { return "END_OF_RULE" }
          default { return "ERROR" }
      }
  }
  39.  
  40.  
  # Fetch the next lexeme from the global token list and classify it.
  #
  # Reads the globals `tokens` (list of lexemes) and `count` (index of the
  # next lexeme), advances `count` by one, and stores the result in the
  # globals `lexeme` and `nextToken`. Progress is echoed to stdout.
  #
  # Classification:
  #   IDENTIFIER - a letter followed by letters, digits or underscores
  #   TERMINAL   - letters/digits enclosed in matching double or single quotes
  #   otherwise  - whatever `lookup` returns (an operator token or "ERROR")
  #
  # Reading past the end of `tokens` yields an empty lexeme, which `lookup`
  # classifies as "ERROR".
  proc lex {} {
      global nextToken lexeme tokens count

      set lexeme [lindex $tokens $count]
      incr count
      puts -nonewline "Next lexeme is $lexeme . "

      # The identifier pattern must be anchored at BOTH ends; without the
      # trailing `$` a lexeme such as `abc;` or `a=b` would be accepted as an
      # identifier because only its prefix is checked.
      if { [regexp -nocase {^[a-z][a-z0-9_]*$} $lexeme] } {
          set nextToken "IDENTIFIER"
      } elseif { [regexp -nocase {^"[a-z0-9]+"$} $lexeme]
              || [regexp -nocase {^'[a-z0-9]+'$} $lexeme] } {
          set nextToken "TERMINAL"
      } else {
          set nextToken [lookup $lexeme]
      }

      puts "Next token is $nextToken"
  }
  61.  
  # rule -> lhs = rhs ;
  #
  # Parses one complete rule. `lhs` consumes the rule name, one more `lex`
  # call must produce the equals sign, and `rhs` leaves its lookahead token
  # in `nextToken`, which must be the terminating semicolon. Any violation
  # prints a message and terminates the program.
  proc rule {} {
      global nextToken

      puts "Enter rule"
      lhs
      lex

      if { $nextToken ne "EQUALS_OP" } {
          puts "Error: = is expected after lhs in rule. Terminating program."
          exit
      }

      rhs

      if { $nextToken ne "END_OF_RULE" } {
          puts "Error: ; is expected in the end of rule. Terminating program."
          exit
      }

      puts "Exit rule"
  }
  81.  
  # lhs -> identifier
  #
  # Consumes one token via `lex`; it must be an identifier, otherwise an
  # error is printed and the program terminates.
  proc lhs {} {
      global nextToken

      puts "Enter lhs"
      lex

      if { $nextToken ne "IDENTIFIER" } {
          puts "Error: identifier is expected in lhs. Terminating program."
          exit
      }

      puts "Exit lhs"
      return
  }
  95.  
  96.  
  # rhs -> term "|" rhs
  #      | term "," rhs
  #      | term
  #
  # Parses one term, then reads a lookahead token with `lex`. When the
  # lookahead is the alternation or concatenation operator the remainder is
  # parsed recursively. On return, `nextToken` holds the first token that
  # does not belong to this rhs.
  proc rhs {} {
      global nextToken

      puts "Enter rhs"
      term
      lex

      set continues [expr { $nextToken eq "ALTERNATE_OP" || $nextToken eq "CONCATENATE_OP" }]
      if { $continues } {
          rhs
      }

      puts "Exit rhs"
  }
  112.  
  113.  
  # term -> identifier
  #       | terminal
  #       | [ rhs ]
  #       | ( rhs )
  #       | { rhs }
  #
  # Consumes one token via `lex`. An identifier or terminal is a complete
  # term. An opening bracket, brace or parenthesis starts a group: `rhs` is
  # parsed recursively and leaves its lookahead in `nextToken`, which must be
  # the closing delimiter that matches the opener.
  #
  # Any other token, or a missing/mismatched closing delimiter, prints an
  # error and terminates the program, in the same way `lhs` and `rule` do.
  # Without these checks, invalid input such as `A = ; ;` is accepted and a
  # failed `[` group can fall through into the `{` or `(` branch.
  proc term {} {
      global nextToken

      puts "Enter term"
      lex

      if { $nextToken eq "IDENTIFIER" || $nextToken eq "TERMINAL" } {
          puts "Exit term"
          return
      }

      # Decide which closing token must follow the nested rhs.
      switch -exact -- $nextToken {
          LEFT_BRACKET { set closer "RIGHT_BRACKET" }
          LEFT_BRACE   { set closer "RIGHT_BRACE" }
          LEFT_PAREN   { set closer "RIGHT_PAREN" }
          default {
              puts "Error: identifier, terminal or opening delimiter is expected in term. Terminating program."
              exit
          }
      }

      rhs

      if { $nextToken ne $closer } {
          puts "Error: $closer is expected in the end of group in term. Terminating program."
          exit
      }

      puts "Exit term"
      return
  }
  154.  
  155.  
  # Main function: tokenize the input and parse it as a sequence of rules.
  #
  # userinput - EBNF text whose tokens are separated by whitespace
  #
  # The parser state (`tokens`, `count`, `nextToken`, `lexeme`) lives in
  # global variables because `lex` and the parsing procs access it through
  # `global`. Setting these as proc-local variables would leave the globals
  # unset, so they are declared global here before being initialized.
  proc runs3 { userinput } {
      global tokens count nextToken lexeme

      # Split the input into tokens using whitespace as a delimiter. Taking
      # every run of non-whitespace characters avoids the empty tokens that
      # `split ... " "` produces for repeated spaces, and also treats tabs
      # and newlines as separators.
      set tokens [regexp -all -inline {\S+} $userinput]
      set count 0
      set nextToken ""
      set lexeme ""

      # Each call to `rule` consumes at least one token through `lex` (or
      # terminates the program), so `count` always advances.
      set total [llength $tokens]
      while { $count < $total } {
          rule
      }
  }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement