Guest User

Untitled

a guest
May 23rd, 2018
78
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.89 KB | None | 0 0
  1. # ----------------------------------------------------------------------
  2. # phplex.py
  3. #
  4. # A lexer for PHP.
  5. # ----------------------------------------------------------------------
  6.  
  7. import ply.lex as lex
  8.  
  9. # todo: literal html
  10. # todo: double-quoted strings
  11. # todo: number literals (LNUMBER, DNUMBER)
  12. # todo: heredocs
  13. # todo: backticks
  14. # todo: namespaces
  15. # todo: casts
  16. # todo: "die" as alias for "exit"
  17. # todo: BAD_CHARACTER
  18. # todo: CURLY_OPEN, DOLLAR_OPEN_CURLY_BRACES, STRING_VARNAME
  19. # todo: <script> syntax (does anyone use this?)
  20. # todo: HALT_COMPILER (??)
  21.  
  22. # Reserved words
  23. reserved = (
  24. 'ARRAY', 'AS', 'BREAK', 'CASE', 'CLASS', 'CONST', 'CONTINUE', 'DECLARE',
  25. 'DEFAULT', 'DO', 'ECHO', 'ELSE', 'ELSEIF', 'EMPTY', 'ENDDECLARE',
  26. 'ENDFOR', 'ENDFOREACH', 'ENDIF', 'ENDSWITCH', 'ENDWHILE', 'EVAL', 'EXIT',
  27. 'EXTENDS', 'FOR', 'FOREACH', 'FUNCTION', 'GLOBAL', 'IF', 'INCLUDE',
  28. 'INCLUDE_ONCE', 'INSTANCEOF', 'ISSET', 'LIST', 'NEW', 'PRINT', 'REQUIRE',
  29. 'REQUIRE_ONCE', 'RETURN', 'STATIC', 'SWITCH', 'UNSET', 'USE', 'VAR',
  30. 'WHILE', 'FINAL', 'INTERFACE', 'IMPLEMENTS', 'PUBLIC', 'PRIVATE',
  31. 'PROTECTED', 'ABSTRACT', 'CLONE', 'TRY', 'CATCH', 'THROW', 'CFUNCTION',
  32. 'OLD_FUNCTION',
  33. )
  34.  
  35. tokens = reserved + (
  36. # Generic
  37. 'WHITESPACE', 'OP',
  38.  
  39. # Operators
  40. 'SL', 'SR', 'BOOLEAN_OR', 'BOOLEAN_AND', 'IS_SMALLER_OR_EQUAL',
  41. 'IS_GREATER_OR_EQUAL', 'IS_EQUAL', 'IS_NOT_EQUAL', 'IS_IDENTICAL',
  42. 'IS_NOT_IDENTICAL',
  43.  
  44. # Assignment operators
  45. 'MUL_EQUAL', 'DIV_EQUAL', 'MOD_EQUAL', 'PLUS_EQUAL', 'MINUS_EQUAL',
  46. 'SL_EQUAL', 'SR_EQUAL', 'AND_EQUAL', 'OR_EQUAL', 'XOR_EQUAL',
  47. 'CONCAT_EQUAL',
  48.  
  49. # Increment/decrement
  50. 'INC', 'DEC',
  51.  
  52. # Arrows
  53. 'OBJECT_OPERATOR', 'DOUBLE_ARROW', 'DOUBLE_COLON',
  54.  
  55. # Comments
  56. 'COMMENT', 'DOC_COMMENT',
  57.  
  58. # Escaping from HTML
  59. 'OPEN_TAG', 'OPEN_TAG_WITH_ECHO', 'CLOSE_TAG'
  60.  
  61. # Identifiers and reserved words
  62. 'DIR', 'FILE', 'LINE', 'FUNC_C', 'CLASS_C', 'METHOD_C', 'NS_C',
  63. 'LOGICAL_AND', 'LOGICAL_OR', 'LOGICAL_XOR',
  64. 'STRING', 'VARIABLE',
  65. 'LNUMBER', 'DNUMBER',
  66. 'CONSTANT_ENCAPSED_STRING',
  67. )
  68.  
  69. # Whitespace (newlines are counted so the lexer's lineno stays accurate)
  70. def t_WHITESPACE(t):
  71. r'[ \t\r\n]+'
  72. t.lexer.lineno += t.value.count("\n")
  73. return t
  74.  
  75. # Assignment operators
  76. def t_SL_EQUAL(t): r'<<='; return t
  77. def t_SR_EQUAL(t): r'>>='; return t
  78. def t_AND_EQUAL(t): r'&='; return t
  79. def t_OR_EQUAL(t): r'\|='; return t
  80. def t_XOR_EQUAL(t): r'\^='; return t
  81. def t_MUL_EQUAL(t): r'\*='; return t
  82. def t_DIV_EQUAL(t): r'/='; return t
  83. def t_MOD_EQUAL(t): r'%='; return t
  84. def t_PLUS_EQUAL(t): r'\+='; return t
  85. def t_MINUS_EQUAL(t): r'-='; return t
  86. def t_CONCAT_EQUAL(t): r'\.='; return t
  87.  
  88. # Operators
  89. def t_SL(t): r'<<'; return t
  90. def t_SR(t): r'>>'; return t
  91. def t_BOOLEAN_AND(t): r'&&'; return t
  92. def t_BOOLEAN_OR(t): r'\|\|'; return t
  93. def t_IS_SMALLER_OR_EQUAL(t): r'<='; return t
  94. def t_IS_GREATER_OR_EQUAL(t): r'>='; return t
  95. def t_IS_IDENTICAL(t): r'==='; return t
  96. def t_IS_NOT_IDENTICAL(t): r'!=='; return t
  97. def t_IS_EQUAL(t): r'=='; return t
  98. def t_IS_NOT_EQUAL(t): r'(!=)|(<>)'; return t
  99.  
  100. # Increment/decrement
  101. def t_INC(t): r'\+\+'; return t
  102. def t_DEC(t): r'--'; return t
  103.  
  104. # Arrows
  105. def t_OBJECT_OPERATOR(t): r'->'; return t
  106. def t_DOUBLE_ARROW(t): r'=>'; return t
  107. def t_DOUBLE_COLON(t): r'::'; return t
  108.  
  109. # Comments
  110.  
  111. def t_DOC_COMMENT(t):
  112. r'/\*\*(.|\n)*?\*/'
  113. t.lexer.lineno += t.value.count("\n")
  114. return t
  115.  
  116. def t_COMMENT(t):
  117. r'(/\*(.|\n)*?\*/)|(//.*?\n)|(\#.*?\n)'
  118. t.lexer.lineno += t.value.count("\n")
  119. return t
  120.  
  121. # Escaping from HTML
  122.  
  123. def t_OPEN_TAG(t):
  124. r'<[?%]((php)|=)?\n?'
  125. if t.value.endswith('='): t.type = 'OPEN_TAG_WITH_ECHO'
  126. t.lexer.lineno += t.value.count("\n")
  127. return t
  128.  
  129. def t_CLOSE_TAG(t):
  130. r'[?%]>\n?'
  131. t.lexer.lineno += t.value.count("\n")
  132. return t
  133.  
  134. # Identifiers and reserved words
  135.  
  136. reserved_map = {
  137. '__DIR__': 'DIR',
  138. '__FILE__': 'FILE',
  139. '__LINE__': 'LINE',
  140. '__FUNCTION__': 'FUNC_C',
  141. '__CLASS__': 'CLASS_C',
  142. '__METHOD__': 'METHOD_C',
  143. '__NAMESPACE__': 'NS_C',
  144.  
  145. 'AND': 'LOGICAL_AND',
  146. 'OR': 'LOGICAL_OR',
  147. 'XOR': 'LOGICAL_XOR',
  148. }
  149.  
  150. for r in reserved:
  151. reserved_map[r] = r
  152.  
  153. # Identifier
  154. def t_STRING(t):
  155. r'[A-Za-z_][\w_]*'
  156. t.type = reserved_map.get(t.value.upper(), 'STRING')
  157. return t
  158.  
  159. # Variable
  160. def t_VARIABLE(t):
  161. r'\$[A-Za-z_][\w_]*'
  162. return t
  163.  
  164. # Integer literal (todo)
  165. def t_LNUMBER(t):
  166. r'\d+([uU]|[lL]|[uU][lL]|[lL][uU])?'
  167. return t
  168.  
  169. # Floating literal (todo)
  170. def t_DNUMBER(t):
  171. r'((\d+)(\.\d+)(e(\+|-)?(\d+))? | (\d+)e(\+|-)?(\d+))([lL]|[fF])?'
  172. return t
  173.  
  174. # String literal
  175. def t_CONSTANT_ENCAPSED_STRING(t):
  176. r'(\"([^\\\n]|(\\.))*?\")|(\'([^\\\n]|(\\.))*?\')'
  177. return t
  178.  
  179. # Simple operator
  180. def t_OP(t):
  181. r'[\(\)\{\}\[\]+-/*%^&|~=<>.!,?:;@]'
  182. t.type = 'OP'
  183. return t
  184.  
  185. def t_error(t):
  186. print("Illegal character %s" % repr(t.value[0]))
  187. t.lexer.skip(1)
  188.  
  189. lexer = lex.lex(optimize=1)
  190. if __name__ == "__main__":
  191. lex.runmain(lexer)
Add Comment
Please, Sign In to add comment