Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/usr/bin/env python
- import re
- import tatsu
- import binascii
- from tatsu.exceptions import TatSuException
class Node:
    """Base class for every node of the parsed shell syntax tree.

    Subclasses list their fields in ``__slots__``; the constructor fills
    each field from a keyword argument of the same name, else from the
    positional argument at the same index, else with ``None``.  Keyword
    arguments that name no field are ignored.  ``Node`` declares no
    ``__slots__`` of its own, so it is only usable through subclasses
    that do.
    """

    def __init__(self, *args, **kwargs):
        for position, field in enumerate(self.__slots__):
            if field in kwargs:
                value = kwargs[field]
            elif position < len(args):
                value = args[position]
            else:
                value = None
            setattr(self, field, value)

    def __repr__(self):
        # dir() yields names alphabetically; anything starting with an
        # underscore (dunders, private names) is left out of the output.
        public_names = [name for name in dir(self) if name[0] != '_']
        rendered = ', '.join(
            '{}={!r}'.format(name, getattr(self, name))
            for name in public_names
        )
        return '{}({})'.format(type(self).__name__, rendered)
class Variable(Node):
    """A variable reference; ``name`` is the referenced name."""

    __slots__ = ('name',)
class Word(Node):
    """Common base of all word nodes; ``value`` holds the word content."""

    __slots__ = ('value',)
class Bareword(Word):
    """An unquoted word, as built by ``ShellSemantics.bareword``."""
class DoubleQuoted(Word):
    """A double-quoted word, as built by ``ShellSemantics.double_quoted``."""
class SingleQuoted(Word):
    """A single-quoted (or ``$'...'``) word; ``value`` is a plain string."""
class Redirection(Node):
    """An I/O redirection: optional ``fd``, operator ``type``, target ``file``."""

    __slots__ = (
        'fd',
        'type',
        'file',
    )
class CompoundCommand(Node):
    """Common base of the compound-command nodes (for / while / case)."""
class ForClause(CompoundCommand):
    """A ``for NAME in WORDS; do ...; done`` loop."""

    __slots__ = (
        'variable',
        'sequence',
        'command',
        'redirections',
    )
class WhileClause(CompoundCommand):
    """A ``while`` loop; ``until`` loops reuse it with ``invert`` flipped."""

    __slots__ = (
        'condition',
        'invert',
        'command',
        'redirections',
    )
class CaseClause(CompoundCommand):
    """A ``case WORD in ... esac`` statement and its condition branches."""

    __slots__ = (
        'word',
        'conditions',
        'redirections',
    )
class VariableDeclaration(Node):
    """A ``NAME=value`` assignment; ``value`` is ``None`` when omitted."""

    __slots__ = (
        'name',
        'value',
    )
class CommandSubstitution(Node):
    """A ``$(...)`` or backtick substitution wrapping one ``command``."""

    __slots__ = ('command',)
class SimpleCommand(Node):
    """A simple command: assignments, argument words, redirections, ``&`` flag."""

    __slots__ = (
        'variables',
        'arguments',
        'redirections',
        'background',
    )
# Words that may not start a simple command; ShellSemantics.simple_command
# rejects them because the grammar itself does not.
special_words = [
    'elif',
    'else',
    'fi',
    'esac',
]
class ShellSemantics:
    """Semantic actions that turn raw parse results into syntax-tree nodes.

    Every public method is named after the grammar rule whose result it
    post-processes and receives that rule's result as ``ast``.  Helper
    methods are underscore-prefixed so they cannot clash with rule names.
    """

    # ------------------------------------------------------------------
    # Rules whose text carries no information: drop it by returning None.
    # ------------------------------------------------------------------
    def wspace(self, ast):
        """Discard horizontal whitespace."""
        pass

    def command_seperator(self, ast):
        """Discard the separator between two commands."""
        pass

    def action_seperator(self, ast):
        """Discard the whitespace that follows ``then`` / ``do``."""
        pass

    # ------------------------------------------------------------------
    # Words
    # ------------------------------------------------------------------
    @staticmethod
    def _merge_adjacent_strings(items):
        """Return ``items`` with each run of consecutive strings joined.

        The ``*_nonspecial`` rules yield one character at a time, so
        neighbouring characters are glued back together; variables and
        command substitutions stay as separate elements.
        """
        merged = []
        for item in items:
            if merged and isinstance(item, str) and isinstance(merged[-1], str):
                merged[-1] += item
            else:
                merged.append(item)
        return merged

    @staticmethod
    def _double_quoted_parts(ast):
        """Return the merged content found between the two quote tokens.

        NOTE(review): depending on how the parser nests the closure, the
        content may arrive either as individual items or as one inner
        list; both shapes are accepted here.  Word parts themselves are
        never lists, so unwrapping a lone inner list is unambiguous.
        """
        parts = ast[1:-1]
        if len(parts) == 1 and isinstance(parts[0], (list, tuple)):
            parts = parts[0]
        return ShellSemantics._merge_adjacent_strings(parts)

    @staticmethod
    def _unescape_ansi_c(text):
        """Expand the backslash escapes supported inside ``$'...'``.

        Handles ``\\n \\r \\f \\t \\v`` and ``\\xHH`` (two hex digits);
        any other backslash sequence is left untouched.
        """
        simple = {'r': '\r', 'n': '\n', 'f': '\f', 't': '\t', 'v': '\v'}

        def replace(match):
            code = match.group(1)
            if code[0] == 'x':
                # chr() cannot fail for any two-digit hex value, unlike
                # decoding a lone byte >= 0x80 as UTF-8.
                return chr(int(code[1:], 16))
            return simple[code]

        return re.sub(r'\\([nrftv]|x[0-9a-fA-F]{2})', replace, text)

    def bareword(self, ast):
        """Build a ``Bareword`` from the parts of an unquoted word."""
        return Bareword(self._merge_adjacent_strings(ast))

    def double_quoted(self, ast):
        """Build a ``DoubleQuoted`` from the parts between the quotes."""
        return DoubleQuoted(self._double_quoted_parts(ast))

    def literal_quoted(self, ast):
        """Build a ``SingleQuoted`` from a ``$'...'`` word, expanding escapes."""
        # ast is the whole matched text: strip the leading $' and trailing '.
        return SingleQuoted(self._unescape_ansi_c(ast[2:-1]))

    def single_quoted(self, ast):
        """Build a ``SingleQuoted`` from a ``'...'`` word (quotes stripped)."""
        return SingleQuoted(ast[1:-1])

    # ------------------------------------------------------------------
    # Small leaf nodes
    # ------------------------------------------------------------------
    def variable_declaration(self, ast):
        """Build a ``VariableDeclaration`` from the rule's named results."""
        return VariableDeclaration(**ast)

    def command_substitution(self, ast):
        """Build a ``CommandSubstitution`` around the enclosed command."""
        return CommandSubstitution(ast[1])

    def variable(self, ast):
        """Build a ``Variable`` from the rule's named results."""
        return Variable(**ast)

    def redirection(self, ast):
        """Build a ``Redirection`` from the rule's named results."""
        return Redirection(**ast)

    # ------------------------------------------------------------------
    # Compound commands
    # ------------------------------------------------------------------
    def while_clause(self, ast):
        """Build a ``WhileClause``; ast[1] is the condition, ast[3] the body."""
        condition = ast[1]
        return WhileClause(
            condition=condition['command'],
            invert=bool(condition.get('invert')),
            command=ast[3],
        )

    def until_clause(self, ast):
        """Build a ``WhileClause`` whose ``invert`` flag is negated."""
        condition = ast[1]
        return WhileClause(
            condition=condition['command'],
            invert=(not condition.get('invert')),
            command=ast[3],
        )

    def for_clause(self, ast):
        """Build a ``ForClause``; a missing word list becomes ``[]``."""
        # Work on a copy so the parser's own result is not modified.
        fields = dict(ast)
        if fields.get('sequence') is None:
            fields['sequence'] = []
        return ForClause(**fields)

    def case_clause(self, ast):
        """Build a ``CaseClause`` from the rule's named results."""
        return CaseClause(**ast)

    def compound_command(self, ast):
        """Attach trailing redirections to the compound node; return ``ast``.

        ``ast[0]`` is the clause result and ``ast[1]`` its redirections.
        """
        try:
            ast[0].redirections = ast[1]
        except AttributeError:
            # if_clause has no semantic action, so its result is a raw
            # sequence that cannot carry attributes.  The redirections
            # remain available to the caller as ast[1].
            pass
        return ast

    # ------------------------------------------------------------------
    # Simple commands
    # ------------------------------------------------------------------
    def simple_command(self, ast):
        """Build a ``SimpleCommand`` from assignments, words and redirections.

        Raises:
            TatSuException: if the command starts with one of
                ``special_words``.
        """
        # .get() is used so an unmatched optional part is treated the same
        # whether its key is absent or present with a None value.
        variables = list(ast.get('variables') or ())
        background = bool(ast.get('background'))

        arguments = []
        redirections = []
        for item in ast.get('rest') or ():
            if isinstance(item, Redirection):
                redirections.append(item)
            else:
                arguments.append(item)

        # This is not handled in the grammar itself.  A command may have
        # no argument words at all (e.g. only assignments or redirections)
        # and a quoted word may be empty, so guard both lookups.
        if arguments:
            first = arguments[0]
            if (isinstance(first, Word) and first.value
                    and first.value[0] in special_words):
                raise TatSuException(
                    "syntax error near unexpected token `%s'" % first.value[0]
                )

        return SimpleCommand(variables, arguments, redirections, background)
# Grammar for the supported shell subset.  Rule names double as the method
# names looked up on the semantics object passed to parser.parse().
#
# Choices are tried in order and the first match wins, so within one choice
# a longer token must come before any token that is its prefix ('>>' before
# '>').  For the same reason doublequoted_nonspecial must not match '"',
# otherwise the closure would swallow the closing quote.
#
# NOTE(review): '|>' is kept exactly as found; bash spells its noclobber
# override '>|' -- confirm which one is intended.
parser = tatsu.compile(r'''
@@nameguard :: False
@@left_recursion :: False

start = (command_seperator).{ command }* $
    ;

command_seperator = /[ \t]*[;\n][ \t\n]*(?!;)/
    ;

wspace = /[ \t]+/
    ;

action_seperator = /[ \t\n]*/
    ;

command = compound_command
    | simple_command
    ;

simple_command = (
        variables:{ variable_declaration [ wspace ] }+
        rest:{ ( redirection | word ) [ wspace ] }*
    |
        rest:{ ( redirection | word ) [ wspace ] }+
    ) [ background:'&' ]
    ;

compound_command = ( while_clause
    | until_clause
    | if_clause
    | for_clause
    | case_clause
    ) [ wspace ]
    { redirection [ wspace ] }*
    ;

redirection = ( type:'<'
    | [ fd:/[0-9]+/ ] type:( '>>' | '>' | '|>' )
    ) [ wspace ] file:word
    ;

while_clause = 'while' ~condition 'do'
    commands_until_done
    'done'
    ;

until_clause = 'until' ~condition 'do'
    commands_until_done
    'done'
    ;

commands_until_done = [ wspace ] { ( ! 'done' command ) command_seperator }+
    ;

if_clause = 'if' ~condition 'then' action_seperator
    commands_until_if
    'fi'
    ;

elif_clause = 'elif' ~condition 'then' commands_until_if
    ;

else_clause = 'else' ~commands_until_if
    ;

commands_until_if = { ( ! ( 'elif' | 'else' | 'fi' )
    command [ command_seperator ] )
    }+
    ;

condition = wspace [ invert:'!' wspace ]
    command:simple_command
    command_seperator
    ;

for_clause = 'for' ~wspace variable:identifier wspace 'in'
    [ wspace sequence:{ word [ wspace ] }* ] command_seperator
    'do' action_seperator
    command:commands_until_done
    'done'
    ;

case_clause = 'case' ~wspace word:word wspace 'in' wspace
    conditions:{ case_conditions }+
    'esac'
    ;

case_conditions = word:word [ wspace ] ')' [ wspace ]
    command:{ commands_until_esac }+ [ wspace ]
    ';;'
    [ wspace ]
    ;

commands_until_esac = { ( ! 'esac' command [ command_seperator ] ) }+
    ;

variable_declaration = name:identifier '=' [ value:word ]
    ;

word = literal_quoted
    | single_quoted
    | double_quoted
    | bareword
    ;

literal_quoted = /\$'[^']*'/
    ;

single_quoted = /'[^']*'/
    ;

double_quoted = '"' { ( command_substitution
    | variable
    | doublequoted_nonspecial
    ) }* '"'
    ;

command_substitution = '$(' command ')' | '`' command '`'
    ;

variable = '$' ( name:( identifier | '?')
    | '{' name:( identifier | '?' ) '}'
    )
    ;

doublequoted_nonspecial = /(?:\\.|[^\\$`"])/
    ;

identifier = /[a-zA-Z_][a-zA-Z0-9_]*/
    ;

bareword = { ( command_substitution
    | variable
    | bareword_nonspecial
    ) }+
    ;

bareword_nonspecial = /(?:\\.|[^'"&\s$()<>`|;#])/
    ;
''')
def format_parse_error(cmd, error):
    """Return the bash-style message to show for a failed parse of ``cmd``.

    Args:
        cmd: the command line that was being parsed.
        error: the exception raised by the parser.  Its optional ``pos``
            attribute is read as the index in ``cmd`` where parsing failed.

    Returns:
        The offending character when ``pos`` points inside ``cmd``, an
        end-of-file message when it points past the end, or the
        exception's own message when it carries no position.
    """
    pos = getattr(error, 'pos', None)
    if pos is None:
        # Exceptions raised by the semantic actions carry only a message.
        return '-bash: %s' % (error.args[0] if error.args else error)
    if pos < len(cmd):
        # Position 0 is a valid index, so compare against None above
        # instead of relying on truthiness.
        return "-bash: syntax error near unexpected token: `%s'" % cmd[pos]
    # The parser ran out of input (e.g. an unterminated loop): there is
    # no character to quote.
    return '-bash: syntax error: unexpected end of file'


if __name__ == '__main__':
    from pprint import pprint

    semantics = ShellSemantics()

    # Minimal read-parse-print loop; "exit" or end-of-input leaves it.
    while True:
        try:
            cmd = input('[root@localhost ~]# ')
            if cmd == 'exit':
                break
            pprint(parser.parse(cmd, whitespace='', semantics=semantics))
        except KeyboardInterrupt:
            # Ctrl-C abandons the current line and prompts again.
            print()
        except EOFError:
            print()
            break
        except TatSuException as e:
            print(format_parse_error(cmd, e))
Add Comment
Please, Sign In to add comment