Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import re
class Token:
    """A lexical token: a type tag plus the source text it was built from.

    Attributes:
        type: Token category as an upper-case string (e.g. ``'LPAREN'``).
        value: The text associated with the token.
    """

    def __init__(self, type, value):
        # The parameter is named ``type`` (shadowing the builtin) because it
        # is part of the constructor's signature.
        self.type = type
        self.value = value

    def __repr__(self):
        """Return ``Token(<type>, <value>)``; this text appears in parse errors."""
        return f"Token({self.type}, {self.value})"
class Parser:
    """Recursive-descent parser producing a dict-based syntax tree.

    The accepted program shape, as implemented by the methods below, is::

        public class main(@self) { <main body> }

    where a body may contain class definitions (main body only), method
    definitions, ``print(...)`` statements, ``let const`` declarations and
    ``pass:`` statements.

    Tokens only need ``type`` and ``value`` attributes.
    """

    def __init__(self, tokens):
        """Store *tokens* and position the parser on the first one."""
        self.tokens = tokens
        self.current_token = None
        self.token_index = -1
        self.advance()

    def advance(self):
        """Move to the next token; ``current_token`` becomes None at the end."""
        self.token_index += 1
        if self.token_index < len(self.tokens):
            self.current_token = self.tokens[self.token_index]
        else:
            self.current_token = None

    def eat(self, token_type):
        """Consume the current token if it has type *token_type*.

        Returns:
            The consumed token, so callers can read its value safely.

        Raises:
            Exception: if the current token is missing or of another type.
        """
        token = self.current_token
        if token is not None and token.type == token_type:
            self.advance()
            return token
        raise Exception(f"Expected {token_type}, but got {token}")

    def parse_main(self):
        """Parse ``public class main(@self) { ... }`` into a main_definition."""
        self.eat('PUBLIC')
        self.eat('CLASS')
        # Reading the value from the consumed token avoids an AttributeError
        # when the input ends before the class name.
        class_name = self.eat('MAIN').value
        self.eat('LPAREN')
        self.eat('AT')
        self.eat('SELF')
        self.eat('RPAREN')
        self.eat('LBRACE')
        main_body = self.parse_main_body()
        self.eat('RBRACE')
        return {"type": "main_definition", "name": class_name, "body": main_body}

    def parse_main_body(self):
        """Parse statements up to (not including) the closing ``}`` of main."""
        body = []
        while self.current_token and self.current_token.type != 'RBRACE':
            kind = self.current_token.type
            if kind == 'PUBLIC':
                body.append(self.parse_class_definition())
            elif kind == 'DEF':
                body.append(self.parse_method_definition())
            elif kind == 'PRINT':
                body.append(self.parse_print_statement())
            elif kind == 'LET':
                body.append(self.parse_const())
            else:
                # Anything else must be a pass statement; parse_pass raises
                # on other tokens, so this loop cannot spin forever.
                body.append(self.parse_pass())
        return body

    def parse_pass(self):
        """Parse ``pass:``."""
        self.eat('PASS')
        self.eat('COLON')
        return {"type": "pass"}

    def parse_const(self):
        """Parse ``let const <identifier> = <string|number>;``.

        The value is the literal's source text (string literals keep their
        quotes).  It is None when no literal follows ``=``.
        """
        self.eat('LET')
        self.eat('CONST')
        identifier = self.eat('IDENTIFIER').value
        self.eat('EQUAL')
        value = None
        token = self.current_token
        if token is not None and token.type == 'STRING_LITERAL':
            value = self.eat('STRING_LITERAL').value
        elif token is not None and token.type == 'NUMBER':
            # A number may arrive as several NUMBER tokens (one per digit).
            value = self._parse_number()
        self.eat('SEMICOLON')
        return {"type": "const", "identifier": identifier, "value": value}

    def parse_class_definition(self):
        """Parse ``public class <name>(@inner_self) { ... };``."""
        self.eat('PUBLIC')
        self.eat('CLASS')
        class_name = self.eat('IDENTIFIER').value
        self.eat('LPAREN')
        self.eat('AT')
        self.eat('INNER_SELF')
        self.eat('RPAREN')
        self.eat('LBRACE')
        class_body = self.parse_class_body()
        self.eat('RBRACE')
        self.eat('SEMICOLON')
        return {"type": "class_definition", "name": class_name, "body": class_body}

    def parse_class_body(self):
        """Parse a class body: method definitions and ``pass:`` statements."""
        body = []
        while self.current_token and self.current_token.type != 'RBRACE':
            if self.current_token.type == 'DEF':
                body.append(self.parse_method_definition())
            else:
                body.append(self.parse_pass())
        return body

    def parse_method_definition(self):
        """Parse ``def <name>(@self[, <identifier>]*) { ... };``.

        Extra parameters are validated but not recorded in the result.
        """
        self.eat('DEF')
        method_name = self.eat('IDENTIFIER').value
        self.eat('LPAREN')
        self.eat('AT')
        self.eat('SELF')
        self.parse_method_head()
        self.eat('RPAREN')
        self.eat('LBRACE')
        method_body = self.parse_method_body()
        self.eat('RBRACE')
        self.eat('SEMICOLON')
        return {"type": "method_definition", "name": method_name, "body": method_body}

    def parse_extend_method(self):
        """Consume one or more ``, <identifier>`` parameter continuations."""
        self.eat('COMMA')
        self.eat('IDENTIFIER')
        while self.current_token and self.current_token.type == 'COMMA':
            self.eat('COMMA')
            self.eat('IDENTIFIER')

    def parse_method_head(self):
        """Consume the optional extra parameters that follow ``@self``."""
        if self.current_token and self.current_token.type == 'COMMA':
            self.parse_extend_method()

    def parse_method_body(self):
        """Parse a method body; class definitions are rejected here.

        Raises:
            Exception: if a ``public`` token (class definition) is found.
        """
        body = []
        while self.current_token and self.current_token.type != 'RBRACE':
            kind = self.current_token.type
            if kind == 'PUBLIC':
                raise Exception("Cannot nest classes in methods")
            elif kind == 'DEF':
                body.append(self.parse_method_definition())
            elif kind == 'PRINT':
                body.append(self.parse_print_statement())
            elif kind == 'LET':
                body.append(self.parse_const())
            else:
                body.append(self.parse_pass())
        return body

    def parse_print_statement(self):
        """Parse ``print(<string|number>);``."""
        self.eat('PRINT')
        self.eat('LPAREN')
        printable = self.parse_printable()
        self.eat('RPAREN')
        self.eat('SEMICOLON')
        return {"type": "print_statement", "value": printable}

    def parse_printable(self):
        """Parse and consume the argument of ``print``.

        Returns:
            The string literal's text (quotes included) or the number's
            digits as a string.

        Raises:
            Exception: if the argument is neither a string nor a number.
        """
        token = self.current_token
        if token is not None and token.type == 'STRING_LITERAL':
            # The literal must be consumed, otherwise the caller's
            # eat('RPAREN') sees the string token and fails.
            return self.eat('STRING_LITERAL').value
        if token is not None and token.type == 'NUMBER':
            return self._parse_number()
        raise Exception(f"Expected STRING_LITERAL or NUMBER, but got {token}")

    def _parse_number(self):
        """Consume consecutive NUMBER tokens and return their joined text."""
        digits = [self.eat('NUMBER').value]
        while self.current_token and self.current_token.type == 'NUMBER':
            digits.append(self.eat('NUMBER').value)
        return "".join(digits)

    def parse(self):
        """Parse the whole token stream and return the main_definition dict."""
        return self.parse_main()
class Lexer:
    """Character-level scanner that turns source text into ``Token`` objects.

    Comments are stripped once, up front; scanning then proceeds one
    character at a time through ``current_char``.
    """

    # Reserved words and the token type each one produces.
    KEYWORDS = {
        'public': 'PUBLIC',
        'class': 'CLASS',
        'main': 'MAIN',
        'self': 'SELF',
        'inner_self': 'INNER_SELF',
        'def': 'DEF',
        'print': 'PRINT',
        'pass': 'PASS',
        'let': 'LET',
        'const': 'CONST',
    }

    # Single-character tokens.  COMMA is included because the parser's
    # method-parameter rule consumes COMMA tokens.
    PUNCTUATION = {
        '(': 'LPAREN',
        ')': 'RPAREN',
        '{': 'LBRACE',
        '}': 'RBRACE',
        ':': 'COLON',
        ';': 'SEMICOLON',
        '=': 'EQUAL',
        '@': 'AT',
        ',': 'COMMA',
    }

    # Matches a string literal, a ``//`` line comment or a ``/* */`` block
    # comment.  Strings are matched first so comment markers inside them are
    # left alone; one pass also keeps ``//`` inside a block comment from
    # swallowing the closing ``*/``.
    _STRING_OR_COMMENT = re.compile(r'"[^"]*"|//[^\n]*|/\*.*?\*/', re.DOTALL)

    def __init__(self, text):
        """Strip comments from *text* and position on its first character."""
        self.text = self.remove_comments(text)
        self.pos = 0
        self.current_char = self.text[self.pos] if len(self.text) > 0 else None

    def remove_comments(self, text):
        """Return *text* without ``//`` and ``/* */`` comments.

        Text inside double-quoted string literals is preserved verbatim.
        """
        def keep_strings(match):
            found = match.group(0)
            return found if found.startswith('"') else ''

        return self._STRING_OR_COMMENT.sub(keep_strings, text)

    def advance(self):
        """Step to the next character; ``current_char`` is None at the end."""
        self.pos += 1
        if self.pos < len(self.text):
            self.current_char = self.text[self.pos]
        else:
            self.current_char = None

    def skip_whitespace(self):
        """Advance past any run of whitespace characters."""
        while self.current_char is not None and self.current_char.isspace():
            self.advance()

    def identifier(self):
        """Consume and return a run of alphanumeric/underscore characters."""
        chars = []
        while self.current_char is not None and (
            self.current_char.isalnum() or self.current_char == '_'
        ):
            chars.append(self.current_char)
            self.advance()
        return ''.join(chars)

    def number(self):
        """Consume and return a run of consecutive digit characters."""
        digits = []
        while self.current_char is not None and self.current_char.isdigit():
            digits.append(self.current_char)
            self.advance()
        return ''.join(digits)

    def string_literal(self):
        """Consume a double-quoted string and return it with its quotes.

        Raises:
            Exception: if the input ends before the closing quote.
        """
        chars = ['"']
        self.advance()  # skip the opening quote
        while self.current_char is not None and self.current_char != '"':
            chars.append(self.current_char)
            self.advance()
        if self.current_char != '"':
            raise Exception("Unterminated string literal")
        chars.append('"')
        self.advance()  # skip the closing quote
        return ''.join(chars)

    def get_next_token(self):
        """Return the next ``Token``, or None once the input is exhausted.

        Raises:
            Exception: on a character that starts no known token.
        """
        while self.current_char is not None:
            char = self.current_char
            if char.isspace():
                self.skip_whitespace()
                continue
            if char.isalpha() or char == '_':
                word = self.identifier()
                return Token(self.KEYWORDS.get(word, 'IDENTIFIER'), word)
            if char == '"':
                return Token('STRING_LITERAL', self.string_literal())
            if char.isdigit():
                # The whole digit run becomes one NUMBER token.
                return Token('NUMBER', self.number())
            if char in self.PUNCTUATION:
                self.advance()
                return Token(self.PUNCTUATION[char], char)
            raise Exception(f"Invalid character: {char}")
        return None

    def tokenize(self):
        """Return the list of all remaining tokens."""
        tokens = []
        token = self.get_next_token()
        while token:
            tokens.append(token)
            token = self.get_next_token()
        return tokens
def run(code):
    """Lex, parse and execute *code*, printing the output of print statements.

    Only top-level ``print_statement`` nodes of the main body are executed.
    Any error raised while lexing, parsing or executing is printed instead
    of propagating.

    Args:
        code: Source text of the program to run.
    """
    try:
        # Lexing happens inside the try so that lexer errors (e.g. an
        # invalid character) are reported the same way as parser errors.
        tokens = Lexer(code).tokenize()
        ast = Parser(tokens).parse()
        for item in ast['body']:
            if item['type'] != 'print_statement':
                continue
            value = item['value']
            # Strip the surrounding quotes of a string literal only; slicing
            # unconditionally would also drop the outer digits of a number.
            if (
                isinstance(value, str)
                and len(value) >= 2
                and value[0] == '"'
                and value[-1] == '"'
            ):
                value = value[1:-1]
            print(value)
    except Exception as e:
        # Top-level boundary: report the problem to the user and carry on.
        print(e)
# Demo: run a minimal program consisting of a single print statement.
run('public class main(@self){print(78);}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement