Advertisement
Not a member of Pastebin yet?
Sign Up, it unlocks many cool features!
- use regex;
- // A lexical token produced by tokenize(): the name of the rule that
- // matched ("type") and the exact source text it consumed ("val").
- struct Token {
- type: str,
- val: str
- }
- // Generic regex-driven tokenizer.
- //
- // Parameters:
- //   code  - the source text to split into tokens.
- //   rules - mapping of token-type name -> regex pattern string.
- // Returns: list of Token, in the order they were matched.
- //
- // Each rule is wrapped in a Python-style named group "(?P<GROUPn>...)"
- // so that after a match, m.lastgroup identifies which rule fired;
- // group_type maps that synthetic group name back to the rule's type.
- // NOTE(review): group names are synthesized ("GROUP1", "GROUP2", ...)
- // because rule names like "::" or "->" are not valid regex group names.
- fn tokenize(code: str, rules: dict) -> List[Token] {
- let idx = 1;
- let regex_parts = [];
- let group_type = dict({});
- // Build one alternation part per rule; iteration order of `rules`
- // presumably controls match priority - confirm dict preserves order.
- for type in rules {
- let regex = rules[type];
- let groupname = "GROUP{}".format(idx);
- regex_parts.append("(?P<{}>{})".format(groupname, regex))
- group_type[groupname] = type;
- idx += 1;
- }
- let tokens: List[Token] = [];
- // Repeatedly match at the front of the remaining input until no rule
- // matches. NOTE(review): the alternation is re-joined on every
- // iteration; hoisting "|".join(regex_parts) out of the loop would be
- // cheaper - verify find() semantics before changing.
- while 1 {
- let tok = None;
- // NOTE(review): assumes find() behaves like Python re.search/match,
- // returning a match object with .lastgroup/.group/.end - confirm.
- // If find() can match past unmatched leading characters, those
- // characters are silently dropped by the code slice below - verify.
- let m = find("|".join(regex_parts), code);
- if m {
- let groupname = str(m.lastgroup);
- let tok_type = group_type[groupname];
- tok = Token();
- tok.type = tok_type;
- tok.val = m.group(groupname);
- // Consume the matched prefix and continue on the remainder.
- code = code[m.end():];
- }
- else {
- break;
- }
- // NOTE(review): unreachable/redundant guard - tok is always set
- // when the else-branch above did not break; kept for safety.
- if tok == None {
- break;
- }
- tokens += [tok];
- }
- return tokens;
- }
- // Lexer entry point: defines the concrete token rules for this
- // language and delegates the actual scanning to tokenize().
- //
- // Parameters:
- //   code - the full source text to lex.
- // Returns: list of Token covering the input, including "newline" and
- //          "whitespace" tokens (callers presumably filter these -
- //          verify downstream usage).
- //
- // Rule order matters: longer / more specific patterns (ELSE-IF,
- // INCLUDE, IMPORT, STR, RANGE) are registered before the generic ID,
- // NUMBER, and single-character rules, so the alternation in
- // tokenize() prefers them - assumes insertion order is preserved.
- fn lex(code: str) -> List[Token] {
- // Matches hex (0x...) or decimal integer literals.
- let NUMBER = "((0x[0-9A-F]+)|([0-9]+))";
- let rules = dict({
- "ELSE-IF": "else( )+if",
- "INCLUDE": "#include\([a-zA-Z_][a-zA-Z_0-9\.]*:[a-zA-Z_][a-zA-Z_0-9]*\)",
- "IMPORT": "use( )*([a-zA-Z_][a-zA-Z_0-9]*(::|))*;",
- "DICT": "dict( )*\({|}\)",
- "STR": "\"(\\\"|\\\\|[^\"\n])*?\"i?",
- "ID": "[a-zA-Z_][a-zA-Z_0-9]*"
- });
- // Multi-character operators must precede the single-character
- // punctuation rules below so e.g. "::" is not lexed as two ":".
- rules["::"] = "::";
- rules["->"] = "->";
- rules["=="] = "==";
- // Numeric range "A..B" (with optional spaces) or a parenthesized
- // range expression "(....)"; registered before NUMBER so "1..2"
- // is one RANGE token rather than NUMBER "." "." NUMBER.
- rules["RANGE"] = NUMBER + "( )*\.\.( )*" + NUMBER + "|(\(.*\.\..*\))";
- rules["NUMBER"] = NUMBER;
- // One rule per punctuation character, regex-escaped with a leading
- // backslash. NOTE(review): "\\" + i over-escapes characters like
- // "@" or ";" - harmless in Python-style regex, but confirm the
- // regex engine tolerates escaping ordinary characters.
- for i in "@{<([])>}+-*/%;:.,=!" {
- rules[i] = "\\" + i;
- }
- rules["newline"] = "\n";
- rules["whitespace"] = " |\t";
- return tokenize(code, rules);
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement