Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- module ast_parser ;
- import std.stdio ;
- import core.stdc.ctype ; //isspace
- class TokenList
- {
- enum
- {
- ID_VOID = 0,
- ID_STRING = 1,
- ID_LIST = 2,
- } ;
- enum
- {
- LEFT_TO_RIGHT = 1,
- RIGHT_TO_LEFT = 2,
- }
- enum
- {
- INFIX = 0,
- PREFIX = 1,
- POSTFIX = 2,
- PAREN = 3,
- //SEPARATOR = 4 //not yet done
- }
- struct OperatorProperties
- {
- int type ; //prefix, postfix, infix, paren
- int priority ;
- int direction ; // left-to-right, right-to-left
- } ;
- static OperatorProperties [string] ms_operator ;
- static this ()
- {
- {
- OperatorProperties w = {INFIX, 0, LEFT_TO_RIGHT} ;
- ms_operator [";"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 1, LEFT_TO_RIGHT} ;
- ms_operator ["foreach"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 1, LEFT_TO_RIGHT} ;
- ms_operator ["for"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 1, LEFT_TO_RIGHT} ;
- ms_operator ["while"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 1, LEFT_TO_RIGHT} ;
- ms_operator ["do"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 2, LEFT_TO_RIGHT} ;
- ms_operator ["return"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 3, LEFT_TO_RIGHT} ;
- ms_operator [","] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["+="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["-="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["*="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["/="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["%="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["&="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["|="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["^="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator ["<<="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator [">>="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 4, RIGHT_TO_LEFT} ;
- ms_operator [">>>="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 5, RIGHT_TO_LEFT} ;
- ms_operator ["?"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 6, LEFT_TO_RIGHT} ;
- ms_operator [":"] = w ;
- }
- { //TODO : check
- //OperatorProperties w = {INFIX, 7, RIGHT_TO_LEFT} ;
- //ms_operator ["?:"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 8, LEFT_TO_RIGHT} ;
- ms_operator ["||"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 9, LEFT_TO_RIGHT} ;
- ms_operator ["&&"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 10, LEFT_TO_RIGHT} ;
- ms_operator ["|"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 11, LEFT_TO_RIGHT} ;
- ms_operator ["^"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 12, LEFT_TO_RIGHT} ;
- ms_operator ["&"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 13, LEFT_TO_RIGHT} ;
- ms_operator ["=="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 13, LEFT_TO_RIGHT} ;
- ms_operator ["!="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 14, LEFT_TO_RIGHT} ;
- ms_operator [">"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 14, LEFT_TO_RIGHT} ;
- ms_operator [">="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 14, LEFT_TO_RIGHT} ;
- ms_operator ["<"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 14, LEFT_TO_RIGHT} ;
- ms_operator ["<="] = w ;
- }
- {
- OperatorProperties w = {INFIX, 15, LEFT_TO_RIGHT} ;
- ms_operator [">>"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 15, LEFT_TO_RIGHT} ;
- ms_operator [">>>"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 15, LEFT_TO_RIGHT} ;
- ms_operator ["<<"] = w ;
- }
- { //TODO : check priority
- OperatorProperties w = {INFIX, 16, LEFT_TO_RIGHT} ;
- ms_operator ["is"] = w ;
- }
- { //TODO : check priority
- OperatorProperties w = {INFIX, 16, LEFT_TO_RIGHT} ;
- ms_operator ["!is"] = w ;
- }
- { //TODO : check priority
- OperatorProperties w = {INFIX, 16, LEFT_TO_RIGHT} ;
- ms_operator ["in"] = w ;
- }
- { //TODO : check priority
- OperatorProperties w = {INFIX, 16, LEFT_TO_RIGHT} ;
- ms_operator ["!in"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 17, LEFT_TO_RIGHT} ;
- ms_operator ["+"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 17, LEFT_TO_RIGHT} ;
- ms_operator ["-"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 18, LEFT_TO_RIGHT} ;
- ms_operator ["*"] = w ;
- }
- { //TODO : check priority
- OperatorProperties w = {INFIX, 18, LEFT_TO_RIGHT} ;
- ms_operator ["~"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 18, LEFT_TO_RIGHT} ;
- ms_operator ["/"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 18, LEFT_TO_RIGHT} ;
- ms_operator ["%"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 19, LEFT_TO_RIGHT} ;
- ms_operator ["%list"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1+"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1-"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1*"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1&"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ; // virtual, it convers to prefix or postfix form
- ms_operator ["++"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ; // virtual
- ms_operator ["--"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1++"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1--"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1!"] = w ;
- }
- {
- OperatorProperties w = {PREFIX, 20, RIGHT_TO_LEFT} ;
- ms_operator ["%o1~"] = w ;
- }
- //postfix: hightest priority
- {
- OperatorProperties w = {POSTFIX, 21, LEFT_TO_RIGHT} ;
- ms_operator ["%o2--"] = w ;
- }
- {
- OperatorProperties w = {POSTFIX, 21, LEFT_TO_RIGHT} ;
- ms_operator ["%o2++"] = w ;
- }
- {
- OperatorProperties w = {INFIX, 21, LEFT_TO_RIGHT} ;
- ms_operator ["."] = w ;
- }
- {
- OperatorProperties w = {INFIX, 21, LEFT_TO_RIGHT} ;
- ms_operator ["!"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, LEFT_TO_RIGHT} ;
- ms_operator ["%lparen"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, RIGHT_TO_LEFT} ;
- ms_operator ["%rparen"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, LEFT_TO_RIGHT} ;
- ms_operator ["%lbracket"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, RIGHT_TO_LEFT} ;
- ms_operator ["%rbracket"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, LEFT_TO_RIGHT} ;
- ms_operator ["%lbrace"] = w ;
- }
- {
- OperatorProperties w = {PAREN, 21, RIGHT_TO_LEFT} ;
- ms_operator ["%rbrace"] = w ;
- }
- }
- private :
- int m_typeId ;
- bool m_isSameOperator = false ;
- string m_str ;
- TokenList[] m_vals ;
- TokenList m_parent ;
- static int m_numParenths = 0 ;
- void setString (string s)
- {
- m_str = s ;
- m_typeId = ID_STRING ;
- m_vals = null ;
- }
- void add_node (TokenList node)
- {
- if (m_typeId == ID_VOID)
- {
- m_typeId = ID_LIST ;
- m_vals = new TokenList [1] ;
- m_vals [0] = node ;
- return ;
- }
- if (m_typeId == ID_STRING)
- {
- string data = m_str ;
- m_typeId = ID_LIST ;
- m_vals = new TokenList [2] ;
- m_vals [0] = new TokenList (this, data) ;
- m_vals [1] = node ;
- return ;
- }
- //type_id == ID_LIST
- m_vals ~= node ;
- }
- public :
- this ()
- {
- m_typeId = ID_VOID ;
- m_str = null ;
- m_vals = null ;
- m_parent = null ;
- return this ;
- }
- this (string s)
- {
- this () ;
- setString (s) ;
- return this ;
- }
- this (TokenList p)
- {
- this () ;
- m_parent = p ;
- return this ;
- }
- this (TokenList p, string s)
- {
- this () ;
- setString (s) ;
- m_parent = p ;
- return this ;
- }
- this (TokenList parent, TokenList leaf)
- {
- this () ;
- m_parent = parent ;
- m_typeId = ID_LIST ;
- m_vals = new TokenList [1] ;
- m_vals [0] = leaf ;
- return this ;
- }
- string toString ()
- {
- switch (m_typeId)
- {
- case ID_VOID :
- return "void" ;
- case ID_STRING :
- return m_str ;
- case ID_LIST :
- {
- string result = "(" ;
- foreach (i, val ; m_vals)
- {
- result ~= val.toString () ;
- if (i != m_vals.length - 1)
- result ~= " " ;
- }
- result ~= ")" ;
- return result ;
- }
- }
- }
- string toD_String ()
- {
- switch (m_typeId)
- {
- case ID_VOID :
- return "void" ;
- case ID_STRING :
- return m_str ;
- case ID_LIST :
- {
- string result = "" ;
- if (m_vals.length >= 1)
- {
- if (is_prefix_operator (m_vals [0]))
- {
- result ~= m_vals [0].m_str [3 .. $] ;
- if (m_vals.length != 2)
- assert (false, "Prefix operator should have only one argument") ;
- result ~= m_vals [1].toD_String () ;
- }
- else
- if (is_infix_operator (m_vals [0]))
- {
- string s = " " ~ m_vals [0].m_str ~ " ";
- if (s == " %list ")
- s = " " ;
- if (m_vals.length < 3)
- assert (false, "Infix operator should have at list 2 arguments") ;
- foreach (i, val ; m_vals [1..$-1])
- {
- result ~= val.toD_String () ~ s;
- }
- result ~= m_vals [$-1].toD_String () ;
- if (m_isSameOperator)
- result ~= s ;
- }
- else
- if (is_any_left_paren (m_vals [0]))
- {
- string s1, s2 ;
- switch (m_vals [0].m_str)
- {
- case "%lparen" : s1 = " (" ; s2 = " ) " ; break ;
- case "%lbracket" : s1 = " [" ; s2 = " ] " ; break ;
- case "%lbrace" : s1 = " {" ; s2 = " } " ; break ;
- }
- result ~= s1 ;
- foreach (val ; m_vals [1 .. $])
- result ~= " " ~ val.toD_String () ;
- result ~= s2 ;
- }
- else
- if (is_operator (m_vals [0])) // is postrix operator
- {
- if (m_vals.length != 2)
- assert (false, "Postfix operator should have one argument") ;
- result ~= m_vals [1].toD_String () ;
- result ~= " " ~ m_vals [0].m_str [3..$] ;
- }
- else // is data
- {
- result ~= " " ~ m_vals [0].toD_String () ;
- }
- }
- //result ~= ")" ;
- return result ;
- }
- }
- }
- unittest
- {
- TokenList list = new TokenList ;
- list.setString ("123");
- assert (list.toString () == "123") ;
- list.m_typeId = ID_VOID ;
- assert (list.toString () == "void") ;
- list.m_typeId = ID_LIST ;
- list.m_vals = new TokenList [4] ;
- list.m_vals [0] = new TokenList ("+") ;
- list.m_vals [1] = new TokenList ("a") ;
- list.m_vals [2] = new TokenList () ;
- list.m_vals [2].m_typeId = ID_LIST ;
- list.m_vals [2].m_vals = new TokenList [3] ;
- list.m_vals [2].m_vals [0] = new TokenList ("*") ;
- list.m_vals [2].m_vals [1] = new TokenList ("b") ;
- list.m_vals [2].m_vals [2] = new TokenList ("c") ;
- list.m_vals [3] = new TokenList ("d") ;
- assert (list.toString () == "(+ a (* b c) d)", list.toString ()) ;
- }
- static bool is_operator (string s)
- {
- return (s in ms_operator) != null;
- }
- static bool is_operator (TokenList list)
- {
- if (list !is null && list.m_typeId == ID_STRING)
- return (list.m_str in ms_operator) != null ;
- return false ;
- }
- static bool is_prefix_operator (string s)
- {
- if ((s in ms_operator) is null)
- return false ;
- return ms_operator [s].type == PREFIX ;
- }
- static bool is_prefix_operator (TokenList list)
- {
- if (list !is null && list.m_typeId == ID_STRING)
- return is_prefix_operator (list.m_str) ;
- return false ;
- }
- static bool is_infix_operator (string s)
- {
- if ((s in ms_operator) is null)
- return false ;
- return ms_operator [s].type == INFIX ;
- }
- static bool is_infix_operator (TokenList list)
- {
- if (list !is null && list.m_typeId == ID_STRING)
- return is_infix_operator (list.m_str) ;
- return false ;
- }
- static bool is_any_left_paren (string s)
- {
- if ((s in ms_operator) is null)
- return false ;
- return ms_operator [s].type == PAREN && s [1] == 'l' ;
- }
- static bool is_any_left_paren (TokenList list)
- {
- if (list !is null && list.m_typeId == ID_STRING && is_any_left_paren (list.m_str))
- return true ;
- return false ;
- }
- static private bool is_any_right_paren (string s)
- {
- if ((s in ms_operator) is null)
- return false ;
- return ms_operator [s].type == PAREN && s [1] == 'r' ;
- }
- static private bool is_any_right_paren (TokenList list)
- {
- if (list !is null && list.m_typeId == ID_STRING && is_any_right_paren (list.m_str))
- return true ;
- return false ;
- }
- //TODO : remove, as direction is used directly
- /+
- static private int operator_direction (string s)
- {
- if (s in ms_operator)
- {
- return ms_operator [s].direction ;
- }
- return LEFT_TO_RIGHT ;
- }
- +/
- private TokenList add_d (string s)
- {
- //rule 1.0
- if (!is_operator (s))
- {
- // rule 1.1
- if (m_typeId == ID_STRING && !is_operator (m_str))
- {
- TokenList old_parent = this.m_parent ;
- TokenList new_parent = new TokenList (old_parent) ;
- new_parent.m_typeId = ID_LIST ;
- new_parent.m_vals.length = 0 ;
- new_parent.m_vals ~= new TokenList ("%list") ;
- new_parent.m_vals ~= this ;
- new_parent.m_vals ~= new TokenList (s) ;
- this = new_parent ;
- return this;
- }
- else if (m_typeId == ID_LIST && m_vals.length == 1 && !is_operator (m_vals [0]))
- {
- m_vals ~= m_vals [0] ;
- m_vals [0] = new TokenList ("%list") ;
- m_vals ~= new TokenList (s) ;
- return this ;
- }
- else if (m_typeId == ID_LIST && (
- m_vals.length == 2 && (is_prefix_operator (m_vals [0]) || is_any_left_paren (m_vals [0]))
- || m_vals.length >= 3 && is_infix_operator (m_vals [0]) && !m_isSameOperator))
- {
- this = add_d ("%list") ;
- this = add_d (s) ;
- return this ;
- }
- //rule 1.2
- add_node (new TokenList (this, s)) ;
- m_isSameOperator = false ;
- return this ;
- }
- //is operator:
- //rule 2.0
- if (s == "++" || s == "--")
- {
- if (m_typeId == ID_STRING )
- {
- if (! is_operator (m_str))
- {
- TokenList old_parent = this.m_parent ;
- if (old_parent is null)
- {
- old_parent = new TokenList (null, cast(TokenList)null) ;
- }
- TokenList new_parent = new TokenList (old_parent, new TokenList ("%o2" ~ s)) ;
- new_parent.m_vals [0].m_parent = new_parent ;
- new_parent.m_vals ~= this ;
- this.m_parent = new_parent ;
- old_parent.m_vals [0] = new_parent ;
- this = old_parent ;
- return this ;
- }
- }
- if (m_typeId == ID_LIST)
- {
- if (m_vals.length == 1 && !is_operator (m_vals [0]))
- {
- auto cp_list = m_vals [0] ;
- m_vals [0] = new TokenList (this, "%o2" ~ s) ;
- m_vals ~= cp_list ;
- return this ;
- }
- if (m_vals.length >= 2 && (is_prefix_operator (m_vals [0]) || is_any_left_paren (m_vals [0]))
- || m_vals.length >= 3 && is_infix_operator (m_vals [0]) && !m_isSameOperator)
- {
- TokenList data = m_vals [$-1] ;
- m_vals [$-1] = new TokenList (this, new TokenList ("%o2" ~ s)) ;
- m_vals [$-1].m_vals [0].m_parent = m_vals [$-1] ;
- m_vals [$-1].add_node (data) ;
- data.m_parent = m_vals [$-1] ;
- return this ;
- }
- }
- }
- //rule 2.0.1
- if (is_any_left_paren (s))
- {
- if (m_typeId == ID_VOID || m_typeId == ID_LIST && m_vals.length == 0)
- {
- add_node (new TokenList (this, s)) ;
- ++ m_numParenths ;
- return this ;
- }
- if (m_typeId == ID_LIST && (
- is_infix_operator (m_vals [0]) &&
- (m_vals.length == 2 || m_vals.length >= 3 && m_isSameOperator)
- || m_vals.length == 1 && (is_prefix_operator (m_vals [0]) || is_any_left_paren (m_vals [0]))
- ) )
- {
- m_isSameOperator = false ;
- auto list = new TokenList (this) ;
- list.add_node (new TokenList (list, s)) ;
- add_node (list) ;
- ++ m_numParenths ;
- this = list ;
- return this ;
- }
- if (m_typeId == ID_LIST && (
- m_vals.length >= 3 && is_infix_operator (m_vals [0]) && !m_isSameOperator
- || m_vals.length == 2 && (is_prefix_operator (m_vals [0]) || is_any_left_paren (m_vals [0]))
- ) )
- {
- auto nl1 = new TokenList (this) ;
- nl1.add_node (new TokenList (nl1, "%list")) ;
- nl1.m_vals ~= m_vals [$-1] ;
- nl1.m_vals [1].m_parent = nl1 ;
- auto nl2 = new TokenList (nl1) ;
- nl2.add_node (new TokenList (nl1, s)) ;
- nl1.m_vals ~= nl2 ;
- m_vals [$-1] = nl1 ;
- ++ m_numParenths ;
- this = nl2 ;
- return this ;
- }
- if (m_typeId == ID_LIST && m_vals.length == 1 && !is_operator (m_vals [0]))
- {
- auto list = m_vals [0] ;
- m_vals [0] = new TokenList (this, "%list") ;
- m_vals ~= list ;
- auto c_list = new TokenList (this) ;
- c_list.add_node (new TokenList (c_list, s)) ;
- m_vals ~= c_list ;
- this = c_list ;
- ++ m_numParenths ;
- return this ;
- }
- assert (false, "AST error : this can't happen") ;
- }
- //todo: rule 2.0.2
- if (is_any_right_paren (s))
- {
- -- m_numParenths ;
- if (m_numParenths < 0)
- assert (false, "AST error : Too much rigth " ~ s) ;
- l_rule_2_0_2_local : ; //horrible, but, eh... can be for (;;)
- if (m_typeId != ID_LIST
- || m_typeId == ID_LIST && (m_vals.length == 0 ||
- m_vals.length >= 1 && !is_operator (m_vals [0]) ) )
- assert (0, "AST error : Too much rigth " ~ s) ;
- if (is_any_left_paren (m_vals [0]))
- {
- if (m_vals [0].m_str [2..$] != s [2..$])
- assert (false, "AST error : no closing to "~m_vals [0].m_str ) ;
- if (m_parent !is null)
- {
- this = m_parent ;
- return this ;
- }
- return this ;
- }
- if (m_parent is null)
- assert (0, "AST error : Too much rigth " ~ s) ;
- this = m_parent ;
- goto l_rule_2_0_2_local ;
- assert (0, "Can't be here") ;
- }
- //rule 2.1
- //todo:
- if (m_typeId == ID_LIST && m_vals.length >= 1 &&
- (is_infix_operator (m_vals [0]) &&
- ( m_vals.length <= 2 || m_vals.length >= 3 && m_isSameOperator)
- || is_prefix_operator (m_vals [0]) && m_vals.length == 1)
- //&& !is_any_left_paren (m_vals [0])
- || m_typeId == ID_VOID || m_typeId == ID_LIST && m_vals.length == 0
- || m_typeId == ID_LIST && m_vals.length == 1 && is_any_left_paren (m_vals [0]) )
- {
- m_isSameOperator = false ;
- string new_op = "%o1" ~ s ;
- if ((new_op in ms_operator) == null)
- assert (false, "AST error - unknown operator " ~ new_op) ;
- auto new_op_list = new TokenList (this, new_op) ;
- if (m_typeId == ID_VOID)
- {
- add_node (new_op_list) ;
- }
- else
- {
- TokenList pre_list = new TokenList (this, new_op_list) ;
- new_op_list.m_parent = pre_list ;
- add_node (pre_list) ;
- this = pre_list ;
- }
- return this ;
- }
- //rule 2.2
- l_rule_2_2 : ;
- if (m_typeId == ID_STRING && !is_operator (this)
- || m_typeId == ID_LIST && m_vals.length == 1 && !is_operator (m_vals [0]))
- {
- TokenList swap ;
- if (m_typeId == ID_STRING)
- {
- swap = new TokenList (this, m_str) ;
- }
- else if (m_typeId == ID_LIST && m_vals.length == 1)
- swap = m_vals [0] ;
- add_node (swap) ;
- m_vals [0] = new TokenList (this, s) ;
- return this ;
- }
- //rule 2.2.1
- if (m_typeId == ID_LIST && (
- m_vals.length >= 3 && is_infix_operator (m_vals [0]) && !m_isSameOperator
- /* && (s in ms_operator) != null -- already tested */
- && ms_operator [s].priority > ms_operator [this.m_vals [0].m_str].priority
- || m_vals.length >= 2 && is_prefix_operator (m_vals [0])
- && ms_operator [s].priority > ms_operator [this.m_vals [0].m_str].priority
- || m_vals.length >= 2 && is_any_left_paren (m_vals [0]) )
- )
- {
- auto new_op_list = new TokenList (this, new TokenList (null, s)) ;
- new_op_list.m_vals [0].m_parent = new_op_list ;
- new_op_list.add_node (this.m_vals [$-1]) ;
- m_vals [$-1] = new_op_list ;
- this = new_op_list ;
- return this ;
- }
- //rule 2.3
- if (m_typeId == ID_LIST && m_vals.length >= 1 && is_operator (m_vals [0]) &&
- ms_operator [s].priority < ms_operator [m_vals [0].m_str].priority )
- {
- if (! is_any_left_paren (m_vals [0]))
- {
- if (m_parent is null)
- m_parent = new TokenList (null, this) ;
- this = m_parent ;
- goto l_rule_2_2 ;
- }
- }
- //rule 2.4
- if (m_typeId == ID_LIST && m_vals.length >= 2)
- {
- auto val = m_vals [0] ;
- if (is_operator (val))
- {
- auto str = val.m_str ;
- auto this_op = ms_operator [s] ;
- auto curr_op = ms_operator [str] ;
- if (this_op.priority == curr_op.priority
- && !is_any_left_paren (str) && !is_prefix_operator (str))
- {
- if (this_op.direction != curr_op.direction)
- assert (false,
- "AST error : currently no support for operators with different directions to be equal in priority") ;
- if (this_op.direction == RIGHT_TO_LEFT)
- {
- auto op_list = new TokenList (this, new TokenList (null, s)) ;
- op_list.m_vals [0].m_parent = op_list ;
- op_list.add_node (m_vals [$-1]) ;
- op_list.m_vals [1].m_parent = op_list ;
- m_vals [$-1] = op_list ;
- this = op_list ;
- return this ;
- }
- //direction == LEFT_TO_RIGHT
- if (str != s)
- {
- if (m_parent is null)
- {
- m_parent = new TokenList (null, this) ;
- }
- this = m_parent ;
- goto l_rule_2_2 ;
- }
- //rule 2.4 г
- m_isSameOperator = true ;
- return this ;
- }
- }
- }
- assert (0, "AST no matching rule to \"" ~ s ~ "\"") ;
- //return this ;
- }
- static TokenList parseD (string s)
- {
- /+ PREUDOCODE
- int ws = -1 ;
- for (int i = 0 ; i < s.len ;)
- {
- skip_space () ;
- skip_comments () ;
- if (if_nameable (s [i]) )
- ws = i ;
- do_while_nameabe (s [i]) ;
- ++i;
- word = s [ws .. i] ;
- add_d (word)
- continue ;
- if (is_first_operator_symbol (s[i]))
- max_op = s [i]
- while max_op + s[i+1] in operators
- max_op ~= s [i+1]
- ++i ;
- add_d (max_op) ;
- }
- +/
- bool is_nameable (char c)
- {
- immutable byte [] symbs =
- [
- '_', '@', '$'
- ] ;
- bool result = false ;
- if ('a' <= c && c <= 'z')
- result = true ;
- if ('A' <= c && c <= 'Z')
- result = true ;
- if ('0' <= c && c <= '9')
- result = true ;
- foreach (i ; symbs)
- if (c == i)
- {
- result = true ;
- break ;
- }
- return result ;
- }
- bool is_first_operator_symbol (char c)
- {
- immutable ubyte [] symbs =
- [
- ';', ',', '=', '?', ':', '|', '&',
- '^', '<', '>', '!', '+', '-', '*', '/', '%', '~', '.',
- '[', ']', '(', ')', '{', '}',
- ] ;
- foreach (v ; symbs)
- if (c == v)
- return true ;
- return false ;
- }
- auto list = new TokenList () ;
- list.m_typeId = ID_VOID ;
- list.m_str = null ;
- list.m_vals = null ;
- int ws = -1 ;
- for (int i = 0 ; i < s.length ;)
- {
- while (i < s.length && isspace (s [i]))
- ++ i ;
- //TODO : skip_comments () ;
- if (i < s.length && is_nameable (s [i]) )
- {
- ws = i ;
- ++i ;
- while (i < s.length && is_nameable (s [i]) )
- ++i ;
- string word = s [ws .. i] ;
- list = list.add_d (word) ;
- continue ;
- }
- if (i < s.length && is_first_operator_symbol (s[i]))
- {
- string max_op = "" ~ s [i] ;
- ++ i ;
- while (i < s.length && (max_op ~ s[i]) in ms_operator)
- {
- max_op ~= s [i] ;
- ++i ;
- }
- switch (max_op)
- {
- case "(" : max_op = "%lparen" ; break ;
- case ")" : max_op = "%rparen" ; break ;
- case "[" : max_op = "%lbracket" ; break ;
- case "]" : max_op = "%rbracket" ; break ;
- case "{" : max_op = "%lbrace" ; break ;
- case "}" : max_op = "%rbrace" ; break ;
- default : break ;
- }
- list = list.add_d (max_op) ;
- continue ;
- }
- }
- while (list.m_parent !is null)
- list = list.m_parent ;
- return list ;
- }
- unittest
- {
- TokenList test ; //= new TokenList () ;
- test = TokenList.parseD ("++a . b * c") ;
- assert (test.toString () == "(* (%o1++ (. a b)) c)") ;
- test = TokenList.parseD ("a + b * c") ;
- assert (test.toString () == "(+ a (* b c))") ;
- test = TokenList.parseD ("a + b + * c") ;
- assert (test.toString () == "(+ a b (%o1* c))") ;
- test = TokenList.parseD ("a + b * c * d") ;
- assert (test.toString () == "(+ a (* b c d))") ;
- test = TokenList.parseD ("a + b + c") ;
- assert (test.toString () == "(+ a b c)") ;
- test = TokenList.parseD ("a * b + c") ;
- assert (test.toString () == "(+ (* a b) c)") ;
- test = TokenList.parseD ("a * (b + c) ") ;
- assert (test.toString () == "(* a (%lparen (+ b c)))") ;
- test = TokenList.parseD ("a + b c ") ;
- assert (test.toString () == "(+ a (%list b c))") ;
- test = TokenList.parseD ("a . b c ") ;
- assert (test.toString () == "(%list (. a b) c)") ;
- test = TokenList.parseD ("a * (b + c / d ++ ) ") ;
- assert (test.toString () == "(* a (%lparen (+ b (/ c (%o2++ d)))))") ;
- test = TokenList.parseD ("a ++") ;
- assert (test.toString () == "(%o2++ a)") ;
- test = TokenList.parseD ("(a ++)") ;
- assert (test.toString () == "(%lparen (%o2++ a))") ;
- test = TokenList.parseD (" + a") ;
- assert (test.toString () == "(%o1+ a)") ;
- test = TokenList.parseD ("{abb + (x+c); c, ++d.x ;d ; e+f ; if (x = 1) x + 1 ;}") ;
- assert (test.toString () == "(%lbrace (; (+ abb (%lparen (+ x c))) (, c (%o1++ (. d x))) " ~
- "d (+ e f) (+ (%list if (%lparen (= x 1)) x) 1)))") ;
- //writeln (test.toD_String ()) ;
- test = TokenList.parseD ("a [] + b") ;
- assert (test.toString () == "(+ (%list a (%lbracket)) b)") ;
- test = TokenList.parseD ("a + b []") ;
- assert (test.toString () == "(+ a (%list b (%lbracket)))") ;
- test = TokenList.parseD ("a . b []") ;
- assert (test.toString () == "(. a (%list b (%lbracket)))") ;
- test = TokenList.parseD ("(a b)") ;
- assert (test.toString () == "(%lparen (%list a b))") ;
- test = TokenList.parseD ("a++ . b") ;
- assert (test.toString () == "(. (%o2++ a) b)") ;
- test = TokenList.parseD ("++a . b") ;
- assert (test.toString () == "(%o1++ (. a b))") ;
- test = TokenList.parseD ("++a ; b ;") ;
- assert (test.toString () == "(; (%o1++ a) b)" && test.m_isSameOperator) ;
- test = TokenList.parseD ("+ +a ") ;
- assert (test.toString () == "(%o1+ (%o1+ a))") ;
- test = TokenList.parseD ("i = 1:1:10, if(i % 2 == 0); i*i +1") ;
- assert (test.toString () == "(; (, (= i (: 1 1 10)) (%list if (%lparen (== (% i 2) 0)))) " ~
- "(+ (* i i) 1))") ;
- //writeln (test.toD_String ()) ;
- }
- string getStringItem (int n)
- {
- assert (m_typeId == ID_LIST, "AST error : expected ID_LIST argument, but found "
- ~ (m_typeId == ID_VOID ? "ID_VOID" : "ID_STRING") ) ;
- assert (m_vals.length > n, "AST error : index out of bounds") ;
- assert (m_vals [n].m_typeId == ID_STRING, "AST error : List should be of type string") ;
- return m_vals [n].m_str ;
- }
- TokenList getItem (int n)
- {
- assert (m_typeId == ID_LIST, "AST error : expected ID_LIST argument, but found "
- ~ (m_typeId == ID_VOID ? "ID_VOID" : "ID_STRING") ) ;
- assert (m_vals.length > n, "AST error : index out of bounds") ;
- return m_vals [n] ;
- }
- @property int typeId ()
- {
- return m_typeId ;
- }
- @property bool isEndSeparator ()
- {
- return m_isSameOperator ;
- }
- @property string str ()
- {
- assert (m_typeId == ID_STRING, "AST error : expected list m_typeId to be ID_STRING") ;
- return m_str ;
- }
- @property int length ()
- {
- assert (m_typeId == ID_LIST, "AST error : expected list m_typeId to be ID_LIST") ;
- return m_vals.length ;
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement