// ParserInitial.jj

/*******************************
***** SECTION 1 - OPTIONS *****
*******************************/
/* Generator options: enable Java-style Unicode escape handling for the input stream. */
options {
    JAVA_UNICODE_ESCAPE = true;
}
/*********************************
***** SECTION 2 - USER CODE *****
*********************************/
PARSER_BEGIN(SLPTokeniser)
/**
 * Command-line driver that prints the token stream of an SLP source text.
 *
 * Input comes from standard input (no arguments) or from the file named by
 * the single argument. Any other argument count prints a usage message.
 */
public class SLPTokeniser {
    /**
     * Reads tokens until end of input and echoes them to standard output,
     * separated by single spaces. Numbers and identifiers are labelled with
     * their kind; every other token is printed as its raw text.
     *
     * @param args empty to read standard input, or one file name
     */
    public static void main(String args[]) {
        // Anything other than zero or one argument is a usage error.
        if (args.length > 1) {
            System.out.println("SLP Tokeniser: Usage is one of:");
            System.out.println(" java SLPTokeniser < inputfile");
            System.out.println("OR");
            System.out.println(" java SLPTokeniser inputfile");
            return;
        }

        // Constructing the tokeniser attaches it to the chosen input source.
        SLPTokeniser tokeniser;
        if (args.length == 1) {
            try {
                tokeniser = new SLPTokeniser(new java.io.FileInputStream(args[0]));
            } catch (java.io.FileNotFoundException e) {
                System.err.println("File " + args[0] + " not found.");
                return;
            }
        } else {
            System.out.println("Reading from standard input . . .");
            tokeniser = new SLPTokeniser(System.in);
        }

        // getNextToken() is called without a receiver, i.e. as a static method
        // (presumably relying on JavaCC's default static mode - confirm if the
        // STATIC option is ever changed).
        Token t = getNextToken();
        while (t.kind != EOF) {
            switch (t.kind) {
                case NUM:
                    System.out.print("Number");
                    System.out.print("(" + t.image + ") ");
                    break;
                case ID:
                    System.out.print("Identifier");
                    System.out.print("(" + t.image + ") ");
                    break;
                default:
                    // Keywords, operators and unrecognised characters: raw text only.
                    System.out.print(t.image + " ");
                    break;
            }
            t = getNextToken();
        }
    }
}
PARSER_END(SLPTokeniser)

/*****************************************
***** SECTION 3 - TOKEN DEFINITIONS *****
*****************************************/
/*
 * Declarations injected into the generated token manager.
 * commentNesting tracks how many block-comment openers are currently
 * unmatched; the comment-related lexical actions increment it on every
 * opener and decrement it on every closer.
 */
TOKEN_MGR_DECLS :
{
    static int commentNesting = 0;
}

SKIP : /* whitespace is discarded and never reaches the token stream */
{
      " "     // space
    | "\t"    // horizontal tab
    | "\n"    // line feed
    | "\r"    // carriage return
    | "\f"    // form feed
}

// Comment openers. Each one is skipped and moves the lexer into a dedicated
// state that consumes the rest of the comment.
SKIP :
{
      "/*" { commentNesting++; } : IN_COMMENT    // block comment, may nest
    | "--" : IN_BASIC_COMMENT                    // line comment
}

// Inside a block comment. Every nested opener raises the depth counter and
// every closer lowers it; the lexer returns to DEFAULT only when the closer
// matching the outermost opener has been seen. All other characters are
// discarded one at a time.
<IN_COMMENT> SKIP :
{
      "/*" { commentNesting++; }
    | "*/" { if (--commentNesting == 0) SwitchTo(DEFAULT); }
    | < ~[] >
}

/*
 * Inside a line comment (opened by "--"). The comment ends at the next
 * line feed, which returns the lexer to DEFAULT.
 *
 * The catch-all alternative is required: without it the state defines no
 * match for any character other than the line feed, so the first character
 * of the comment text would raise a lexical error. The line feed is listed
 * first so that it wins over the catch-all, which matches the same length.
 */
<IN_BASIC_COMMENT> SKIP :
{
    "\n" { SwitchTo(DEFAULT); }
    | < ~[] >
}

/*
 * Reserved words. They are declared before the identifier token so that a
 * word matching both is classified as the keyword.
 */
TOKEN : /* keywords */
{
      < AND    : "and" >
    | < BOOL   : "bool" >
    | < CONST  : "const" >
    | < DO     : "do" >
    | < ELSE   : "else" >
    | < FALSE  : "false" >
    | < IF     : "if" >
    | < INT    : "int" >
    | < MAIN   : "main" >
    | < NOT    : "not" >
    | < OR     : "or" >
    | < RETURN : "return" >
    | < THEN   : "then" >
    | < TRUE   : "true" >
    | < VAR    : "var" >
    | < VOID   : "void" >
    | < WHILE  : "while" >
    | < BEGIN  : "begin" >
    | < END    : "end" >
}

/*
 * Punctuation, arithmetic operators and relational operators.
 * Two-character operators share a prefix with a one-character one
 * (":=" / ":", "<=" / "<", ">=" / ">"); the lexer prefers the longest match.
 */
TOKEN : /* operators and relations */
{
      < SEMIC          : ";" >
    | < ASSIGN         : ":=" >
    | < COLON          : ":" >
    | < LBR            : "(" >
    | < RBR            : ")" >
    | < COMMA          : "," >
    | < PLUS_SIGN      : "+" >
    | < MINUS_SIGN     : "-" >
    | < MULT_SIGN      : "*" >
    | < DIV_SIGN       : "/" >
    | < EQUAL_SIGN     : "=" >
    | < NOT_EQUAL_SIGN : "!=" >
    | < LESS_THAN      : "<" >
    | < GREATER_THAN   : ">" >
    | < LESS_EQUALS    : "<=" >
    | < GREATER_EQUALS : ">=" >
}

/* Unsigned integer literals: one or more decimal digits. */
TOKEN : /* Numbers */
{
      < NUM    : ( <DIGIT> )+ >
    | < #DIGIT : [ "0" - "9" ] >    // private helper, never emitted as a token
}

/*
 * Identifiers: a letter or underscore followed by any number of letters,
 * underscores or digits.
 *
 * Digits are allowed only after the first character. Previously the LETTER
 * class itself contained the digits, so text such as "1abc" was lexed as a
 * single identifier; it now splits into a number followed by an identifier.
 */
TOKEN : /* Identifiers */
{
    < ID : <LETTER> ( <LETTER> | ["0" - "9"] )* >
    | < #LETTER : ["a" - "z", "A" - "Z", "_"] >
}

/*
 * Intended to represent the empty string.
 *
 * NOTE(review): #EMPTY is an empty character list, i.e. a set containing no
 * characters, so EPSILON cannot match any input text and the lexer will
 * never produce it. Some JavaCC versions may reject the empty list
 * outright - confirm against the version in use. A production that needs
 * an optional part is normally written with the [ ... ] optional syntax
 * rather than with a token for "nothing".
 */
TOKEN : /* epsilon empty string */
{
    <EPSILON : (<EMPTY>)+ >
    | < #EMPTY: [] >
}

/*
 * Catch-all: matches any single character. It is declared after every other
 * token so that it only applies to characters no earlier definition
 * recognises, which lets the driver print them instead of failing.
 */
TOKEN : /* Anything not recognised so far */
{
    < OTHER : ~[] >
}

/*************************************************************************************
***** SECTION 4 - THE GRAMMAR & PRODUCTION RULES  *****
**************************************************************************************/

/*
 * program -> ( decl )* ( function )* main_prog EOF
 *
 * Start production. The explicit <EOF> makes the parse fail when anything
 * follows the main program; without it trailing input was silently ignored.
 */
void program() : {}
{
    ( decl() )*
    ( function() )*
    main_prog()
    <EOF>
}

/*
 * decl -> var_decl | const_decl
 *
 * Exactly one declaration. Repetition is left to the callers, all of which
 * already use ( decl() )*. Keeping a second star in here made decl match the
 * empty string, which is not permitted inside a ( ... )* loop.
 */
void decl () : {}
{
    var_decl() | const_decl()
}

/*
 * var_decl -> var ident_list : type ( , ident_list : type )* ;
 *
 * The first group now separates the identifier list from its type with a
 * colon, the same as every following group; it previously required a comma
 * there, so "var x : int;" could not be parsed.
 */
void var_decl() : {}
{
    <VAR> ident_list() <COLON> type() ( <COMMA> ident_list() <COLON> type() )* <SEMIC>
}

/*
 * const_decl -> const id : type = expression ( , id : type = expression )* ;
 *
 * The type is parsed by calling the type() production and the "=" is
 * matched as the EQUAL_SIGN token. Writing "type = expression()" instead is
 * read by JavaCC as a Java assignment of the call result to a variable
 * named type, which is not what is meant and does not compile.
 */
void const_decl() : {}
{
    <CONST> <ID> <COLON> type() <EQUAL_SIGN> expression()
    ( <COMMA> <ID> <COLON> type() <EQUAL_SIGN> expression() )*
    <SEMIC>
}

/*
 * function -> type id ( param_list )
 *             begin ( decl )* ( statement ; )* return [ expression ] ; end
 *
 * - The parameter list is enclosed in real <LBR>/<RBR> tokens; bare
 *   parentheses in a JavaCC expansion are only grouping and match no input.
 * - Each statement is terminated by a semicolon, matching main_prog and the
 *   nested begin/end form of statement.
 * - The return value is optional, expressed with [ ... ] rather than with
 *   the EPSILON token, which the lexer cannot produce.
 */
void function() : {}
{
    type() <ID> <LBR> param_list() <RBR>
    <BEGIN>
    ( decl() )*
    ( statement() <SEMIC> )*
    <RETURN> [ expression() ] <SEMIC>
    <END>
}

/*
 * param_list -> [ id : type ( , id : type )* ]
 *
 * Zero or more typed parameters. The empty case is expressed with the
 * [ ... ] optional syntax; the previous EPSILON alternative relied on a
 * token the lexer cannot produce, so an empty list never parsed.
 */
void param_list() : {}
{
    [ <ID> <COLON> type() ( <COMMA> <ID> <COLON> type() )* ]
}

/* type -> int | bool | void */
void type() : {}
{
      <INT>
    | <BOOL>
    | <VOID>
}

/*
 * main_prog -> main begin ( decl )* ( statement ; )* end
 *
 * The program entry block: local declarations first, then statements, each
 * terminated by a semicolon.
 */
void main_prog() : {}
{
    <MAIN> <BEGIN>
        ( decl() )*
        ( statement() <SEMIC> )*
    <END>
}

/*
 * statement -> id := expression
 *            | id ( arg_list )
 *            | begin ( statement ; )* end
 *            | if condition then statement else statement
 *            | while condition do statement
 *
 * - Assignment and call both start with an identifier, so they are
 *   left-factored: the identifier is consumed once and the next token
 *   (":=" or "(") selects the form. As two separate alternatives the call
 *   form could never be chosen with one token of lookahead.
 * - The call's parentheses are the <LBR>/<RBR> tokens; bare parentheses in
 *   a JavaCC expansion are only grouping.
 * - The former EPSILON alternative is omitted: the lexer cannot produce
 *   that token, so the alternative never matched. A statement therefore
 *   always consumes at least one token, which also keeps it safe to use
 *   inside ( ... )* loops.
 */
void statement() : {}
{
    <ID> ( <ASSIGN> expression() | <LBR> arg_list() <RBR> )
    | <BEGIN> ( statement() <SEMIC> )* <END>
    | <IF> condition() <THEN> statement() <ELSE> statement()
    | <WHILE> condition() <DO> statement()
}

/*
 * expression -> id ( arg_list )
 *             | fragment ( ( + | - | * | / ) fragment )*
 *
 * A function call and a fragment can both begin with an identifier, so one
 * token is not enough to choose between them. LOOKAHEAD(2) selects the call
 * form only when the identifier is immediately followed by "("; otherwise
 * the arithmetic form is used. The call's parentheses are the <LBR>/<RBR>
 * tokens; bare parentheses in a JavaCC expansion are only grouping.
 */
void expression() : {}
{
    LOOKAHEAD(2) <ID> <LBR> arg_list() <RBR>
    | fragment() ( ( <PLUS_SIGN> | <MINUS_SIGN> | <MULT_SIGN> | <DIV_SIGN> ) fragment() )*
}

/*
 * fragment -> id | true | false | number
 *           | ( + | - ) fragment
 *           | ( expression )
 *
 * A nested expression must be wrapped in <LBR>/<RBR>. A bare expression()
 * alternative would let fragment call expression, which begins by calling
 * fragment again without consuming any input: left recursion, which a
 * recursive-descent parser cannot handle.
 */
void fragment() : {}
{
    <ID>
    | <TRUE>
    | <FALSE>
    | <NUM>
    | ( <PLUS_SIGN> | <MINUS_SIGN> ) fragment()
    | <LBR> expression() <RBR>
}

/*
 * condition -> not expression
 *            | expression condition_op expression
 *            | id
 *
 * The comparison form and the bare identifier can both start with an
 * identifier, so with one token of lookahead the bare form was never
 * reachable. The syntactic lookahead checks that an expression followed by
 * an operator really is present before committing to the comparison form;
 * otherwise a single identifier is accepted.
 */
void condition() : {}
{
    <NOT> expression()
    | LOOKAHEAD( expression() condition_op() ) expression() condition_op() expression()
    | <ID>
}

/* condition_op -> = | != | < | > | <= | >= | and | or   (helper for condition) */
void condition_op() : {}
{
    <EQUAL_SIGN> | <NOT_EQUAL_SIGN> | <LESS_THAN> | <GREATER_THAN>
    | <LESS_EQUALS> | <GREATER_EQUALS> | <AND> | <OR>
}

/* ident_list -> id ( , id )*   : one or more comma-separated identifiers */
void ident_list() : {}
{
    <ID>
    ( <COMMA> <ID> )*
}

/*
 * arg_list -> [ id ( , id )* ]
 *
 * Zero or more comma-separated identifiers. The empty case is expressed
 * with the [ ... ] optional syntax; the previous EPSILON alternative relied
 * on a token the lexer cannot produce, so an empty argument list never
 * parsed.
 */
void arg_list() : {}
{
    [ <ID> ( <COMMA> <ID> )* ]
}