/* parser.mly */

/* vim:sts=2:sw=2:et:cc=80
 * parser.mly: A parser definition for pli-beancompiler
 *
 * Shared on pastebin for the fine folks on freenode.#ocaml
 */

/* Tokens that carry a semantic value. */
%token <int> INT_LIT
%token <string> STR_LIT IDENTIFIER FIELD

/* Tokens without a value, listed in their original declaration order. */
%token TRUE FALSE
%token AND BOOL_KW DO OD ELSE END IF FI INT_KW NOT OR PROC READ REF THEN
%token TYPEDEF VAL WHILE WRITE
%token BINARY_MINUS BINARY_PLUS BINARY_MULTIPLICATION BINARY_DIVISION
%token INEQUALITY LESSER_EQUALITY GREATER_EQUALITY LESSER_THAN GREATER_THAN
%token EQUALITY
%token ASSIGN COMMA COLON SEMICOLON
%token L_BRACE R_BRACE L_PAREN R_PAREN

/* Declared, but not referenced by any rule in this file. */
%token L_BRACK R_BRACK COMMENT REFERENCE_END EOF

/* Entry point.
 * NOTE(review): Tuple is not declared anywhere in this file -- confirm that
 * it resolves to a real OCaml type.
 */
%start prog
%type <Tuple> prog
%%
%%
/* Entry point: the type definitions followed by the procedure definitions.
 * Yields the pair (type definition list, procedure list).
 * NOTE(review): the declared EOF token is not matched here -- confirm how
 * end of input is meant to be detected.
 */
prog:
    prog_typedefs prog_procs    { ($1, $2) }
;

/* Zero or more type definitions, collected into a list in source order. */
prog_typedefs:
    type_def prog_typedefs    { $1 :: $2 }
  | /* empty */               { [] }
;

/* Zero or more procedure definitions, collected into a list in source
 * order.
 */
prog_procs:
    proc_def prog_procs    { $1 :: $2 }
  | /* empty */            { [] }
;

/* One type definition. The name is written last in the source but comes
 * first in the result: yields (name, type_spec).
 */
type_def:
    TYPEDEF type_spec id    { ($3, $2) }
;

/* A type: a built-in keyword, a brace-delimited field list, or a name.
 *
 * TODO The alternatives do not yield a consistent type: type_kw and id
 *      yield a `String value, while the braced form yields the non-empty
 *      list of (id, type_spec) pairs built by type_spec_field_defs.
 *      Settling this needs some design work, or at least a discussion
 *      about the best approach.
 */
type_spec:
    type_kw                                 { $1 }
  | L_BRACE type_spec_field_defs R_BRACE    { $2 }
  | id                                      { $1 }
;

/* The built-in type keywords, each tagged with a name. */
type_kw:
    BOOL_KW    { `String "bool" }
  | INT_KW     { `String "int" }
;

/* Field definitions of a braced type_spec: one definition followed by any
 * number of comma-prefixed ones. There is deliberately no empty
 * alternative, so the resulting list always has at least one element.
 */
type_spec_field_defs:
    type_spec_field_def type_spec_field_defs_    { $1 :: $2 }
;

/* Tail of the field list: zero or more COMMA-prefixed definitions. The
 * empty alternative supplies the list that the others are consed onto.
 */
type_spec_field_defs_:
    COMMA type_spec_field_def type_spec_field_defs_    { $2 :: $3 }
  | /* empty */                                        { [] }
;

/* A single field: a name, COLON, then its type. Yields (name, type_spec). */
type_spec_field_def:
    id COLON type_spec    { ($1, $3) }
;

/* A reference: a root identifier followed by zero or more FIELD tokens.
 * Yields a list whose head is the root.
 */
ref:
    id ref_    { $1 :: $2 }
;

/* Field selectors that follow the root of a ref, yielded in source order.
 *
 * Each FIELD string is wrapped as `String so that the elements have the
 * same type as the `String root that ref conses onto this list; a list
 * mixing raw strings with `String values would not type-check.
 */
ref_:
  | FIELD ref_
    { `String $1 :: $2 } /* Add fld to the head of the list. */
  | /* Nothing */
    { [] } /* Start an empty list. */
;

/* An identifier, with its string wrapped as a `String value. */
id:
    IDENTIFIER    { `String $1 }
;

/* A procedure: PROC, header, variable declarations, body, END.
 * Yields (header, declarations, body).
 *
 * TODO Discuss whether the declarations should stay separate from the
 *      body. They are kept apart for now because that will probably be
 *      handy later on, and the spec mandates that they come before the
 *      procedure body.
 */
proc_def:
    PROC proc_head proc_var_decs proc_body END    { ($2, $3, $4) }
;

/* Procedure name and its parenthesised parameter list.
 * Yields (name, parameters).
 */
proc_head:
    id L_PAREN proc_params R_PAREN    { ($1, $3) }
;

/* Formal parameters: either none at all, or one parameter followed by any
 * number of comma-prefixed ones.
 */
proc_params:
    formal_param formal_params_    { $1 :: $2 }
  | /* empty */                    { [] }
;

/* Tail of the parameter list. Kept as its own rule so that a single
 * parameter is not required to have a comma after it.
 */
formal_params_:
    COMMA formal_param formal_params_    { $2 :: $3 }
  | /* empty */                          { [] }
;

/* One formal parameter. Yields (passing_indicator, type_spec, ref).
 *
 * NOTE The last component is a ref rather than an id on purpose: the
 *      author is fairly sure a reference is intended, so that something
 *      like foo(z.f1) can be written, which identifiers alone would not
 *      allow.
 */
formal_param:
    passing_indicator type_spec ref    { ($1, $2, $3) }
;

/* The passing indicator of a formal parameter, tagged with a name. */
passing_indicator:
    VAL    { `String "val" }
  | REF    { `String "ref" }
;

/* Zero or more variable declarations, collected into a list in source
 * order.
 */
proc_var_decs:
    var_decl proc_var_decs    { $1 :: $2 }
  | /* empty */               { [] }
;

/* One variable declaration, terminated by SEMICOLON.
 * Yields (type_spec, name).
 */
var_decl:
    type_spec id SEMICOLON    { ($1, $2) }
;

/* Procedure body. For now this matches nothing and yields an empty list.
 *
 * NOTE(review): as a result, stmt_list and the other statement rules are
 * not reachable from prog. Presumably this is meant to become stmt_list;
 * confirm, and check for conflicts with proc_var_decs when wiring it in.
 */
proc_body:
    /* empty */    { [] }
;

/* A statement is either atomic or compound; the value of whichever one
 * matched is passed through unchanged.
 */
stmt:
    atomic_stmt    { $1 }
  | comp_stmt      { $1 }
;

/* Zero or more statements, collected into a list in source order. */
stmt_list:
    stmt stmt_list    { $1 :: $2 }
  | /* empty */       { [] }
;

/* Statements that contain no nested statements: assignment, read, write
 * and procedure call. Each yields a triple (tag, first operand, second
 * operand), with Null where there is no second operand.
 */
atomic_stmt:
    ref ASSIGN assign_rvalue SEMICOLON        { (`String "assign", $1, $3) }
  | READ ref SEMICOLON                        { (`String "read", $2, Null) }
  | WRITE expr SEMICOLON                      { (`String "write", $2, Null) }
  | id L_PAREN expr_list R_PAREN SEMICOLON    { (`String "proc", $1, $3) }
;

/* rvalue start */
/* Right-hand side of an assignment: either an expression or a
 * brace-delimited struct initialiser. Yields (tag, payload).
 */
assign_rvalue:
    expr                               { (`String "expr", $1) }
  | L_BRACE a_rvalue_struct R_BRACE    { (`String "struct", $2) }
;

/* Contents of a struct initialiser: either nothing, or one field
 * initialiser followed by any number of comma-prefixed ones.
 */
a_rvalue_struct:
    rvalue_field_init a_rvalue_struct_    { $1 :: $2 }
  | /* empty */                           { [] }
;

/* Tail of a struct initialiser: zero or more COMMA-prefixed field
 * initialisers.
 */
a_rvalue_struct_:
    COMMA rvalue_field_init a_rvalue_struct_    { $2 :: $3 }
  | /* empty */                                 { [] }
;

/* One field initialiser: a name, the EQUALITY token, then an rvalue, which
 * may itself be a nested struct initialiser. Yields (name, rvalue).
 */
rvalue_field_init:
    id EQUALITY assign_rvalue    { ($1, $3) }
;
/* rvalue end */

/* Compound statements. Each yields a 4-tuple (tag, condition, body, extra).
 *
 * The IF production covers both the has-else and the no-else case:
 * else_block yields [] when no ELSE is present. WHILE has no fourth
 * component, so Null is used there.
 */
comp_stmt:
  | IF expr THEN stmt_list else_block FI
    /* $4 is the THEN branch and $5 the ELSE branch. */
    { (`String "comp_if", $2, $4, $5) }
  | WHILE expr DO stmt_list OD
    { (`String "comp_while", $2, $4, Null) }
;

/* Optional ELSE branch of an IF statement; yields [] when it is absent. */
else_block:
  | ELSE stmt_list
    { $2 }
  | /* Nothing. */
    { [] }
;

/* Argument list of a procedure call: either nothing, or one expression
 * followed by any number of comma-prefixed ones.
 */
expr_list:
    expr expr_list_    { $1 :: $2 }
  | /* empty */        { [] }
;

/* Tail of an expression list: zero or more COMMA-prefixed expressions. */
expr_list_:
    COMMA expr expr_list_    { $2 :: $3 }
  | /* empty */              { [] }
;

/* Expressions: references, constants, parenthesised expressions and
 * operator applications.
 *
 * An operator application yields (operator, first operand, second operand);
 * a unary application has Null as its second operand.
 *
 * NOTE(review): this file has no %left, %right or %nonassoc declarations,
 * and the operator is reached through the binop non-terminal, so the two
 * operator productions are ambiguous. How chained operators nest is left to
 * the parser generator's default conflict resolution. Confirm the intended
 * precedence and associativity.
 */
expr:
  | expr_ref
    { $1 }
  | const
    { $1 }
  | L_PAREN expr R_PAREN
    { $2 }
  | expr binop expr
    { ($2, $1, $3) }
  | unop expr
    /* $1 is the operator and $2 its operand, matching the binop layout. */
    { ($1, $2, Null) }
;

/* A reference used as an expression. Yields (tag, ref). */
expr_ref:
    ref    { (`String "ref", $1) }
;

/* Literal constants. Each yields (tag, payload).
 *
 * NOTE(review): the payload is an int for INT_LIT, a string for STR_LIT and
 * a `String value for the booleans, so the alternatives do not share one
 * type -- confirm the intended representation.
 */
const:
    INT_LIT    { (`String "int_lit", $1) }
  | STR_LIT    { (`String "str_lit", $1) }
  | TRUE       { (`String "bool_lit", `String "true") }
  | FALSE      { (`String "bool_lit", `String "false") }
;

/* Binary operators. Each operator token is mapped to a `String tag naming
 * the operation.
 */
binop:
    BINARY_MINUS             { `String "binop_minus" }
  | BINARY_PLUS              { `String "binop_plus" }
  | BINARY_MULTIPLICATION    { `String "binop_multi" }
  | BINARY_DIVISION          { `String "binop_division" }
  | INEQUALITY               { `String "binop_ineq" }
  | LESSER_EQUALITY          { `String "binop_leq" }
  | GREATER_EQUALITY         { `String "binop_geq" }
  | LESSER_THAN              { `String "binop_lesser" }
  | GREATER_THAN             { `String "binop_greater" }
  | EQUALITY                 { `String "binop_equality" }
  | AND                      { `String "binop_and" }
  | OR                       { `String "binop_or" }
;

/* Unary operators, mapped to a `String tag.
 *
 * NOTE Unary minus does not appear here: it is (currently) handled when
 *      lexing int_lit.
 */
unop:
    NOT    { `String "unop_not" }
;