430 lines
9.7 KiB
OCaml
430 lines
9.7 KiB
OCaml
open Ast
|
|
open Lexer
|
|
|
|
exception Parse_error of string
|
|
|
|
type parser_state = {
|
|
toks : token array;
|
|
mutable i : int;
|
|
}
|
|
|
|
let mk_state toks = { toks = Array.of_list toks; i = 0 }
|
|
let peek st = if st.i < Array.length st.toks then st.toks.(st.i) else TEOF
|
|
let peek_n st n =
|
|
let idx = st.i + n in
|
|
if idx < Array.length st.toks then st.toks.(idx) else TEOF
|
|
|
|
let consume st =
|
|
let t = peek st in
|
|
st.i <- st.i + 1;
|
|
t
|
|
|
|
let expect st tok =
|
|
match (tok, consume st) with
|
|
| TLParen, TLParen
|
|
| TRParen, TRParen
|
|
| TLBrace, TLBrace
|
|
| TRBrace, TRBrace
|
|
| TLBracket, TLBracket
|
|
| TRBracket, TRBracket
|
|
| TSemicolon, TSemicolon
|
|
| TComma, TComma
|
|
| TDot, TDot
|
|
| TAssign, TAssign
|
|
| TPlus, TPlus
|
|
| TMinus, TMinus
|
|
| TStar, TStar
|
|
| TSlash, TSlash
|
|
| TPercent, TPercent
|
|
| TAndAnd, TAndAnd
|
|
| TOrOr, TOrOr
|
|
| TBang, TBang
|
|
| TEqEq, TEqEq
|
|
| TNe, TNe
|
|
| TLt, TLt
|
|
| TLe, TLe
|
|
| TGt, TGt
|
|
| TGe, TGe
|
|
| TIf, TIf
|
|
| TElse, TElse
|
|
| TForEach, TForEach
|
|
| TFor, TFor
|
|
| TEach, TEach
|
|
| TIn, TIn
|
|
| TReturn, TReturn
|
|
| TIntKw, TIntKw
|
|
| TBoolKw, TBoolKw
|
|
| TVoidKw, TVoidKw
|
|
| TStructKw, TStructKw
|
|
| TFn, TFn
|
|
| TLet, TLet
|
|
| TMut, TMut
|
|
| TArrow, TArrow
|
|
| TColon, TColon
|
|
| THash, THash
|
|
| TEOF, TEOF -> ()
|
|
| _ -> raise (Parse_error "unexpected token")
|
|
|
|
let expect_ident st =
|
|
match consume st with
|
|
| TIdent s -> s
|
|
| _ -> raise (Parse_error "expected identifier")
|
|
|
|
(* Parse a base type (no # prefix or # suffix).
|
|
Handles: int, bool, void, struct Name, TypeName, and [] suffixes. *)
|
|
let rec parse_base_type st =
|
|
let base =
|
|
match consume st with
|
|
| TIntKw -> TInt
|
|
| TBoolKw -> TBool
|
|
| TVoidKw -> TVoid
|
|
| TStructKw -> TStruct (expect_ident st)
|
|
| TIdent s -> TStruct s
|
|
| _ -> raise (Parse_error "expected type")
|
|
in
|
|
let rec arrays t =
|
|
match peek st with
|
|
| TLBracket ->
|
|
expect st TLBracket;
|
|
expect st TRBracket;
|
|
arrays (TArray t)
|
|
| _ -> t
|
|
in
|
|
arrays base
|
|
|
|
(* Parse a full type, including comptime forms:
|
|
#type -> TComptime type
|
|
type -> type
|
|
type#type -> TCombo (runtime_type, comptime_type) *)
|
|
and parse_type st =
|
|
match peek st with
|
|
| THash ->
|
|
expect st THash;
|
|
TComptime (parse_base_type st)
|
|
| _ ->
|
|
let t = parse_base_type st in
|
|
(match peek st with
|
|
| THash ->
|
|
expect st THash;
|
|
TCombo (t, parse_base_type st)
|
|
| _ -> t)
|
|
|
|
let rec parse_program st =
|
|
let rec loop acc =
|
|
match peek st with
|
|
| TEOF -> List.rev acc
|
|
| _ -> loop (parse_top st :: acc)
|
|
in
|
|
loop []
|
|
|
|
and parse_top st =
|
|
match peek st with
|
|
| TStructKw ->
|
|
expect st TStructKw;
|
|
let sname = expect_ident st in
|
|
expect st TLBrace;
|
|
let rec fields acc =
|
|
match peek st with
|
|
| TRBrace -> List.rev acc
|
|
| _ ->
|
|
let t = parse_base_type st in
|
|
let n = expect_ident st in
|
|
expect st TSemicolon;
|
|
fields ((t, n) :: acc)
|
|
in
|
|
let fs = fields [] in
|
|
expect st TRBrace;
|
|
expect st TSemicolon;
|
|
TopStruct { sname; fields = fs }
|
|
| TFn ->
|
|
expect st TFn;
|
|
let name = expect_ident st in
|
|
expect st TLParen;
|
|
let params = parse_params st in
|
|
expect st TRParen;
|
|
expect st TArrow;
|
|
let ret = parse_type st in
|
|
let body = parse_stmt_as_block st in
|
|
TopFunc { name; params; ret; body }
|
|
| TLet ->
|
|
expect st TLet;
|
|
let is_mut =
|
|
match peek st with
|
|
| TMut ->
|
|
expect st TMut;
|
|
true
|
|
| _ -> false
|
|
in
|
|
let name = expect_ident st in
|
|
let t_annot =
|
|
match peek st with
|
|
| TColon ->
|
|
expect st TColon;
|
|
Some (parse_type st)
|
|
| _ -> None
|
|
in
|
|
expect st TAssign;
|
|
let e = parse_expr st in
|
|
expect st TSemicolon;
|
|
TopGlobalVar (is_mut, name, t_annot, Some e)
|
|
| _ -> raise (Parse_error "expected top-level declaration (struct, fn, or let)")
|
|
|
|
and parse_params st =
|
|
match peek st with
|
|
| TRParen -> []
|
|
| _ ->
|
|
let rec loop acc =
|
|
let n = expect_ident st in
|
|
expect st TColon;
|
|
let t = parse_type st in
|
|
match peek st with
|
|
| TComma ->
|
|
expect st TComma;
|
|
loop ((n, t) :: acc)
|
|
| _ -> List.rev ((n, t) :: acc)
|
|
in
|
|
loop []
|
|
|
|
and parse_stmt_as_block st =
|
|
match peek st with
|
|
| TLBrace ->
|
|
expect st TLBrace;
|
|
let rec loop acc =
|
|
match peek st with
|
|
| TRBrace ->
|
|
expect st TRBrace;
|
|
List.rev acc
|
|
| _ -> loop (parse_stmt st :: acc)
|
|
in
|
|
loop []
|
|
| _ -> [ parse_stmt st ]
|
|
|
|
and parse_stmt st =
|
|
match peek st with
|
|
| TLBrace -> Block (parse_stmt_as_block st)
|
|
| TIf ->
|
|
expect st TIf;
|
|
expect st TLParen;
|
|
let cond = parse_expr st in
|
|
expect st TRParen;
|
|
let then_body = parse_stmt_as_block st in
|
|
let else_body =
|
|
match peek st with
|
|
| TElse ->
|
|
expect st TElse;
|
|
parse_stmt_as_block st
|
|
| _ -> []
|
|
in
|
|
If (cond, then_body, else_body)
|
|
| TForEach | TFor ->
|
|
(match peek st with
|
|
| TForEach -> expect st TForEach
|
|
| TFor ->
|
|
expect st TFor;
|
|
expect st TEach
|
|
| _ -> ());
|
|
expect st TLParen;
|
|
let it_t = parse_type st in
|
|
let it_name = expect_ident st in
|
|
expect st TIn;
|
|
let iterable = parse_expr st in
|
|
expect st TRParen;
|
|
let body = parse_stmt_as_block st in
|
|
ForEach (it_t, it_name, iterable, body)
|
|
| TReturn ->
|
|
expect st TReturn;
|
|
let v =
|
|
match peek st with
|
|
| TSemicolon -> None
|
|
| _ -> Some (parse_expr st)
|
|
in
|
|
expect st TSemicolon;
|
|
Return v
|
|
| TLet ->
|
|
expect st TLet;
|
|
let is_mut =
|
|
match peek st with
|
|
| TMut ->
|
|
expect st TMut;
|
|
true
|
|
| _ -> false
|
|
in
|
|
let n = expect_ident st in
|
|
let t_annot =
|
|
match peek st with
|
|
| TColon ->
|
|
expect st TColon;
|
|
Some (parse_type st)
|
|
| _ -> None
|
|
in
|
|
let init =
|
|
match peek st with
|
|
| TAssign ->
|
|
expect st TAssign;
|
|
Some (parse_expr st)
|
|
| _ -> None
|
|
in
|
|
expect st TSemicolon;
|
|
VarDecl (is_mut, n, t_annot, init)
|
|
| _ ->
|
|
let e = parse_expr st in
|
|
expect st TSemicolon;
|
|
Expr e
|
|
|
|
and parse_expr st = parse_assignment st
|
|
|
|
and parse_assignment st =
|
|
let lhs = parse_or st in
|
|
match peek st with
|
|
| TAssign ->
|
|
expect st TAssign;
|
|
let rhs = parse_assignment st in
|
|
Assign (lhs, rhs)
|
|
| _ -> lhs
|
|
|
|
and parse_or st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TOrOr ->
|
|
expect st TOrOr;
|
|
loop (Binop (Or, left, parse_and st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_and st)
|
|
|
|
and parse_and st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TAndAnd ->
|
|
expect st TAndAnd;
|
|
loop (Binop (And, left, parse_equality st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_equality st)
|
|
|
|
and parse_equality st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TEqEq ->
|
|
expect st TEqEq;
|
|
loop (Binop (Eq, left, parse_rel st))
|
|
| TNe ->
|
|
expect st TNe;
|
|
loop (Binop (Ne, left, parse_rel st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_rel st)
|
|
|
|
and parse_rel st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TLt ->
|
|
expect st TLt;
|
|
loop (Binop (Lt, left, parse_add st))
|
|
| TLe ->
|
|
expect st TLe;
|
|
loop (Binop (Le, left, parse_add st))
|
|
| TGt ->
|
|
expect st TGt;
|
|
loop (Binop (Gt, left, parse_add st))
|
|
| TGe ->
|
|
expect st TGe;
|
|
loop (Binop (Ge, left, parse_add st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_add st)
|
|
|
|
and parse_add st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TPlus ->
|
|
expect st TPlus;
|
|
loop (Binop (Add, left, parse_mul st))
|
|
| TMinus ->
|
|
expect st TMinus;
|
|
loop (Binop (Sub, left, parse_mul st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_mul st)
|
|
|
|
and parse_mul st =
|
|
let rec loop left =
|
|
match peek st with
|
|
| TStar ->
|
|
expect st TStar;
|
|
loop (Binop (Mul, left, parse_unary st))
|
|
| TSlash ->
|
|
expect st TSlash;
|
|
loop (Binop (Div, left, parse_unary st))
|
|
| TPercent ->
|
|
expect st TPercent;
|
|
loop (Binop (Mod, left, parse_unary st))
|
|
| _ -> left
|
|
in
|
|
loop (parse_unary st)
|
|
|
|
and parse_unary st =
|
|
match peek st with
|
|
| TMinus ->
|
|
expect st TMinus;
|
|
Unop (Neg, parse_unary st)
|
|
| TBang ->
|
|
expect st TBang;
|
|
Unop (Not, parse_unary st)
|
|
| _ -> parse_postfix st
|
|
|
|
and parse_postfix st =
|
|
let rec loop e =
|
|
match peek st with
|
|
| TLParen ->
|
|
expect st TLParen;
|
|
let args = parse_args st in
|
|
expect st TRParen;
|
|
loop (Call (e, args))
|
|
| TLBracket ->
|
|
expect st TLBracket;
|
|
let idx = parse_expr st in
|
|
expect st TRBracket;
|
|
loop (ArrayGet (e, idx))
|
|
| TDot ->
|
|
expect st TDot;
|
|
let fld = expect_ident st in
|
|
loop (StructGet (e, fld))
|
|
| _ -> e
|
|
in
|
|
loop (parse_primary st)
|
|
|
|
and parse_args st =
|
|
match peek st with
|
|
| TRParen -> []
|
|
| _ ->
|
|
let rec loop acc =
|
|
let e = parse_expr st in
|
|
match peek st with
|
|
| TComma ->
|
|
expect st TComma;
|
|
loop (e :: acc)
|
|
| _ -> List.rev (e :: acc)
|
|
in
|
|
loop []
|
|
|
|
and parse_primary st =
|
|
match consume st with
|
|
| TIntLit n -> IntLit n
|
|
| TTrue -> BoolLit true
|
|
| TFalse -> BoolLit false
|
|
| TIdent s -> Var s
|
|
| TLParen ->
|
|
let e = parse_expr st in
|
|
expect st TRParen;
|
|
e
|
|
| _ -> raise (Parse_error "expected expression")
|
|
|
|
let parse_string src =
|
|
try
|
|
let st = mk_state (Lexer.lex src) in
|
|
Ok (parse_program st)
|
|
with
|
|
| Lexer.Lex_error msg -> Error ("lex error: " ^ msg)
|
|
| Parse_error msg -> Error ("parse error: " ^ msg)
|