spooky/lib/parser.ml

430 lines
9.7 KiB
OCaml
Raw Normal View History

2026-04-29 15:50:03 +00:00
open Ast
open Lexer
exception Parse_error of string
type parser_state = {
toks : token array;
mutable i : int;
}
let mk_state toks = { toks = Array.of_list toks; i = 0 }
let peek st = if st.i < Array.length st.toks then st.toks.(st.i) else TEOF
2026-04-29 16:35:55 +00:00
let peek_n st n =
let idx = st.i + n in
if idx < Array.length st.toks then st.toks.(idx) else TEOF
2026-04-29 15:50:03 +00:00
let consume st =
let t = peek st in
st.i <- st.i + 1;
t
let expect st tok =
match (tok, consume st) with
| TLParen, TLParen
| TRParen, TRParen
| TLBrace, TLBrace
| TRBrace, TRBrace
| TLBracket, TLBracket
| TRBracket, TRBracket
| TSemicolon, TSemicolon
| TComma, TComma
| TDot, TDot
| TAssign, TAssign
| TPlus, TPlus
| TMinus, TMinus
| TStar, TStar
| TSlash, TSlash
| TPercent, TPercent
| TAndAnd, TAndAnd
| TOrOr, TOrOr
| TBang, TBang
| TEqEq, TEqEq
| TNe, TNe
| TLt, TLt
| TLe, TLe
| TGt, TGt
| TGe, TGe
| TIf, TIf
| TElse, TElse
| TForEach, TForEach
| TFor, TFor
| TEach, TEach
| TIn, TIn
| TReturn, TReturn
| TIntKw, TIntKw
| TBoolKw, TBoolKw
| TVoidKw, TVoidKw
| TStructKw, TStructKw
2026-04-29 18:56:41 +00:00
| TFn, TFn
| TLet, TLet
| TMut, TMut
| TArrow, TArrow
| TColon, TColon
| THash, THash
2026-04-29 15:50:03 +00:00
| TEOF, TEOF -> ()
| _ -> raise (Parse_error "unexpected token")
let expect_ident st =
match consume st with
| TIdent s -> s
| _ -> raise (Parse_error "expected identifier")
2026-04-29 18:56:41 +00:00
(* Parse a base type (no # prefix or # suffix).
Handles: int, bool, void, struct Name, TypeName, and [] suffixes. *)
let rec parse_base_type st =
2026-04-29 15:50:03 +00:00
let base =
match consume st with
| TIntKw -> TInt
| TBoolKw -> TBool
| TVoidKw -> TVoid
| TStructKw -> TStruct (expect_ident st)
2026-04-29 16:35:55 +00:00
| TIdent s -> TStruct s
2026-04-29 15:50:03 +00:00
| _ -> raise (Parse_error "expected type")
in
let rec arrays t =
match peek st with
| TLBracket ->
expect st TLBracket;
expect st TRBracket;
arrays (TArray t)
| _ -> t
in
arrays base
2026-04-29 18:56:41 +00:00
(* Parse a full type, including comptime forms:
#type -> TComptime type
type -> type
type#type -> TCombo (runtime_type, comptime_type) *)
and parse_type st =
match peek st with
| THash ->
expect st THash;
TComptime (parse_base_type st)
| _ ->
let t = parse_base_type st in
(match peek st with
| THash ->
expect st THash;
TCombo (t, parse_base_type st)
| _ -> t)
2026-04-29 15:50:03 +00:00
let rec parse_program st =
let rec loop acc =
match peek st with
| TEOF -> List.rev acc
| _ -> loop (parse_top st :: acc)
in
loop []
and parse_top st =
match peek st with
| TStructKw ->
expect st TStructKw;
let sname = expect_ident st in
2026-04-29 18:56:41 +00:00
expect st TLBrace;
let rec fields acc =
match peek st with
| TRBrace -> List.rev acc
| _ ->
let t = parse_base_type st in
let n = expect_ident st in
expect st TSemicolon;
fields ((t, n) :: acc)
in
let fs = fields [] in
expect st TRBrace;
expect st TSemicolon;
TopStruct { sname; fields = fs }
| TFn ->
expect st TFn;
let name = expect_ident st in
2026-04-29 15:50:03 +00:00
expect st TLParen;
let params = parse_params st in
expect st TRParen;
2026-04-29 18:56:41 +00:00
expect st TArrow;
let ret = parse_type st in
2026-04-29 15:50:03 +00:00
let body = parse_stmt_as_block st in
2026-04-29 18:56:41 +00:00
TopFunc { name; params; ret; body }
| TLet ->
expect st TLet;
let is_mut =
2026-04-29 15:50:03 +00:00
match peek st with
2026-04-29 18:56:41 +00:00
| TMut ->
expect st TMut;
true
| _ -> false
in
let name = expect_ident st in
let t_annot =
match peek st with
| TColon ->
expect st TColon;
Some (parse_type st)
2026-04-29 15:50:03 +00:00
| _ -> None
in
2026-04-29 18:56:41 +00:00
expect st TAssign;
let e = parse_expr st in
2026-04-29 15:50:03 +00:00
expect st TSemicolon;
2026-04-29 18:56:41 +00:00
TopGlobalVar (is_mut, name, t_annot, Some e)
| _ -> raise (Parse_error "expected top-level declaration (struct, fn, or let)")
2026-04-29 15:50:03 +00:00
and parse_params st =
match peek st with
| TRParen -> []
| _ ->
let rec loop acc =
let n = expect_ident st in
2026-04-29 18:56:41 +00:00
expect st TColon;
let t = parse_type st in
2026-04-29 15:50:03 +00:00
match peek st with
| TComma ->
expect st TComma;
2026-04-29 18:56:41 +00:00
loop ((n, t) :: acc)
| _ -> List.rev ((n, t) :: acc)
2026-04-29 15:50:03 +00:00
in
loop []
and parse_stmt_as_block st =
match peek st with
| TLBrace ->
expect st TLBrace;
let rec loop acc =
match peek st with
| TRBrace ->
expect st TRBrace;
List.rev acc
| _ -> loop (parse_stmt st :: acc)
in
loop []
| _ -> [ parse_stmt st ]
and parse_stmt st =
match peek st with
| TLBrace -> Block (parse_stmt_as_block st)
| TIf ->
expect st TIf;
expect st TLParen;
let cond = parse_expr st in
expect st TRParen;
let then_body = parse_stmt_as_block st in
let else_body =
match peek st with
| TElse ->
expect st TElse;
parse_stmt_as_block st
| _ -> []
in
If (cond, then_body, else_body)
| TForEach | TFor ->
(match peek st with
| TForEach -> expect st TForEach
| TFor ->
expect st TFor;
expect st TEach
| _ -> ());
expect st TLParen;
let it_t = parse_type st in
let it_name = expect_ident st in
expect st TIn;
let iterable = parse_expr st in
expect st TRParen;
let body = parse_stmt_as_block st in
ForEach (it_t, it_name, iterable, body)
| TReturn ->
expect st TReturn;
let v =
match peek st with
| TSemicolon -> None
| _ -> Some (parse_expr st)
in
expect st TSemicolon;
Return v
2026-04-29 18:56:41 +00:00
| TLet ->
expect st TLet;
let is_mut =
match peek st with
| TMut ->
expect st TMut;
true
| _ -> false
in
2026-04-29 15:50:03 +00:00
let n = expect_ident st in
2026-04-29 18:56:41 +00:00
let t_annot =
match peek st with
| TColon ->
expect st TColon;
Some (parse_type st)
| _ -> None
in
2026-04-29 15:50:03 +00:00
let init =
match peek st with
| TAssign ->
expect st TAssign;
Some (parse_expr st)
| _ -> None
in
expect st TSemicolon;
2026-04-29 18:56:41 +00:00
VarDecl (is_mut, n, t_annot, init)
2026-04-29 15:50:03 +00:00
| _ ->
let e = parse_expr st in
expect st TSemicolon;
Expr e
and parse_expr st = parse_assignment st
and parse_assignment st =
let lhs = parse_or st in
match peek st with
| TAssign ->
expect st TAssign;
let rhs = parse_assignment st in
Assign (lhs, rhs)
| _ -> lhs
and parse_or st =
let rec loop left =
match peek st with
| TOrOr ->
expect st TOrOr;
loop (Binop (Or, left, parse_and st))
| _ -> left
in
loop (parse_and st)
and parse_and st =
let rec loop left =
match peek st with
| TAndAnd ->
expect st TAndAnd;
loop (Binop (And, left, parse_equality st))
| _ -> left
in
loop (parse_equality st)
and parse_equality st =
let rec loop left =
match peek st with
| TEqEq ->
expect st TEqEq;
loop (Binop (Eq, left, parse_rel st))
| TNe ->
expect st TNe;
loop (Binop (Ne, left, parse_rel st))
| _ -> left
in
loop (parse_rel st)
and parse_rel st =
let rec loop left =
match peek st with
| TLt ->
expect st TLt;
loop (Binop (Lt, left, parse_add st))
| TLe ->
expect st TLe;
loop (Binop (Le, left, parse_add st))
| TGt ->
expect st TGt;
loop (Binop (Gt, left, parse_add st))
| TGe ->
expect st TGe;
loop (Binop (Ge, left, parse_add st))
| _ -> left
in
loop (parse_add st)
and parse_add st =
let rec loop left =
match peek st with
| TPlus ->
expect st TPlus;
loop (Binop (Add, left, parse_mul st))
| TMinus ->
expect st TMinus;
loop (Binop (Sub, left, parse_mul st))
| _ -> left
in
loop (parse_mul st)
and parse_mul st =
let rec loop left =
match peek st with
| TStar ->
expect st TStar;
loop (Binop (Mul, left, parse_unary st))
| TSlash ->
expect st TSlash;
loop (Binop (Div, left, parse_unary st))
| TPercent ->
expect st TPercent;
loop (Binop (Mod, left, parse_unary st))
| _ -> left
in
loop (parse_unary st)
and parse_unary st =
match peek st with
| TMinus ->
expect st TMinus;
Unop (Neg, parse_unary st)
| TBang ->
expect st TBang;
Unop (Not, parse_unary st)
| _ -> parse_postfix st
and parse_postfix st =
let rec loop e =
match peek st with
| TLParen ->
expect st TLParen;
let args = parse_args st in
expect st TRParen;
loop (Call (e, args))
| TLBracket ->
expect st TLBracket;
let idx = parse_expr st in
expect st TRBracket;
loop (ArrayGet (e, idx))
| TDot ->
expect st TDot;
let fld = expect_ident st in
loop (StructGet (e, fld))
| _ -> e
in
loop (parse_primary st)
and parse_args st =
match peek st with
| TRParen -> []
| _ ->
let rec loop acc =
let e = parse_expr st in
match peek st with
| TComma ->
expect st TComma;
loop (e :: acc)
| _ -> List.rev (e :: acc)
in
loop []
and parse_primary st =
match consume st with
| TIntLit n -> IntLit n
| TTrue -> BoolLit true
| TFalse -> BoolLit false
| TIdent s -> Var s
| TLParen ->
let e = parse_expr st in
expect st TRParen;
e
| _ -> raise (Parse_error "expected expression")
let parse_string src =
try
let st = mk_state (Lexer.lex src) in
Ok (parse_program st)
with
| Lexer.Lex_error msg -> Error ("lex error: " ^ msg)
| Parse_error msg -> Error ("parse error: " ^ msg)