diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..0dc69f5 --- /dev/null +++ b/.gitignore @@ -0,0 +1,43 @@ +_opam +_build +_build* +_boot +_boot* +_test_boot +_perf +_coverage +__pycache__ +*.install +compile_commands.json + +# vim swap files +*.swp +*.swo +nvim.log + +# emacs lock files +.#* + +# vscode settings +.vscode + +# git-ps hooks +.git-ps + +.duneboot.* +Makefile.dev +src/dune_rules/setup.ml +.DS_Store + +# nix +nix/profiles/ +result +result-* +start/ + +.claude/ +.codex + +perf.data +perf.data.old +profile.json.gz \ No newline at end of file diff --git a/_build/.db b/_build/.db deleted file mode 100644 index a92e5c1..0000000 Binary files a/_build/.db and /dev/null differ diff --git a/_build/.digest-db b/_build/.digest-db deleted file mode 100644 index 6c4640d..0000000 Binary files a/_build/.digest-db and /dev/null differ diff --git a/_build/.filesystem-clock b/_build/.filesystem-clock deleted file mode 100644 index c61f4a7..0000000 --- a/_build/.filesystem-clock +++ /dev/null @@ -1 +0,0 @@ - \ No newline at end of file diff --git a/_build/.lock b/_build/.lock deleted file mode 100644 index e69de29..0000000 diff --git a/_build/default/.dune/configurator b/_build/default/.dune/configurator deleted file mode 100644 index 2fcab61..0000000 --- a/_build/default/.dune/configurator +++ /dev/null @@ -1,2 +0,0 @@ -(ocamlc /home/steve/.opam/default/bin/ocamlc.opt) -(ocaml_config_vars (afl_instrument false) (align_double false) (align_int64 false) (architecture amd64) (asm as) (asm_cfi_supported true) (asm_size_type_directives true) (ast_impl_magic_number Caml1999M036) (ast_intf_magic_number Caml1999N036) (bytecode_cflags "-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread ") (bytecode_cppflags " -D_FILE_OFFSET_BITS=64 ") (bytecomp_c_compiler "gcc -O2 -fno-strict-aliasing -fwrapv -fPIC -pthread -D_FILE_OFFSET_BITS=64 ") (bytecomp_c_libraries "-lzstd -lm -lpthread") (c_compiler gcc) (ccomp_type cc) (cma_magic_number Caml1999A036) (cmi_magic_number Caml1999I036) (cmo_magic_number Caml1999O036) (cmt_magic_number Caml1999T036) (cmx_magic_number Caml1999Y036) (cmxa_magic_number Caml1999Z036) (cmxs_magic_number Caml1999D036) (compression_c_libraries -lzstd) (default_executable_name a.out) (default_safe_string true) (exec_magic_number Caml1999X036) (ext_asm .s) (ext_dll .so) (ext_exe "") (ext_lib .a) (ext_obj .o) (flambda false) (flat_float_array true) (function_sections true) (host x86_64-pc-linux-gnu) (int_size 63) (linear_magic_number Caml1999L036) (model default) (naked_pointers false) (native_c_compiler "gcc -O2 -fno-strict-aliasing -fwrapv -fPIC -pthread -D_FILE_OFFSET_BITS=64 ") (native_c_libraries " -lm -lpthread") (native_cflags "-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread ") (native_compiler true) (native_cppflags " -D_FILE_OFFSET_BITS=64 ") (native_dynlink true) (native_ldflags "") (native_pack_linker "ld -r -o ") (ocamlc_cflags "-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread ") (ocamlc_cppflags " -D_FILE_OFFSET_BITS=64 ") (ocamlopt_cflags "-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread ") (ocamlopt_cppflags " -D_FILE_OFFSET_BITS=64 ") (os_type Unix) (safe_string true) (standard_library /home/steve/.opam/default/lib/ocaml) (standard_library_default /home/steve/.opam/default/lib/ocaml) (supports_shared_libraries true) (system linux) (systhread_supported true) (target x86_64-pc-linux-gnu) (tsan false) (version 5.4.1) (windows_unicode false) (with_codegen_invariants true) (with_frame_pointers false) (with_nonexecstack_note true) (word_size 64)) diff --git a/_build/default/.dune/configurator.v2 b/_build/default/.dune/configurator.v2 deleted file mode 100644 index 0e6f606..0000000 --- a/_build/default/.dune/configurator.v2 +++ /dev/null @@ -1 +0,0 @@ -((6:ocamlc40:/home/steve/.opam/default/bin/ocamlc.opt)(17:ocaml_config_vars((14:afl_instrument5:false)(12:align_double5:false)(11:align_int645:false)(12:architecture5:amd64)(3:asm2:as)(17:asm_cfi_supported4:true)(24:asm_size_type_directives4:true)(21:ast_impl_magic_number12:Caml1999M036)(21:ast_intf_magic_number12:Caml1999N036)(15:bytecode_cflags49:-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread )(17:bytecode_cppflags24: -D_FILE_OFFSET_BITS=64 )(19:bytecomp_c_compiler78:gcc -O2 -fno-strict-aliasing -fwrapv -fPIC -pthread -D_FILE_OFFSET_BITS=64 )(20:bytecomp_c_libraries22:-lzstd -lm -lpthread)(10:c_compiler3:gcc)(10:ccomp_type2:cc)(16:cma_magic_number12:Caml1999A036)(16:cmi_magic_number12:Caml1999I036)(16:cmo_magic_number12:Caml1999O036)(16:cmt_magic_number12:Caml1999T036)(16:cmx_magic_number12:Caml1999Y036)(17:cmxa_magic_number12:Caml1999Z036)(17:cmxs_magic_number12:Caml1999D036)(23:compression_c_libraries6:-lzstd)(23:default_executable_name5:a.out)(19:default_safe_string4:true)(17:exec_magic_number12:Caml1999X036)(7:ext_asm2:.s)(7:ext_dll3:.so)(7:ext_exe0:)(7:ext_lib2:.a)(7:ext_obj2:.o)(7:flambda5:false)(16:flat_float_array4:true)(17:function_sections4:true)(4:host19:x86_64-pc-linux-gnu)(8:int_size2:63)(19:linear_magic_number12:Caml1999L036)(5:model7:default)(14:naked_pointers5:false)(17:native_c_compiler78:gcc -O2 -fno-strict-aliasing -fwrapv -fPIC -pthread -D_FILE_OFFSET_BITS=64 )(18:native_c_libraries15: -lm -lpthread)(13:native_cflags49:-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread )(15:native_compiler4:true)(15:native_cppflags24: -D_FILE_OFFSET_BITS=64 )(14:native_dynlink4:true)(14:native_ldflags0:)(18:native_pack_linker9:ld -r -o )(13:ocamlc_cflags49:-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread )(15:ocamlc_cppflags24: -D_FILE_OFFSET_BITS=64 )(15:ocamlopt_cflags49:-O2 -fno-strict-aliasing -fwrapv -fPIC -pthread )(17:ocamlopt_cppflags24: -D_FILE_OFFSET_BITS=64 )(7:os_type4:Unix)(11:safe_string4:true)(16:standard_library35:/home/steve/.opam/default/lib/ocaml)(24:standard_library_default35:/home/steve/.opam/default/lib/ocaml)(25:supports_shared_libraries4:true)(6:system5:linux)(19:systhread_supported4:true)(6:target19:x86_64-pc-linux-gnu)(4:tsan5:false)(7:version5:5.4.1)(15:windows_unicode5:false)(23:with_codegen_invariants4:true)(19:with_frame_pointers5:false)(22:with_nonexecstack_note4:true)(9:word_size2:64)))) \ No newline at end of file diff --git a/_build/default/META.spooky b/_build/default/META.spooky deleted file mode 100644 index e69de29..0000000 diff --git a/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmi b/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmi deleted file mode 100644 index ac36680..0000000 Binary files a/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmi and /dev/null differ diff --git a/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmti b/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmti deleted file mode 100644 index 6061faf..0000000 Binary files a/_build/default/bin/.main.eobjs/byte/dune__exe__Main.cmti and /dev/null differ diff --git a/_build/default/bin/.main.eobjs/native/dune__exe__Main.cmx b/_build/default/bin/.main.eobjs/native/dune__exe__Main.cmx deleted file mode 100644 index e3a821d..0000000 Binary files a/_build/default/bin/.main.eobjs/native/dune__exe__Main.cmx and /dev/null differ diff --git a/_build/default/bin/.main.eobjs/native/dune__exe__Main.o b/_build/default/bin/.main.eobjs/native/dune__exe__Main.o deleted file mode 100644 index 494b498..0000000 Binary files a/_build/default/bin/.main.eobjs/native/dune__exe__Main.o and /dev/null differ diff --git a/_build/default/bin/.merlin-conf/exe-main b/_build/default/bin/.merlin-conf/exe-main deleted file mode 100644 index 9e715f3..0000000 Binary files a/_build/default/bin/.merlin-conf/exe-main and /dev/null differ diff --git a/_build/default/bin/main.exe b/_build/default/bin/main.exe deleted file mode 100755 index 3777ac0..0000000 Binary files a/_build/default/bin/main.exe and /dev/null differ diff --git a/_build/default/bin/main.ml b/_build/default/bin/main.ml deleted file mode 100644 index 46289d0..0000000 --- a/_build/default/bin/main.ml +++ /dev/null @@ -1,36 +0,0 @@ -let sample_program = - {| -struct Point { - int x; - int y; -}; - -int sum_array(int[] arr) { - int total = 0; - foreach (int n in arr) { - total = total + n; - } - return total; -} - -int main() { - struct Point p; - int[] nums; - int x = 1 + 2 * 3; - p.x = x; - if (x > 0) { - x = x + p.x; - } else { - x = 0; - } - x = sum_array(nums); - return x; -} -|} - -let () = - match Spooky.parse_and_type_check sample_program with - | Ok ast -> - Printf.printf "Program parsed and type-checked successfully.\n\n"; - Printf.printf "Parsed AST:\n%s\n" (Spooky.string_of_program ast) - | Error msg -> Printf.printf "Error: %s\n" msg diff --git a/_build/default/bin/main.mli b/_build/default/bin/main.mli deleted file mode 100644 index 335ae1f..0000000 --- a/_build/default/bin/main.mli +++ /dev/null @@ -1 +0,0 @@ -(* Auto-generated by Dune *) \ No newline at end of file diff --git a/_build/default/lib/.merlin-conf/lib-spooky b/_build/default/lib/.merlin-conf/lib-spooky deleted file mode 100644 index 473155d..0000000 Binary files a/_build/default/lib/.merlin-conf/lib-spooky and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/byte/spooky.cmi b/_build/default/lib/.spooky.objs/byte/spooky.cmi deleted file mode 100644 index 459bc5e..0000000 Binary files a/_build/default/lib/.spooky.objs/byte/spooky.cmi and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/byte/spooky.cmo b/_build/default/lib/.spooky.objs/byte/spooky.cmo deleted file mode 100644 index ad429c5..0000000 Binary files a/_build/default/lib/.spooky.objs/byte/spooky.cmo and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/byte/spooky.cmt b/_build/default/lib/.spooky.objs/byte/spooky.cmt deleted file mode 100644 index d19864f..0000000 Binary files a/_build/default/lib/.spooky.objs/byte/spooky.cmt and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/byte/spooky.cmti b/_build/default/lib/.spooky.objs/byte/spooky.cmti deleted file mode 100644 index fac86d6..0000000 Binary files a/_build/default/lib/.spooky.objs/byte/spooky.cmti and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/native/spooky.cmx b/_build/default/lib/.spooky.objs/native/spooky.cmx deleted file mode 100644 index d3be128..0000000 Binary files a/_build/default/lib/.spooky.objs/native/spooky.cmx and /dev/null differ diff --git a/_build/default/lib/.spooky.objs/native/spooky.o b/_build/default/lib/.spooky.objs/native/spooky.o deleted file mode 100644 index 4b7719f..0000000 Binary files a/_build/default/lib/.spooky.objs/native/spooky.o and /dev/null differ diff --git a/_build/default/lib/spooky.a b/_build/default/lib/spooky.a deleted file mode 100644 index 9dd1688..0000000 Binary files a/_build/default/lib/spooky.a and /dev/null differ diff --git a/_build/default/lib/spooky.cma b/_build/default/lib/spooky.cma deleted file mode 100644 index ddd1e04..0000000 Binary files a/_build/default/lib/spooky.cma and /dev/null differ diff --git a/_build/default/lib/spooky.cmxa b/_build/default/lib/spooky.cmxa deleted file mode 100644 index 4b3c7d5..0000000 Binary files a/_build/default/lib/spooky.cmxa and /dev/null differ diff --git a/_build/default/lib/spooky.cmxs b/_build/default/lib/spooky.cmxs deleted file mode 100755 index 4b8799a..0000000 Binary files a/_build/default/lib/spooky.cmxs and /dev/null differ diff --git a/_build/default/lib/spooky.ml b/_build/default/lib/spooky.ml deleted file mode 100644 index e0f55e0..0000000 --- a/_build/default/lib/spooky.ml +++ /dev/null @@ -1,886 +0,0 @@ -module StringMap = Map.Make (String) - -exception Parse_error of string -exception Type_error of string - -type typ = - | TInt - | TBool - | TVoid - | TStruct of string - | TArray of typ - -type binop = - | Add - | Sub - | Mul - | Div - | Mod - | And - | Or - | Eq - | Ne - | Lt - | Le - | Gt - | Ge - -type unop = Neg | Not - -type expr = - | IntLit of int - | BoolLit of bool - | Var of string - | Binop of binop * expr * expr - | Unop of unop * expr - | Assign of expr * expr - | Call of expr * expr list - | ArrayGet of expr * expr - | StructGet of expr * string - -type stmt = - | VarDecl of typ * string * expr option - | Expr of expr - | If of expr * stmt list * stmt list - | ForEach of typ * string * expr * stmt list - | Return of expr option - | Block of stmt list - -type func = { - name : string; - params : (typ * string) list; - ret : typ; - body : stmt list; -} - -type struct_def = { - sname : string; - fields : (typ * string) list; -} - -type top = - | TopStruct of struct_def - | TopFunc of func - | TopGlobalVar of typ * string * expr option - -type program = top list - -let string_of_typ = - let rec go = function - | TInt -> "int" - | TBool -> "bool" - | TVoid -> "void" - | TStruct n -> "struct " ^ n - | TArray t -> go t ^ "[]" - in - go - -let string_of_binop = function - | Add -> "+" - | Sub -> "-" - | Mul -> "*" - | Div -> "/" - | Mod -> "%" - | And -> "&&" - | Or -> "||" - | Eq -> "==" - | Ne -> "!=" - | Lt -> "<" - | Le -> "<=" - | Gt -> ">" - | Ge -> ">=" - -let string_of_unop = function Neg -> "-" | Not -> "!" - -let rec string_of_expr = function - | IntLit n -> Printf.sprintf "IntLit(%d)" n - | BoolLit b -> Printf.sprintf "BoolLit(%b)" b - | Var v -> Printf.sprintf "Var(%s)" v - | Binop (op, a, b) -> - Printf.sprintf "Binop(%s, %s, %s)" (string_of_binop op) (string_of_expr a) (string_of_expr b) - | Unop (op, e) -> Printf.sprintf "Unop(%s, %s)" (string_of_unop op) (string_of_expr e) - | Assign (lhs, rhs) -> Printf.sprintf "Assign(%s, %s)" (string_of_expr lhs) (string_of_expr rhs) - | Call (callee, args) -> - let args_s = String.concat ", " (List.map string_of_expr args) in - Printf.sprintf "Call(%s, [%s])" (string_of_expr callee) args_s - | ArrayGet (arr, idx) -> Printf.sprintf "ArrayGet(%s, %s)" (string_of_expr arr) (string_of_expr idx) - | StructGet (obj, fld) -> Printf.sprintf "StructGet(%s, %s)" (string_of_expr obj) fld - -let indent n = String.make (2 * n) ' ' - -let rec string_of_stmt ?(level = 0) st = - let i = indent level in - match st with - | VarDecl (t, n, None) -> Printf.sprintf "%sVarDecl(%s %s)" i (string_of_typ t) n - | VarDecl (t, n, Some e) -> - Printf.sprintf "%sVarDecl(%s %s = %s)" i (string_of_typ t) n (string_of_expr e) - | Expr e -> Printf.sprintf "%sExpr(%s)" i (string_of_expr e) - | Return None -> Printf.sprintf "%sReturn" i - | Return (Some e) -> Printf.sprintf "%sReturn(%s)" i (string_of_expr e) - | Block body -> - let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in - if String.equal body_s "" then Printf.sprintf "%sBlock" i - else Printf.sprintf "%sBlock\n%s" i body_s - | If (cond, tbranch, ebranch) -> - let then_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) tbranch) in - let else_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) ebranch) in - if ebranch = [] then Printf.sprintf "%sIf(%s)\n%s" i (string_of_expr cond) then_s - else Printf.sprintf "%sIf(%s)\n%s\n%sElse\n%s" i (string_of_expr cond) then_s i else_s - | ForEach (it_t, it_name, iterable, body) -> - let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in - Printf.sprintf "%sForEach(%s %s in %s)\n%s" i (string_of_typ it_t) it_name (string_of_expr iterable) - body_s - -let string_of_top = function - | TopStruct s -> - let fields = - s.fields |> List.map (fun (t, n) -> Printf.sprintf " Field(%s %s)" (string_of_typ t) n) - |> String.concat "\n" - in - if String.equal fields "" then Printf.sprintf "Struct %s" s.sname - else Printf.sprintf "Struct %s\n%s" s.sname fields - | TopGlobalVar (t, n, init) -> - (match init with - | None -> Printf.sprintf "GlobalVar(%s %s)" (string_of_typ t) n - | Some e -> Printf.sprintf "GlobalVar(%s %s = %s)" (string_of_typ t) n (string_of_expr e)) - | TopFunc f -> - let params = - f.params - |> List.map (fun (t, n) -> Printf.sprintf "%s %s" (string_of_typ t) n) - |> String.concat ", " - in - let body = String.concat "\n" (List.map (string_of_stmt ~level:1) f.body) in - if String.equal body "" then Printf.sprintf "Function %s(%s) -> %s" f.name params (string_of_typ f.ret) - else Printf.sprintf "Function %s(%s) -> %s\n%s" f.name params (string_of_typ f.ret) body - -let string_of_program (prog : program) = prog |> List.map string_of_top |> String.concat "\n\n" - -let rec equal_typ a b = - match (a, b) with - | TInt, TInt | TBool, TBool | TVoid, TVoid -> true - | TStruct x, TStruct y -> String.equal x y - | TArray x, TArray y -> equal_typ x y - | _ -> false - -type token = - | TIntKw - | TBoolKw - | TVoidKw - | TStructKw - | TIf - | TElse - | TFor - | TEach - | TForEach - | TIn - | TReturn - | TTrue - | TFalse - | TIdent of string - | TIntLit of int - | TLParen - | TRParen - | TLBrace - | TRBrace - | TLBracket - | TRBracket - | TSemicolon - | TComma - | TDot - | TAssign - | TPlus - | TMinus - | TStar - | TSlash - | TPercent - | TAndAnd - | TOrOr - | TBang - | TEqEq - | TNe - | TLt - | TLe - | TGt - | TGe - | TEOF - -let is_space = function ' ' | '\t' | '\r' | '\n' -> true | _ -> false - -let is_digit c = c >= '0' && c <= '9' - -let is_ident_start c = - (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_' - -let is_ident_char c = is_ident_start c || is_digit c - -let keyword_or_ident s = - match s with - | "int" -> TIntKw - | "bool" -> TBoolKw - | "void" -> TVoidKw - | "struct" -> TStructKw - | "if" -> TIf - | "else" -> TElse - | "for" -> TFor - | "each" -> TEach - | "foreach" -> TForEach - | "in" -> TIn - | "return" -> TReturn - | "true" -> TTrue - | "false" -> TFalse - | _ -> TIdent s - -let lex (src : string) : token list = - let n = String.length src in - let rec skip_line_comment i = - if i >= n then i - else if src.[i] = '\n' then i + 1 - else skip_line_comment (i + 1) - in - let rec skip_block_comment i = - if i + 1 >= n then raise (Parse_error "unterminated block comment") - else if src.[i] = '*' && src.[i + 1] = '/' then i + 2 - else skip_block_comment (i + 1) - in - let rec read_number i j = - if j < n && is_digit src.[j] then read_number i (j + 1) - else - let s = String.sub src i (j - i) in - (TIntLit (int_of_string s), j) - in - let rec read_ident i j = - if j < n && is_ident_char src.[j] then read_ident i (j + 1) - else - let s = String.sub src i (j - i) in - (keyword_or_ident s, j) - in - let rec loop i acc = - if i >= n then List.rev (TEOF :: acc) - else if is_space src.[i] then loop (i + 1) acc - else - match src.[i] with - | '/' when i + 1 < n && src.[i + 1] = '/' -> loop (skip_line_comment (i + 2)) acc - | '/' when i + 1 < n && src.[i + 1] = '*' -> loop (skip_block_comment (i + 2)) acc - | '(' -> loop (i + 1) (TLParen :: acc) - | ')' -> loop (i + 1) (TRParen :: acc) - | '{' -> loop (i + 1) (TLBrace :: acc) - | '}' -> loop (i + 1) (TRBrace :: acc) - | '[' -> loop (i + 1) (TLBracket :: acc) - | ']' -> loop (i + 1) (TRBracket :: acc) - | ';' -> loop (i + 1) (TSemicolon :: acc) - | ',' -> loop (i + 1) (TComma :: acc) - | '.' -> loop (i + 1) (TDot :: acc) - | '+' -> loop (i + 1) (TPlus :: acc) - | '-' -> loop (i + 1) (TMinus :: acc) - | '*' -> loop (i + 1) (TStar :: acc) - | '%' -> loop (i + 1) (TPercent :: acc) - | '/' -> loop (i + 1) (TSlash :: acc) - | '!' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TNe :: acc) - | '!' -> loop (i + 1) (TBang :: acc) - | '=' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TEqEq :: acc) - | '=' -> loop (i + 1) (TAssign :: acc) - | '&' when i + 1 < n && src.[i + 1] = '&' -> loop (i + 2) (TAndAnd :: acc) - | '|' when i + 1 < n && src.[i + 1] = '|' -> loop (i + 2) (TOrOr :: acc) - | '<' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TLe :: acc) - | '<' -> loop (i + 1) (TLt :: acc) - | '>' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TGe :: acc) - | '>' -> loop (i + 1) (TGt :: acc) - | c when is_digit c -> - let tok, j = read_number i (i + 1) in - loop j (tok :: acc) - | c when is_ident_start c -> - let tok, j = read_ident i (i + 1) in - loop j (tok :: acc) - | c -> - let msg = Printf.sprintf "unexpected character: %c" c in - raise (Parse_error msg) - in - loop 0 [] - -type parser_state = { - toks : token array; - mutable i : int; -} - -let mk_state toks = { toks = Array.of_list toks; i = 0 } - -let peek st = if st.i < Array.length st.toks then st.toks.(st.i) else TEOF - -let consume st = - let t = peek st in - st.i <- st.i + 1; - t - -let expect st tok = - match (tok, consume st) with - | TLParen, TLParen - | TRParen, TRParen - | TLBrace, TLBrace - | TRBrace, TRBrace - | TLBracket, TLBracket - | TRBracket, TRBracket - | TSemicolon, TSemicolon - | TComma, TComma - | TDot, TDot - | TAssign, TAssign - | TPlus, TPlus - | TMinus, TMinus - | TStar, TStar - | TSlash, TSlash - | TPercent, TPercent - | TAndAnd, TAndAnd - | TOrOr, TOrOr - | TBang, TBang - | TEqEq, TEqEq - | TNe, TNe - | TLt, TLt - | TLe, TLe - | TGt, TGt - | TGe, TGe - | TIf, TIf - | TElse, TElse - | TForEach, TForEach - | TFor, TFor - | TEach, TEach - | TIn, TIn - | TReturn, TReturn - | TIntKw, TIntKw - | TBoolKw, TBoolKw - | TVoidKw, TVoidKw - | TStructKw, TStructKw - | TEOF, TEOF -> () - | _ -> raise (Parse_error "unexpected token") - -let expect_ident st = - match consume st with - | TIdent s -> s - | _ -> raise (Parse_error "expected identifier") - -let starts_type = function TIntKw | TBoolKw | TVoidKw | TStructKw -> true | _ -> false - -let rec parse_type st = - let base = - match consume st with - | TIntKw -> TInt - | TBoolKw -> TBool - | TVoidKw -> TVoid - | TStructKw -> TStruct (expect_ident st) - | _ -> raise (Parse_error "expected type") - in - let rec arrays t = - match peek st with - | TLBracket -> - expect st TLBracket; - expect st TRBracket; - arrays (TArray t) - | _ -> t - in - arrays base - -let rec parse_program st = - let rec loop acc = - match peek st with - | TEOF -> List.rev acc - | _ -> loop (parse_top st :: acc) - in - loop [] - -and parse_top st = - match peek st with - | TStructKw -> - expect st TStructKw; - let sname = expect_ident st in - (match peek st with - | TLBrace -> - expect st TLBrace; - let rec fields acc = - match peek st with - | TRBrace -> List.rev acc - | _ -> - let t = parse_type st in - let n = expect_ident st in - expect st TSemicolon; - fields ((t, n) :: acc) - in - let fs = fields [] in - expect st TRBrace; - expect st TSemicolon; - TopStruct { sname; fields = fs } - | _ -> - let ty = TStruct sname in - parse_top_after_type st ty) - | _ -> - let ty = parse_type st in - parse_top_after_type st ty - -and parse_top_after_type st ty = - let name = expect_ident st in - match peek st with - | TLParen -> - expect st TLParen; - let params = parse_params st in - expect st TRParen; - let body = parse_stmt_as_block st in - TopFunc { name; params; ret = ty; body } - | _ -> - let init = - match peek st with - | TAssign -> - expect st TAssign; - Some (parse_expr st) - | _ -> None - in - expect st TSemicolon; - TopGlobalVar (ty, name, init) - -and parse_params st = - match peek st with - | TRParen -> [] - | _ -> - let rec loop acc = - let t = parse_type st in - let n = expect_ident st in - match peek st with - | TComma -> - expect st TComma; - loop ((t, n) :: acc) - | _ -> List.rev ((t, n) :: acc) - in - loop [] - -and parse_stmt_as_block st = - match peek st with - | TLBrace -> - expect st TLBrace; - let rec loop acc = - match peek st with - | TRBrace -> - expect st TRBrace; - List.rev acc - | _ -> loop (parse_stmt st :: acc) - in - loop [] - | _ -> [ parse_stmt st ] - -and parse_stmt st = - match peek st with - | TLBrace -> Block (parse_stmt_as_block st) - | TIf -> - expect st TIf; - expect st TLParen; - let cond = parse_expr st in - expect st TRParen; - let then_body = parse_stmt_as_block st in - let else_body = - match peek st with - | TElse -> - expect st TElse; - parse_stmt_as_block st - | _ -> [] - in - If (cond, then_body, else_body) - | TForEach | TFor -> - (match peek st with - | TForEach -> expect st TForEach - | TFor -> - expect st TFor; - expect st TEach - | _ -> ()); - expect st TLParen; - let it_t = parse_type st in - let it_name = expect_ident st in - expect st TIn; - let iterable = parse_expr st in - expect st TRParen; - let body = parse_stmt_as_block st in - ForEach (it_t, it_name, iterable, body) - | TReturn -> - expect st TReturn; - let v = - match peek st with - | TSemicolon -> None - | _ -> Some (parse_expr st) - in - expect st TSemicolon; - Return v - | t when starts_type t -> - let ty = parse_type st in - let n = expect_ident st in - let init = - match peek st with - | TAssign -> - expect st TAssign; - Some (parse_expr st) - | _ -> None - in - expect st TSemicolon; - VarDecl (ty, n, init) - | _ -> - let e = parse_expr st in - expect st TSemicolon; - Expr e - -and parse_expr st = parse_assignment st - -and parse_assignment st = - let lhs = parse_or st in - match peek st with - | TAssign -> - expect st TAssign; - let rhs = parse_assignment st in - Assign (lhs, rhs) - | _ -> lhs - -and parse_or st = - let rec loop left = - match peek st with - | TOrOr -> - expect st TOrOr; - loop (Binop (Or, left, parse_and st)) - | _ -> left - in - loop (parse_and st) - -and parse_and st = - let rec loop left = - match peek st with - | TAndAnd -> - expect st TAndAnd; - loop (Binop (And, left, parse_equality st)) - | _ -> left - in - loop (parse_equality st) - -and parse_equality st = - let rec loop left = - match peek st with - | TEqEq -> - expect st TEqEq; - loop (Binop (Eq, left, parse_rel st)) - | TNe -> - expect st TNe; - loop (Binop (Ne, left, parse_rel st)) - | _ -> left - in - loop (parse_rel st) - -and parse_rel st = - let rec loop left = - match peek st with - | TLt -> - expect st TLt; - loop (Binop (Lt, left, parse_add st)) - | TLe -> - expect st TLe; - loop (Binop (Le, left, parse_add st)) - | TGt -> - expect st TGt; - loop (Binop (Gt, left, parse_add st)) - | TGe -> - expect st TGe; - loop (Binop (Ge, left, parse_add st)) - | _ -> left - in - loop (parse_add st) - -and parse_add st = - let rec loop left = - match peek st with - | TPlus -> - expect st TPlus; - loop (Binop (Add, left, parse_mul st)) - | TMinus -> - expect st TMinus; - loop (Binop (Sub, left, parse_mul st)) - | _ -> left - in - loop (parse_mul st) - -and parse_mul st = - let rec loop left = - match peek st with - | TStar -> - expect st TStar; - loop (Binop (Mul, left, parse_unary st)) - | TSlash -> - expect st TSlash; - loop (Binop (Div, left, parse_unary st)) - | TPercent -> - expect st TPercent; - loop (Binop (Mod, left, parse_unary st)) - | _ -> left - in - loop (parse_unary st) - -and parse_unary st = - match peek st with - | TMinus -> - expect st TMinus; - Unop (Neg, parse_unary st) - | TBang -> - expect st TBang; - Unop (Not, parse_unary st) - | _ -> parse_postfix st - -and parse_postfix st = - let rec loop e = - match peek st with - | TLParen -> - expect st TLParen; - let args = parse_args st in - expect st TRParen; - loop (Call (e, args)) - | TLBracket -> - expect st TLBracket; - let idx = parse_expr st in - expect st TRBracket; - loop (ArrayGet (e, idx)) - | TDot -> - expect st TDot; - let fld = expect_ident st in - loop (StructGet (e, fld)) - | _ -> e - in - loop (parse_primary st) - -and parse_args st = - match peek st with - | TRParen -> [] - | _ -> - let rec loop acc = - let e = parse_expr st in - match peek st with - | TComma -> - expect st TComma; - loop (e :: acc) - | _ -> List.rev (e :: acc) - in - loop [] - -and parse_primary st = - match consume st with - | TIntLit n -> IntLit n - | TTrue -> BoolLit true - | TFalse -> BoolLit false - | TIdent s -> Var s - | TLParen -> - let e = parse_expr st in - expect st TRParen; - e - | _ -> raise (Parse_error "expected expression") - -let parse_string src = - try - let st = mk_state (lex src) in - Ok (parse_program st) - with Parse_error msg -> Error msg - -type func_sig = { - fparams : typ list; - fret : typ; -} - -type tc_ctx = { - structs : (typ StringMap.t) StringMap.t; - funcs : func_sig StringMap.t; - globals : typ StringMap.t; -} - -let fail_type msg = raise (Type_error msg) - -let expect_type got want = - if not (equal_typ got want) then - fail_type (Printf.sprintf "type mismatch: got %s, expected %s" (string_of_typ got) (string_of_typ want)) - -let rec validate_type (structs : (typ StringMap.t) StringMap.t) (allow_void : bool) = function - | TVoid when allow_void -> () - | TVoid -> fail_type "void is not a valid variable type" - | TStruct n -> - if not (StringMap.mem n structs) then fail_type ("unknown struct type: " ^ n) - | TArray t -> - if equal_typ t TVoid then fail_type "array element type cannot be void"; - validate_type structs false t - | TInt | TBool -> () - -let collect_ctx (prog : program) : tc_ctx = - let rec collect tops structs funcs globals = - match tops with - | [] -> { structs; funcs; globals } - | TopStruct s :: tl -> - if StringMap.mem s.sname structs then fail_type ("duplicate struct: " ^ s.sname); - let fields = - List.fold_left - (fun acc (t, n) -> - if StringMap.mem n acc then fail_type ("duplicate field " ^ n ^ " in struct " ^ s.sname); - StringMap.add n t acc) - StringMap.empty s.fields - in - collect tl (StringMap.add s.sname fields structs) funcs globals - | TopFunc f :: tl -> - if StringMap.mem f.name funcs then fail_type ("duplicate function: " ^ f.name); - let sig_ = { fparams = List.map fst f.params; fret = f.ret } in - collect tl structs (StringMap.add f.name sig_ funcs) globals - | TopGlobalVar (t, n, _) :: tl -> - if StringMap.mem n globals then fail_type ("duplicate global variable: " ^ n); - collect tl structs funcs (StringMap.add n t globals) - in - collect prog StringMap.empty StringMap.empty StringMap.empty - -let lookup_var env x = - match StringMap.find_opt x env with - | Some t -> t - | None -> fail_type ("unknown variable: " ^ x) - -let lookup_struct_field ctx sname fname = - match StringMap.find_opt sname ctx.structs with - | None -> fail_type ("unknown struct: " ^ sname) - | Some fields -> - (match StringMap.find_opt fname fields with - | None -> fail_type ("unknown field " ^ fname ^ " on struct " ^ sname) - | Some t -> t) - -let rec infer_expr ctx env = function - | IntLit _ -> TInt - | BoolLit _ -> TBool - | Var x -> lookup_var env x - | Unop (Neg, e) -> - expect_type (infer_expr ctx env e) TInt; - TInt - | Unop (Not, e) -> - expect_type (infer_expr ctx env e) TBool; - TBool - | Binop (op, a, b) -> - let ta = infer_expr ctx env a in - let tb = infer_expr ctx env b in - (match op with - | Add | Sub | Mul | Div | Mod -> - expect_type ta TInt; - expect_type tb TInt; - TInt - | And | Or -> - expect_type ta TBool; - expect_type tb TBool; - TBool - | Lt | Le | Gt | Ge -> - expect_type ta TInt; - expect_type tb TInt; - TBool - | Eq | Ne -> - if not (equal_typ ta tb) then fail_type "equality operands must have same type"; - TBool) - | Assign (lhs, rhs) -> - (match lhs with Var _ | ArrayGet _ | StructGet _ -> () | _ -> fail_type "left side of assignment is not assignable"); - let tl = infer_expr ctx env lhs in - let tr = infer_expr ctx env rhs in - expect_type tr tl; - tl - | ArrayGet (arr, idx) -> - expect_type (infer_expr ctx env idx) TInt; - (match infer_expr ctx env arr with - | TArray t -> t - | t -> fail_type ("indexing requires array, got " ^ string_of_typ t)) - | StructGet (obj, fld) -> - (match infer_expr ctx env obj with - | TStruct sname -> lookup_struct_field ctx sname fld - | t -> fail_type ("field access requires struct, got " ^ string_of_typ t)) - | Call (callee, args) -> - let fname = - match callee with - | Var n -> n - | _ -> fail_type "only direct function calls are supported" - in - let sig_ = - match StringMap.find_opt fname ctx.funcs with - | Some s -> s - | None -> fail_type ("unknown function: " ^ fname) - in - if List.length args <> List.length sig_.fparams then - fail_type - (Printf.sprintf "function %s expects %d arguments, got %d" fname (List.length sig_.fparams) - (List.length args)); - List.iter2 (fun arg pty -> expect_type (infer_expr ctx env arg) pty) args sig_.fparams; - sig_.fret - -let rec check_stmt ctx ret env = function - | VarDecl (t, n, init) -> - validate_type ctx.structs false t; - (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t); - StringMap.add n t env - | Expr e -> - ignore (infer_expr ctx env e); - env - | If (cond, tbranch, ebranch) -> - expect_type (infer_expr ctx env cond) TBool; - ignore (check_block ctx ret env tbranch); - ignore (check_block ctx ret env ebranch); - env - | ForEach (it_t, it_name, iterable, body) -> - validate_type ctx.structs false it_t; - (match infer_expr ctx env iterable with - | TArray elem_t -> expect_type elem_t it_t - | t -> fail_type ("foreach expects array iterable, got " ^ string_of_typ t)); - let env' = StringMap.add it_name it_t env in - ignore (check_block ctx ret env' body); - env - | Return None -> - expect_type TVoid ret; - env - | Return (Some e) -> - expect_type (infer_expr ctx env e) ret; - env - | Block stmts -> - ignore (check_block ctx ret env stmts); - env - -and check_block ctx ret env stmts = List.fold_left (check_stmt ctx ret) env stmts - -let rec has_return_stmt = function - | Return _ -> true - | If (_, t, e) -> List.exists has_return_stmt t || List.exists has_return_stmt e - | ForEach (_, _, _, body) | Block body -> List.exists has_return_stmt body - | VarDecl _ | Expr _ -> false - -let check_program (prog : program) = - let ctx = collect_ctx prog in - StringMap.iter (fun _ t -> validate_type ctx.structs false t) ctx.globals; - StringMap.iter - (fun _ sig_ -> - List.iter (validate_type ctx.structs false) sig_.fparams; - validate_type ctx.structs true sig_.fret) - ctx.funcs; - List.iter - (function - | TopStruct s -> - List.iter (fun (t, _) -> validate_type ctx.structs false t) s.fields - | TopGlobalVar (t, _, init) -> - validate_type ctx.structs false t; - let env = ctx.globals in - (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t) - | TopFunc f -> - let env_with_globals = ctx.globals in - let env = - List.fold_left - (fun acc (t, n) -> - validate_type ctx.structs false t; - if StringMap.mem n acc then fail_type ("duplicate parameter name: " ^ n); - StringMap.add n t acc) - env_with_globals f.params - in - ignore (check_block ctx f.ret env f.body); - if (not (equal_typ f.ret TVoid)) && not (List.exists has_return_stmt f.body) then - fail_type ("non-void function " ^ f.name ^ " must return a value")) - prog - -let type_check (prog : program) = - try - check_program prog; - Ok () - with Type_error msg -> Error msg - -let parse_and_type_check src = - match parse_string src with - | Error e -> Error ("Parse error: " ^ e) - | Ok prog -> - (match type_check prog with - | Error e -> Error ("Type error: " ^ e) - | Ok () -> Ok prog) diff --git a/_build/default/spooky.dune-package b/_build/default/spooky.dune-package deleted file mode 100644 index 1448450..0000000 --- a/_build/default/spooky.dune-package +++ /dev/null @@ -1,4 +0,0 @@ -(lang dune 3.22) -(name spooky) -(sections (lib .) (bin ../../bin)) -(files (lib (META dune-package opam)) (bin (spooky))) diff --git a/_build/default/spooky.install b/_build/default/spooky.install deleted file mode 100644 index 85c1948..0000000 --- a/_build/default/spooky.install +++ /dev/null @@ -1,8 +0,0 @@ -lib: [ - "_build/install/default/lib/spooky/META" - "_build/install/default/lib/spooky/dune-package" - "_build/install/default/lib/spooky/opam" -] -bin: [ - "_build/install/default/bin/spooky" -] diff --git a/_build/default/spooky.opam b/_build/default/spooky.opam deleted file mode 100644 index fcacbb1..0000000 --- a/_build/default/spooky.opam +++ /dev/null @@ -1,32 +0,0 @@ -# This file is generated by dune, edit dune-project instead -opam-version: "2.0" -synopsis: "A short synopsis" -description: "A longer description" -maintainer: ["Maintainer Name "] -authors: ["Author Name "] -license: "LICENSE" -tags: ["add topics" "to describe" "your" "project"] -homepage: "https://github.com/username/reponame" -doc: "https://url/to/documentation" -bug-reports: "https://github.com/username/reponame/issues" -depends: [ - "dune" {>= "3.22"} - "ocaml" - "odoc" {with-doc} -] -build: [ - ["dune" "subst"] {dev} - [ - "dune" - "build" - "-p" - name - "-j" - jobs - "@install" - "@runtest" {with-test} - "@doc" {with-doc} - ] -] -dev-repo: "git+https://github.com/username/reponame.git" -x-maintenance-intent: ["(latest)"] diff --git a/_build/default/test/.merlin-conf/exe-test_spooky b/_build/default/test/.merlin-conf/exe-test_spooky deleted file mode 100644 index a84ed15..0000000 Binary files a/_build/default/test/.merlin-conf/exe-test_spooky and /dev/null differ diff --git a/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmi b/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmi deleted file mode 100644 index 42ca5a4..0000000 Binary files a/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmi and /dev/null differ diff --git a/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmti b/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmti deleted file mode 100644 index 0f55258..0000000 Binary files a/_build/default/test/.test_spooky.eobjs/byte/dune__exe__Test_spooky.cmti and /dev/null differ diff --git a/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.cmx b/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.cmx deleted file mode 100644 index b9fbb91..0000000 Binary files a/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.cmx and /dev/null differ diff --git a/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.o b/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.o deleted file mode 100644 index baa7409..0000000 Binary files a/_build/default/test/.test_spooky.eobjs/native/dune__exe__Test_spooky.o and /dev/null differ diff --git a/_build/default/test/test_spooky.exe b/_build/default/test/test_spooky.exe deleted file mode 100755 index 98ad7fe..0000000 Binary files a/_build/default/test/test_spooky.exe and /dev/null differ diff --git a/_build/default/test/test_spooky.ml b/_build/default/test/test_spooky.ml deleted file mode 100644 index 37c3639..0000000 --- a/_build/default/test/test_spooky.ml +++ /dev/null @@ -1,52 +0,0 @@ -let valid_program = - {| -struct Item { - int value; -}; - -int fold(int[] xs) { - int total = 0; - foreach (int x in xs) { - total = total + x; - } - return total; -} - -int main() { - int[] xs; - struct Item it; - int y = 2 + 3 * 4; - it.value = y; - if (y >= 0) { - y = fold(xs); - } else { - y = 0; - } - return y; -} -|} - -let invalid_program = - {| -int main() { - bool flag = true; - int x = 1; - x = flag; - return x; -} -|} - -let test_valid_program () = - match Spooky.parse_and_type_check valid_program with - | Ok _ -> () - | Error msg -> failwith ("expected valid program, got: " ^ msg) - -let test_invalid_program () = - match Spooky.parse_and_type_check invalid_program with - | Ok _ -> failwith "expected type error, but got success" - | Error _ -> () - -let () = - test_valid_program (); - test_invalid_program (); - print_endline "All parser/type-check tests passed." diff --git a/_build/default/test/test_spooky.mli b/_build/default/test/test_spooky.mli deleted file mode 100644 index 335ae1f..0000000 --- a/_build/default/test/test_spooky.mli +++ /dev/null @@ -1 +0,0 @@ -(* Auto-generated by Dune *) \ No newline at end of file diff --git a/_build/install/default/bin/spooky b/_build/install/default/bin/spooky deleted file mode 120000 index 9cc466a..0000000 --- a/_build/install/default/bin/spooky +++ /dev/null @@ -1 +0,0 @@ -../../../default/bin/main.exe \ No newline at end of file diff --git a/_build/install/default/lib/spooky/META b/_build/install/default/lib/spooky/META deleted file mode 120000 index 119bf67..0000000 --- a/_build/install/default/lib/spooky/META +++ /dev/null @@ -1 +0,0 @@ -../../../../default/META.spooky \ No newline at end of file diff --git a/_build/install/default/lib/spooky/dune-package b/_build/install/default/lib/spooky/dune-package deleted file mode 120000 index 5fe1d58..0000000 --- a/_build/install/default/lib/spooky/dune-package +++ /dev/null @@ -1 +0,0 @@ -../../../../default/spooky.dune-package \ No newline at end of file diff --git a/_build/install/default/lib/spooky/opam b/_build/install/default/lib/spooky/opam deleted file mode 120000 index 18800c0..0000000 --- a/_build/install/default/lib/spooky/opam +++ /dev/null @@ -1 +0,0 @@ -../../../../default/spooky.opam \ No newline at end of file diff --git a/_build/trace.csexp b/_build/trace.csexp deleted file mode 100644 index fcda99f..0000000 --- a/_build/trace.csexp +++ /dev/null @@ -1 +0,0 @@ -(6:config4:init19:1777476517247843124(7:version6:3.22.2)(9:build_dir6:_build)(4:argv(4:dune4:exec6:spooky))(3:env(33:ANDROID_NDK_HOME=/opt/android-ndk20:XDG_SESSION_TYPE=tty52:SSH_CONNECTION=84.115.232.107 53606 65.21.205.199 22140:CAML_LD_LIBRARY_PATH=/home/steve/.opam/default/lib/stublibs:/home/steve/.opam/default/lib/ocaml/stublibs:/home/steve/.opam/default/lib/ocaml18:DIRENV_LOG_FORMAT=28:MPS_JDK=/opt/MPS 2021.3/jbr/22:XDG_SESSION_CLASS=user153:VSCODE_GIT_ASKPASS_MAIN=/home/steve/.vscode-server/cli/servers/Stable-10c8e557c8b9f9ed0a87f61f1c9a44bde731c409/server/extensions/git/dist/askpass-main.js53:DBUS_SESSION_BUS_ADDRESS=unix:path=/run/user/1000/bus30:XDG_RUNTIME_DIR=/run/user/100028:TERM_PROGRAM_VERSION=1.117.060:OCAMLTOP_INCLUDE_PATH=/home/steve/.opam/default/lib/toplevel16:LANG=en_US.UTF-834:VSCODE_PYTHON_AUTOACTIVATE_GUARD=119:SYSTEMD_EDITOR=nvim2511:PATH=/home/steve/.opam/default/bin:/home/steve/.vscode-server/data/User/globalStorage/github.copilot-chat/debugCommand:/home/steve/.vscode-server/data/User/globalStorage/github.copilot-chat/copilotCli:/home/steve/.vscode-server/extensions/vadimcn.vscode-lldb-1.12.1/bin:/home/steve/.vscode-server/extensions/vadimcn.vscode-lldb-1.12.1/bin:/home/steve/.vscode-server/data/User/globalStorage/github.copilot-chat/debugCommand:/home/steve/.vscode-server/data/User/globalStorage/github.copilot-chat/copilotCli:/home/steve/.vscode-server/cli/servers/Stable-10c8e557c8b9f9ed0a87f61f1c9a44bde731c409/server/bin/remote-cli:/usr/local/sbin:/usr/local/bin:/usr/bin:/opt/riscv/bin:/home/steve/.npm/bin:/home/steve/.cargo/bin:/home/steve/scripts:/home/steve/bin:/home/steve/go/bin:/usr/local/go/bin:/home/steve/.fluvio/bin:/home/steve/.local/share/ponyup/bin:/home/steve/NuSMV-2.6.0-Linux/bin/:/home/steve/.local/bin:/home/steve/.vino/bin:/home/steve/projects/odin/Odin:/home/steve/j90x/bin:/home/steve/opt/GNAT/2021/bin:/home/steve/opt/GNAT/2021-arm-elf/bin:/home/steve/opt/GNAT/arm-gnueabihf//bin:/home/steve/.fly/bin:/home/steve/.platformio/penv/bin:/home/steve/.ghcup/bin/:/home/steve/.wasmer/bin/:/home/steve/.roswell/bin:/home/steve/.emacs.d/bin:/var/lib/snapd/snap/bin:/opt/riscv/bin:/home/steve/.npm/bin:/home/steve/.cargo/bin:/home/steve/scripts:/home/steve/bin:/home/steve/go/bin:/usr/local/go/bin:/home/steve/.fluvio/bin:/home/steve/.local/share/ponyup/bin:/home/steve/NuSMV-2.6.0-Linux/bin/:/home/steve/.local/bin:/home/steve/.vino/bin:/home/steve/projects/odin/Odin:/home/steve/j90x/bin:/home/steve/opt/GNAT/2021/bin:/home/steve/opt/GNAT/2021-arm-elf/bin:/home/steve/opt/GNAT/arm-gnueabihf//bin:/home/steve/.fly/bin:/home/steve/.platformio/penv/bin:/home/steve/.ghcup/bin/:/home/steve/.wasmer/bin/:/home/steve/.roswell/bin:/home/steve/.emacs.d/bin:/usr/bin/site_perl:/usr/bin/vendor_perl:/usr/bin/core_perl:/usr/lib/rustup/bin:/opt/riscv/bin:/home/steve/.npm/bin:/home/steve/.cargo/bin:/home/steve/scripts:/home/steve/bin:/home/steve/go/bin:/usr/local/go/bin:/home/steve/.fluvio/bin:/home/steve/.local/share/ponyup/bin:/home/steve/NuSMV-2.6.0-Linux/bin/:/home/steve/.local/bin:/home/steve/.vino/bin:/home/steve/projects/odin/Odin:/home/steve/j90x/bin:/home/steve/opt/GNAT/2021/bin:/home/steve/opt/GNAT/2021-arm-elf/bin:/home/steve/opt/GNAT/arm-gnueabihf//bin:/home/steve/.fly/bin:/home/steve/.platformio/penv/bin:/home/steve/.ghcup/bin/:/home/steve/.wasmer/bin/:/home/steve/.roswell/bin:/home/steve/.emacs.d/bin19:TERM_PROGRAM=vscode13:LOGNAME=steve7:SHLVL=311:VISUAL=nvim63:VSCODE_GIT_IPC_HANDLE=/run/user/1000/vscode-git-7fc91d943b.sock34:SSH_CLIENT=84.115.232.107 53606 22122:VSCODE_GIT_ASKPASS_NODE=/home/steve/.vscode-server/cli/servers/Stable-10c8e557c8b9f9ed0a87f61f1c9a44bde731c409/server/node37:PWD=/home/steve/projects/ocaml/spooky80:OPAM_LAST_ENV=/home/steve/.opam/.last-env/env-550cde22b571c5ee951c5aa1091061dc-065:FZF_DEFAULT_COMMAND=rg --files --hidden --follow --glob "!.git/*"30:VSCODE_GIT_ASKPASS_EXTRA_ARGS=29:ANDROID_HOME=/opt/android-sdk17:XDG_SESSION_ID=2719:STARSHIP_SHELL=fish124:BROWSER=/home/steve/.vscode-server/cli/servers/Stable-10c8e557c8b9f9ed0a87f61f1c9a44bde731c409/server/bin/helpers/browser.sh37:STARSHIP_SESSION_KEY=381514717543912158:OCAML_TOPLEVEL_PATH=/home/steve/.opam/default/lib/toplevel136:GIT_ASKPASS=/home/steve/.vscode-server/cli/servers/Stable-10c8e557c8b9f9ed0a87f61f1c9a44bde731c409/server/extensions/git/dist/askpass.sh10:USER=steve44:OPAM_SWITCH_PREFIX=/home/steve/.opam/default38:MANPATH=:/home/steve/.opam/default/man14:MOTD_SHOWN=pam16:HOME=/home/steve31:FLYCTL_INSTALL=/home/steve/.fly20:OPAMNOENVNOTICE=true19:TERM=xterm-256color87:VSCODE_IPC_HOOK_CLI=/run/user/1000/vscode-ipc-0974db83-8f7b-46f1-a5db-f5ac1fc5b845.sock19:COLORTERM=truecolor19:SHELL=/usr/bin/fish26:MAIL=/var/spool/mail/steve11:EDITOR=nvim64:XDG_DATA_DIRS=/usr/local/share:/usr/share:/var/lib/snapd/desktop150:CODELLDB_LAUNCH_CONNECT_FILE=/home/steve/.vscode-server/data/User/workspaceStorage/803f4dd44aeb4117289049d18054ecce/vadimcn.vscode-lldb/rpcaddress.txt18:VSCODE_INJECTION=1))(4:root33:/home/steve/projects/ocaml/spooky)(3:pid7:1719226)(11:initial_cwd33:/home/steve/projects/ocaml/spooky)(5:start19:1777476517243937930))(3:log4:info19:1777476517247870504(7:message10:ocamlparam)(10:OCAMLPARAM5:unset))(6:thread12:spawn_thread19:1777476517247903075(4:name7:console))(3:log4:info19:1777476517248024396(7:message20:Shared cache enabled)(13:cache_enabled25:Enabled_except_user_rules))(3:log4:info19:1777476517248034036(7:message21:Shared cache location)(8:root_dir26:/home/steve/.cache/dune/db))(3:log4:info19:1777476517248050326(7:message14:Workspace root)(4:root33:/home/steve/projects/ocaml/spooky))(3:log4:info19:1777476517248806052(7:message25:Auto-detected concurrency)(11:concurrency2:12))(6:thread12:spawn_thread19:1777476517248827182(4:name14:signal-watcher))(7:process15:signal_received19:1777476517248966054(6:signal4:CHLD))(10:persistent2:db(19:17774765172490558645:62811)(4:path17:_build/.digest-db)(6:module9:DIGEST-DB)(9:operation4:load))(3:log4:info19:1777476517249287316(7:message12:Dune context)(7:context((4:name7:default)(4:kind7:default)(7:profile3:Dev)(6:merlin4:true)(14:fdo_target_exe())(9:build_dir(12:In_build_dir7:default))(15:instrument_with()))))(5:rules9:Dune load(19:17774765172493093276:120251)(3:dir1:.))(7:process5:start19:1777476517249657460(12:process_args(7:-config))(3:pid7:1719231)(10:categories())(6:queued3:820)(4:prog40:/home/steve/.opam/default/bin/ocamlc.opt)(3:dir1:.))(7:process15:signal_received19:1777476517254495581(6:signal4:CHLD))(7:process6:finish(19:17774765172496574607:4881072)(12:process_args(7:-config))(3:pid7:1719231)(10:categories())(4:prog40:/home/steve/.opam/default/bin/ocamlc.opt)(3:dir1:.)(4:exit1:0)(6:rusage((13:user_cpu_time6:947000)(15:system_cpu_time7:3782000)(6:maxrss5:27052)(6:minflt4:1152)(6:majflt1:0)(7:inblock1:0)(7:oublock1:0)(5:nvcsw1:1)(6:nivcsw1:0))))(10:persistent2:db(19:17774765172564528885:31131)(4:path10:_build/.db)(6:module14:INCREMENTAL-DB)(9:operation4:load))(10:persistent2:db(19:17774765172668997786:113111)(4:path17:_build/.digest-db)(6:module9:DIGEST-DB)(9:operation4:save))(6:config4:exit19:1777476517267041130(2:gc((10:stack_size1:0)(10:heap_words6:373048)(14:top_heap_words6:373048)(11:minor_words7:917491.)(11:major_words7:277171.)(14:promoted_words7:263253.)(11:compactions1:0)(17:major_collections1:3)(17:minor_collections1:5)))(2:io((10:files_read((5:count1:2)(4:time7:1943206)(5:bytes3:538)))(13:files_written((5:count1:1)(4:time1:0)(5:bytes1:7)))(16:directories_read((5:count1:1)(4:time1:0)))))(6:digest((5:files((5:count1:3)(4:time5:53541)(5:bytes1:0)))(6:values((5:count2:52)(4:time5:58400)(5:bytes5:12846)))))) \ No newline at end of file diff --git a/bin/main.ml b/bin/main.ml index 46289d0..5f1fcac 100644 --- a/bin/main.ml +++ b/bin/main.ml @@ -1,36 +1,68 @@ -let sample_program = - {| -struct Point { - int x; - int y; -}; +type generator = Json | C -int sum_array(int[] arr) { - int total = 0; - foreach (int n in arr) { - total = total + n; - } - return total; -} +let read_file path = + let ic = open_in_bin path in + Fun.protect + ~finally:(fun () -> close_in ic) + (fun () -> really_input_string ic (in_channel_length ic)) -int main() { - struct Point p; - int[] nums; - int x = 1 + 2 * 3; - p.x = x; - if (x > 0) { - x = x + p.x; - } else { - x = 0; - } - x = sum_array(nums); - return x; -} -|} +let write_file path content = + let oc = open_out_bin path in + Fun.protect + ~finally:(fun () -> close_out oc) + (fun () -> output_string oc content) let () = - match Spooky.parse_and_type_check sample_program with - | Ok ast -> - Printf.printf "Program parsed and type-checked successfully.\n\n"; - Printf.printf "Parsed AST:\n%s\n" (Spooky.string_of_program ast) - | Error msg -> Printf.printf "Error: %s\n" msg + let generator = ref Json in + let input_path = ref "" in + let output_path = ref None in + let set_generator = function + | "json" -> generator := Json + | "c" -> generator := C + | _ -> invalid_arg "unsupported generator" + in + let specs = + [ + ( "--generator", + Arg.Symbol ([ "json"; "c" ], set_generator), + "Code generator to run: json or c" ); + ("--input", Arg.String (fun p -> input_path := p), "Input source file path"); + ("--output", Arg.String (fun p -> output_path := Some p), "Optional output file path"); + ] + in + let usage = "Usage: spooky --generator --input [--output ]" in + Arg.parse specs (fun _ -> ()) usage; + if String.equal !input_path "" then ( + prerr_endline "Missing required --input argument."; + Arg.usage specs usage; + exit 2); + let src = + try read_file !input_path + with Sys_error msg -> + prerr_endline ("Input error: " ^ msg); + exit 1 + in + let ast = + match Spooky.parse_string src with + | Error msg -> + prerr_endline msg; + exit 1 + | Ok ast -> ast + in + (match Spooky.type_check ast with + | Error msg -> + prerr_endline ("type error: " ^ msg); + exit 1 + | Ok () -> ()); + let generated = + match !generator with + | Json -> Spooky.generate_json ast + | C -> Spooky.generate_c ast + in + match !output_path with + | Some path -> + (try write_file path generated + with Sys_error msg -> + prerr_endline ("Output error: " ^ msg); + exit 1) + | None -> print_endline generated diff --git a/examples/sample.spooky b/examples/sample.spooky new file mode 100644 index 0000000..74718eb --- /dev/null +++ b/examples/sample.spooky @@ -0,0 +1,26 @@ +struct Point { + int x; + int y; +}; + +int sum_array(int[] arr) { + int total = 0; + foreach (int n in arr) { + total = total + n; + } + return total; +} + +int main() { + struct Point p; + int[] nums; + int x = 1 + 2 * 3; + p.x = x; + if (x > 0) { + x = x + p.x; + } else { + x = 0; + } + x = sum_array(nums); + return x; +} diff --git a/lib/ast.ml b/lib/ast.ml new file mode 100644 index 0000000..0e64882 --- /dev/null +++ b/lib/ast.ml @@ -0,0 +1,158 @@ +type typ = + | TInt + | TBool + | TVoid + | TStruct of string + | TArray of typ + +type binop = + | Add + | Sub + | Mul + | Div + | Mod + | And + | Or + | Eq + | Ne + | Lt + | Le + | Gt + | Ge + +type unop = Neg | Not + +type expr = + | IntLit of int + | BoolLit of bool + | Var of string + | Binop of binop * expr * expr + | Unop of unop * expr + | Assign of expr * expr + | Call of expr * expr list + | ArrayGet of expr * expr + | StructGet of expr * string + +type stmt = + | VarDecl of typ * string * expr option + | Expr of expr + | If of expr * stmt list * stmt list + | ForEach of typ * string * expr * stmt list + | Return of expr option + | Block of stmt list + +type func = { + name : string; + params : (typ * string) list; + ret : typ; + body : stmt list; +} + +type struct_def = { + sname : string; + fields : (typ * string) list; +} + +type top = + | TopStruct of struct_def + | TopFunc of func + | TopGlobalVar of typ * string * expr option + +type program = top list + +let string_of_typ = + let rec go = function + | TInt -> "int" + | TBool -> "bool" + | TVoid -> "void" + | TStruct n -> "struct " ^ n + | TArray t -> go t ^ "[]" + in + go + +let string_of_binop = function + | Add -> "+" + | Sub -> "-" + | Mul -> "*" + | Div -> "/" + | Mod -> "%" + | And -> "&&" + | Or -> "||" + | Eq -> "==" + | Ne -> "!=" + | Lt -> "<" + | Le -> "<=" + | Gt -> ">" + | Ge -> ">=" + +let string_of_unop = function Neg -> "-" | Not -> "!" + +let rec string_of_expr = function + | IntLit n -> Printf.sprintf "IntLit(%d)" n + | BoolLit b -> Printf.sprintf "BoolLit(%b)" b + | Var v -> Printf.sprintf "Var(%s)" v + | Binop (op, a, b) -> + Printf.sprintf "Binop(%s, %s, %s)" (string_of_binop op) (string_of_expr a) (string_of_expr b) + | Unop (op, e) -> Printf.sprintf "Unop(%s, %s)" (string_of_unop op) (string_of_expr e) + | Assign (lhs, rhs) -> Printf.sprintf "Assign(%s, %s)" (string_of_expr lhs) (string_of_expr rhs) + | Call (callee, args) -> + let args_s = String.concat ", " (List.map string_of_expr args) in + Printf.sprintf "Call(%s, [%s])" (string_of_expr callee) args_s + | ArrayGet (arr, idx) -> Printf.sprintf "ArrayGet(%s, %s)" (string_of_expr arr) (string_of_expr idx) + | StructGet (obj, fld) -> Printf.sprintf "StructGet(%s, %s)" (string_of_expr obj) fld + +let indent n = String.make (2 * n) ' ' + +let rec string_of_stmt ?(level = 0) st = + let i = indent level in + match st with + | VarDecl (t, n, None) -> Printf.sprintf "%sVarDecl(%s %s)" i (string_of_typ t) n + | VarDecl (t, n, Some e) -> + Printf.sprintf "%sVarDecl(%s %s = %s)" i (string_of_typ t) n (string_of_expr e) + | Expr e -> Printf.sprintf "%sExpr(%s)" i (string_of_expr e) + | Return None -> Printf.sprintf "%sReturn" i + | Return (Some e) -> Printf.sprintf "%sReturn(%s)" i (string_of_expr e) + | Block body -> + let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in + if String.equal body_s "" then Printf.sprintf "%sBlock" i + else Printf.sprintf "%sBlock\n%s" i body_s + | If (cond, tbranch, ebranch) -> + let then_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) tbranch) in + let else_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) ebranch) in + if ebranch = [] then Printf.sprintf "%sIf(%s)\n%s" i (string_of_expr cond) then_s + else Printf.sprintf "%sIf(%s)\n%s\n%sElse\n%s" i (string_of_expr cond) then_s i else_s + | ForEach (it_t, it_name, iterable, body) -> + let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in + Printf.sprintf "%sForEach(%s %s in %s)\n%s" i (string_of_typ it_t) it_name (string_of_expr iterable) + body_s + +let string_of_top = function + | TopStruct s -> + let fields = + s.fields |> List.map (fun (t, n) -> Printf.sprintf " Field(%s %s)" (string_of_typ t) n) + |> String.concat "\n" + in + if String.equal fields "" then Printf.sprintf "Struct %s" s.sname + else Printf.sprintf "Struct %s\n%s" s.sname fields + | TopGlobalVar (t, n, init) -> + (match init with + | None -> Printf.sprintf "GlobalVar(%s %s)" (string_of_typ t) n + | Some e -> Printf.sprintf "GlobalVar(%s %s = %s)" (string_of_typ t) n (string_of_expr e)) + | TopFunc f -> + let params = + f.params + |> List.map (fun (t, n) -> Printf.sprintf "%s %s" (string_of_typ t) n) + |> String.concat ", " + in + let body = String.concat "\n" (List.map (string_of_stmt ~level:1) f.body) in + if String.equal body "" then Printf.sprintf "Function %s(%s) -> %s" f.name params (string_of_typ f.ret) + else Printf.sprintf "Function %s(%s) -> %s\n%s" f.name params (string_of_typ f.ret) body + +let string_of_program (prog : program) = prog |> List.map string_of_top |> String.concat "\n\n" + +let rec equal_typ a b = + match (a, b) with + | TInt, TInt | TBool, TBool | TVoid, TVoid -> true + | TStruct x, TStruct y -> String.equal x y + | TArray x, TArray y -> equal_typ x y + | _ -> false diff --git a/_build/default/lib/spooky.mli b/lib/ast.mli similarity index 86% rename from _build/default/lib/spooky.mli rename to lib/ast.mli index c0b4748..fe717da 100644 --- a/_build/default/lib/spooky.mli +++ b/lib/ast.mli @@ -61,7 +61,8 @@ type top = type program = top list val string_of_typ : typ -> string +val string_of_binop : binop -> string +val string_of_unop : unop -> string +val string_of_expr : expr -> string val string_of_program : program -> string -val parse_string : string -> (program, string) result -val type_check : program -> (unit, string) result -val parse_and_type_check : string -> (program, string) result +val equal_typ : typ -> typ -> bool diff --git a/lib/generator_c.ml b/lib/generator_c.ml new file mode 100644 index 0000000..3547f86 --- /dev/null +++ b/lib/generator_c.ml @@ -0,0 +1,83 @@ +open Ast + +let rec c_type = function + | TInt -> "int" + | TBool -> "bool" + | TVoid -> "void" + | TStruct n -> "struct " ^ n + | TArray t -> c_type t ^ "*" + +let rec expr_to_c = function + | IntLit n -> string_of_int n + | BoolLit true -> "true" + | BoolLit false -> "false" + | Var n -> n + | Binop (op, a, b) -> Printf.sprintf "(%s %s %s)" (expr_to_c a) (Ast.string_of_binop op) (expr_to_c b) + | Unop (op, e) -> Printf.sprintf "(%s%s)" (Ast.string_of_unop op) (expr_to_c e) + | Assign (lhs, rhs) -> Printf.sprintf "(%s = %s)" (expr_to_c lhs) (expr_to_c rhs) + | Call (callee, args) -> + let args_s = args |> List.map expr_to_c |> String.concat ", " in + Printf.sprintf "%s(%s)" (expr_to_c callee) args_s + | ArrayGet (arr, idx) -> Printf.sprintf "%s[%s]" (expr_to_c arr) (expr_to_c idx) + | StructGet (obj, fld) -> Printf.sprintf "%s.%s" (expr_to_c obj) fld + +let indent n = String.make (2 * n) ' ' + +let rec stmt_to_c ?(level = 1) = function + | VarDecl (t, n, None) -> Printf.sprintf "%s%s %s;" (indent level) (c_type t) n + | VarDecl (t, n, Some e) -> Printf.sprintf "%s%s %s = %s;" (indent level) (c_type t) n (expr_to_c e) + | Expr e -> Printf.sprintf "%s%s;" (indent level) (expr_to_c e) + | Return None -> Printf.sprintf "%sreturn;" (indent level) + | Return (Some e) -> Printf.sprintf "%sreturn %s;" (indent level) (expr_to_c e) + | Block body -> + let inner = body |> List.map (stmt_to_c ~level:(level + 1)) |> String.concat "\n" in + if String.equal inner "" then Printf.sprintf "%s{ }" (indent level) + else Printf.sprintf "%s{\n%s\n%s}" (indent level) inner (indent level) + | If (cond, tbranch, ebranch) -> + let then_body = tbranch |> List.map (stmt_to_c ~level:(level + 1)) |> String.concat "\n" in + let else_body = ebranch |> List.map (stmt_to_c ~level:(level + 1)) |> String.concat "\n" in + if ebranch = [] then + Printf.sprintf "%sif (%s) {\n%s\n%s}" (indent level) (expr_to_c cond) then_body (indent level) + else + Printf.sprintf "%sif (%s) {\n%s\n%s} else {\n%s\n%s}" (indent level) (expr_to_c cond) + then_body (indent level) else_body (indent level) + | ForEach (it_t, it_name, iterable, body) -> + let body_inner = body |> List.map (stmt_to_c ~level:(level + 2)) |> String.concat "\n" in + (match iterable with + | Var arr_name -> + Printf.sprintf + "%sfor (int __i = 0; __i < %s_len; ++__i) {\n%s%s %s = %s[__i];\n%s\n%s}" + (indent level) arr_name (indent (level + 1)) (c_type it_t) it_name arr_name body_inner (indent level) + | _ -> + Printf.sprintf + "%s/* foreach over complex iterable not supported in strict C lowering */\n%sfor (int __i = 0; ; ++__i) {\n%s%s %s = /* TODO */ 0;\n%s\n%s}" + (indent level) (indent level) (indent (level + 1)) (c_type it_t) it_name body_inner (indent level)) + +let struct_to_c s = + let fields = + s.fields + |> List.map (fun (t, n) -> Printf.sprintf " %s %s;" (c_type t) n) + |> String.concat "\n" + in + Printf.sprintf "struct %s {\n%s\n};" s.sname fields + +let func_to_c f = + let params = + f.params + |> List.map (fun (t, n) -> Printf.sprintf "%s %s" (c_type t) n) + |> String.concat ", " + in + let body = f.body |> List.map (stmt_to_c ~level:1) |> String.concat "\n" in + Printf.sprintf "%s %s(%s) {\n%s\n}" (c_type f.ret) f.name params body + +let top_to_c = function + | TopStruct s -> struct_to_c s + | TopFunc f -> func_to_c f + | TopGlobalVar (t, n, init) -> + (match init with + | None -> Printf.sprintf "%s %s;" (c_type t) n + | Some e -> Printf.sprintf "%s %s = %s;" (c_type t) n (expr_to_c e)) + +let generate (prog : program) = + let header = "#include \n\n" in + header ^ String.concat "\n\n" (List.map top_to_c prog) ^ "\n" diff --git a/lib/generator_c.mli b/lib/generator_c.mli new file mode 100644 index 0000000..5c3510e --- /dev/null +++ b/lib/generator_c.mli @@ -0,0 +1 @@ +val generate : Ast.program -> string diff --git a/lib/generator_json.ml b/lib/generator_json.ml new file mode 100644 index 0000000..d527b20 --- /dev/null +++ b/lib/generator_json.ml @@ -0,0 +1,89 @@ +open Ast + +let escape_json s = + let b = Buffer.create (String.length s + 16) in + String.iter + (function + | '"' -> Buffer.add_string b "\\\"" + | '\\' -> Buffer.add_string b "\\\\" + | '\n' -> Buffer.add_string b "\\n" + | '\r' -> Buffer.add_string b "\\r" + | '\t' -> Buffer.add_string b "\\t" + | c -> Buffer.add_char b c) + s; + Buffer.contents b + +let q s = Printf.sprintf "\"%s\"" (escape_json s) +let obj fields = "{" ^ String.concat "," fields ^ "}" +let arr xs = "[" ^ String.concat "," xs ^ "]" +let kv k v = q k ^ ":" ^ v + +let rec typ_to_json = function + | TInt -> obj [ kv "kind" (q "int") ] + | TBool -> obj [ kv "kind" (q "bool") ] + | TVoid -> obj [ kv "kind" (q "void") ] + | TStruct name -> obj [ kv "kind" (q "struct"); kv "name" (q name) ] + | TArray t -> obj [ kv "kind" (q "array"); kv "elem" (typ_to_json t) ] + +let rec expr_to_json = function + | IntLit n -> obj [ kv "node" (q "IntLit"); kv "value" (string_of_int n) ] + | BoolLit b -> obj [ kv "node" (q "BoolLit"); kv "value" (if b then "true" else "false") ] + | Var n -> obj [ kv "node" (q "Var"); kv "name" (q n) ] + | Binop (op, a, b) -> + obj + [ kv "node" (q "Binop"); kv "op" (q (Ast.string_of_binop op)); kv "left" (expr_to_json a); + kv "right" (expr_to_json b) ] + | Unop (op, e) -> + obj [ kv "node" (q "Unop"); kv "op" (q (Ast.string_of_unop op)); kv "expr" (expr_to_json e) ] + | Assign (lhs, rhs) -> + obj [ kv "node" (q "Assign"); kv "lhs" (expr_to_json lhs); kv "rhs" (expr_to_json rhs) ] + | Call (callee, args) -> + obj [ kv "node" (q "Call"); kv "callee" (expr_to_json callee); kv "args" (arr (List.map expr_to_json args)) ] + | ArrayGet (arr_e, idx) -> + obj [ kv "node" (q "ArrayGet"); kv "array" (expr_to_json arr_e); kv "index" (expr_to_json idx) ] + | StructGet (target, field) -> + obj [ kv "node" (q "StructGet"); kv "target" (expr_to_json target); kv "field" (q field) ] + +let rec stmt_to_json = function + | VarDecl (t, n, init) -> + obj + [ kv "node" (q "VarDecl"); kv "type" (typ_to_json t); kv "name" (q n); + kv "init" (match init with None -> "null" | Some e -> expr_to_json e) ] + | Expr e -> obj [ kv "node" (q "Expr"); kv "expr" (expr_to_json e) ] + | If (cond, tbranch, ebranch) -> + obj + [ kv "node" (q "If"); kv "cond" (expr_to_json cond); kv "then" (arr (List.map stmt_to_json tbranch)); + kv "else" (arr (List.map stmt_to_json ebranch)) ] + | ForEach (it_t, it_name, iterable, body) -> + obj + [ kv "node" (q "ForEach"); kv "iterType" (typ_to_json it_t); kv "iterName" (q it_name); + kv "iterable" (expr_to_json iterable); kv "body" (arr (List.map stmt_to_json body)) ] + | Return eo -> + obj [ kv "node" (q "Return"); kv "expr" (match eo with None -> "null" | Some e -> expr_to_json e) ] + | Block body -> obj [ kv "node" (q "Block"); kv "body" (arr (List.map stmt_to_json body)) ] + +let top_to_json = function + | TopStruct s -> + obj + [ kv "node" (q "Struct"); kv "name" (q s.sname); + kv "fields" + (arr + (List.map + (fun (t, n) -> obj [ kv "type" (typ_to_json t); kv "name" (q n) ]) + s.fields)) ] + | TopFunc f -> + obj + [ kv "node" (q "Function"); kv "name" (q f.name); kv "ret" (typ_to_json f.ret); + kv "params" + (arr + (List.map + (fun (t, n) -> obj [ kv "type" (typ_to_json t); kv "name" (q n) ]) + f.params)); + kv "body" (arr (List.map stmt_to_json f.body)) ] + | TopGlobalVar (t, n, init) -> + obj + [ kv "node" (q "GlobalVar"); kv "type" (typ_to_json t); kv "name" (q n); + kv "init" (match init with None -> "null" | Some e -> expr_to_json e) ] + +let generate (prog : program) = + obj [ kv "node" (q "Program"); kv "tops" (arr (List.map top_to_json prog)) ] diff --git a/lib/generator_json.mli b/lib/generator_json.mli new file mode 100644 index 0000000..5c3510e --- /dev/null +++ b/lib/generator_json.mli @@ -0,0 +1 @@ +val generate : Ast.program -> string diff --git a/lib/lexer.ml b/lib/lexer.ml new file mode 100644 index 0000000..ab79660 --- /dev/null +++ b/lib/lexer.ml @@ -0,0 +1,135 @@ +type token = + | TIntKw + | TBoolKw + | TVoidKw + | TStructKw + | TIf + | TElse + | TFor + | TEach + | TForEach + | TIn + | TReturn + | TTrue + | TFalse + | TIdent of string + | TIntLit of int + | TLParen + | TRParen + | TLBrace + | TRBrace + | TLBracket + | TRBracket + | TSemicolon + | TComma + | TDot + | TAssign + | TPlus + | TMinus + | TStar + | TSlash + | TPercent + | TAndAnd + | TOrOr + | TBang + | TEqEq + | TNe + | TLt + | TLe + | TGt + | TGe + | TEOF + +exception Lex_error of string + +let is_space = function ' ' | '\t' | '\r' | '\n' -> true | _ -> false +let is_digit c = c >= '0' && c <= '9' + +let is_ident_start c = + (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_' + +let is_ident_char c = is_ident_start c || is_digit c + +let keyword_or_ident s = + match s with + | "int" -> TIntKw + | "bool" -> TBoolKw + | "void" -> TVoidKw + | "struct" -> TStructKw + | "if" -> TIf + | "else" -> TElse + | "for" -> TFor + | "each" -> TEach + | "foreach" -> TForEach + | "in" -> TIn + | "return" -> TReturn + | "true" -> TTrue + | "false" -> TFalse + | _ -> TIdent s + +let lex (src : string) : token list = + let n = String.length src in + let rec skip_line_comment i = + if i >= n then i + else if src.[i] = '\n' then i + 1 + else skip_line_comment (i + 1) + in + let rec skip_block_comment i = + if i + 1 >= n then raise (Lex_error "unterminated block comment") + else if src.[i] = '*' && src.[i + 1] = '/' then i + 2 + else skip_block_comment (i + 1) + in + let rec read_number i j = + if j < n && is_digit src.[j] then read_number i (j + 1) + else + let s = String.sub src i (j - i) in + (TIntLit (int_of_string s), j) + in + let rec read_ident i j = + if j < n && is_ident_char src.[j] then read_ident i (j + 1) + else + let s = String.sub src i (j - i) in + (keyword_or_ident s, j) + in + let rec loop i acc = + if i >= n then List.rev (TEOF :: acc) + else if is_space src.[i] then loop (i + 1) acc + else + match src.[i] with + | '/' when i + 1 < n && src.[i + 1] = '/' -> loop (skip_line_comment (i + 2)) acc + | '/' when i + 1 < n && src.[i + 1] = '*' -> loop (skip_block_comment (i + 2)) acc + | '(' -> loop (i + 1) (TLParen :: acc) + | ')' -> loop (i + 1) (TRParen :: acc) + | '{' -> loop (i + 1) (TLBrace :: acc) + | '}' -> loop (i + 1) (TRBrace :: acc) + | '[' -> loop (i + 1) (TLBracket :: acc) + | ']' -> loop (i + 1) (TRBracket :: acc) + | ';' -> loop (i + 1) (TSemicolon :: acc) + | ',' -> loop (i + 1) (TComma :: acc) + | '.' -> loop (i + 1) (TDot :: acc) + | '+' -> loop (i + 1) (TPlus :: acc) + | '-' -> loop (i + 1) (TMinus :: acc) + | '*' -> loop (i + 1) (TStar :: acc) + | '%' -> loop (i + 1) (TPercent :: acc) + | '/' -> loop (i + 1) (TSlash :: acc) + | '!' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TNe :: acc) + | '!' -> loop (i + 1) (TBang :: acc) + | '=' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TEqEq :: acc) + | '=' -> loop (i + 1) (TAssign :: acc) + | '&' when i + 1 < n && src.[i + 1] = '&' -> loop (i + 2) (TAndAnd :: acc) + | '|' when i + 1 < n && src.[i + 1] = '|' -> loop (i + 2) (TOrOr :: acc) + | '<' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TLe :: acc) + | '<' -> loop (i + 1) (TLt :: acc) + | '>' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TGe :: acc) + | '>' -> loop (i + 1) (TGt :: acc) + | c when is_digit c -> + let tok, j = read_number i (i + 1) in + loop j (tok :: acc) + | c when is_ident_start c -> + let tok, j = read_ident i (i + 1) in + loop j (tok :: acc) + | c -> + let msg = Printf.sprintf "unexpected character: %c" c in + raise (Lex_error msg) + in + loop 0 [] diff --git a/lib/lexer.mli b/lib/lexer.mli new file mode 100644 index 0000000..d5e3c5b --- /dev/null +++ b/lib/lexer.mli @@ -0,0 +1,45 @@ +type token = + | TIntKw + | TBoolKw + | TVoidKw + | TStructKw + | TIf + | TElse + | TFor + | TEach + | TForEach + | TIn + | TReturn + | TTrue + | TFalse + | TIdent of string + | TIntLit of int + | TLParen + | TRParen + | TLBrace + | TRBrace + | TLBracket + | TRBracket + | TSemicolon + | TComma + | TDot + | TAssign + | TPlus + | TMinus + | TStar + | TSlash + | TPercent + | TAndAnd + | TOrOr + | TBang + | TEqEq + | TNe + | TLt + | TLe + | TGt + | TGe + | TEOF + +exception Lex_error of string + +val lex : string -> token list diff --git a/lib/parser.ml b/lib/parser.ml new file mode 100644 index 0000000..9f84aeb --- /dev/null +++ b/lib/parser.ml @@ -0,0 +1,383 @@ +open Ast +open Lexer + +exception Parse_error of string + +type parser_state = { + toks : token array; + mutable i : int; +} + +let mk_state toks = { toks = Array.of_list toks; i = 0 } +let peek st = if st.i < Array.length st.toks then st.toks.(st.i) else TEOF + +let consume st = + let t = peek st in + st.i <- st.i + 1; + t + +let expect st tok = + match (tok, consume st) with + | TLParen, TLParen + | TRParen, TRParen + | TLBrace, TLBrace + | TRBrace, TRBrace + | TLBracket, TLBracket + | TRBracket, TRBracket + | TSemicolon, TSemicolon + | TComma, TComma + | TDot, TDot + | TAssign, TAssign + | TPlus, TPlus + | TMinus, TMinus + | TStar, TStar + | TSlash, TSlash + | TPercent, TPercent + | TAndAnd, TAndAnd + | TOrOr, TOrOr + | TBang, TBang + | TEqEq, TEqEq + | TNe, TNe + | TLt, TLt + | TLe, TLe + | TGt, TGt + | TGe, TGe + | TIf, TIf + | TElse, TElse + | TForEach, TForEach + | TFor, TFor + | TEach, TEach + | TIn, TIn + | TReturn, TReturn + | TIntKw, TIntKw + | TBoolKw, TBoolKw + | TVoidKw, TVoidKw + | TStructKw, TStructKw + | TEOF, TEOF -> () + | _ -> raise (Parse_error "unexpected token") + +let expect_ident st = + match consume st with + | TIdent s -> s + | _ -> raise (Parse_error "expected identifier") + +let starts_type = function TIntKw | TBoolKw | TVoidKw | TStructKw -> true | _ -> false + +let rec parse_type st = + let base = + match consume st with + | TIntKw -> TInt + | TBoolKw -> TBool + | TVoidKw -> TVoid + | TStructKw -> TStruct (expect_ident st) + | _ -> raise (Parse_error "expected type") + in + let rec arrays t = + match peek st with + | TLBracket -> + expect st TLBracket; + expect st TRBracket; + arrays (TArray t) + | _ -> t + in + arrays base + +let rec parse_program st = + let rec loop acc = + match peek st with + | TEOF -> List.rev acc + | _ -> loop (parse_top st :: acc) + in + loop [] + +and parse_top st = + match peek st with + | TStructKw -> + expect st TStructKw; + let sname = expect_ident st in + (match peek st with + | TLBrace -> + expect st TLBrace; + let rec fields acc = + match peek st with + | TRBrace -> List.rev acc + | _ -> + let t = parse_type st in + let n = expect_ident st in + expect st TSemicolon; + fields ((t, n) :: acc) + in + let fs = fields [] in + expect st TRBrace; + expect st TSemicolon; + TopStruct { sname; fields = fs } + | _ -> + let ty = TStruct sname in + parse_top_after_type st ty) + | _ -> + let ty = parse_type st in + parse_top_after_type st ty + +and parse_top_after_type st ty = + let name = expect_ident st in + match peek st with + | TLParen -> + expect st TLParen; + let params = parse_params st in + expect st TRParen; + let body = parse_stmt_as_block st in + TopFunc { name; params; ret = ty; body } + | _ -> + let init = + match peek st with + | TAssign -> + expect st TAssign; + Some (parse_expr st) + | _ -> None + in + expect st TSemicolon; + TopGlobalVar (ty, name, init) + +and parse_params st = + match peek st with + | TRParen -> [] + | _ -> + let rec loop acc = + let t = parse_type st in + let n = expect_ident st in + match peek st with + | TComma -> + expect st TComma; + loop ((t, n) :: acc) + | _ -> List.rev ((t, n) :: acc) + in + loop [] + +and parse_stmt_as_block st = + match peek st with + | TLBrace -> + expect st TLBrace; + let rec loop acc = + match peek st with + | TRBrace -> + expect st TRBrace; + List.rev acc + | _ -> loop (parse_stmt st :: acc) + in + loop [] + | _ -> [ parse_stmt st ] + +and parse_stmt st = + match peek st with + | TLBrace -> Block (parse_stmt_as_block st) + | TIf -> + expect st TIf; + expect st TLParen; + let cond = parse_expr st in + expect st TRParen; + let then_body = parse_stmt_as_block st in + let else_body = + match peek st with + | TElse -> + expect st TElse; + parse_stmt_as_block st + | _ -> [] + in + If (cond, then_body, else_body) + | TForEach | TFor -> + (match peek st with + | TForEach -> expect st TForEach + | TFor -> + expect st TFor; + expect st TEach + | _ -> ()); + expect st TLParen; + let it_t = parse_type st in + let it_name = expect_ident st in + expect st TIn; + let iterable = parse_expr st in + expect st TRParen; + let body = parse_stmt_as_block st in + ForEach (it_t, it_name, iterable, body) + | TReturn -> + expect st TReturn; + let v = + match peek st with + | TSemicolon -> None + | _ -> Some (parse_expr st) + in + expect st TSemicolon; + Return v + | t when starts_type t -> + let ty = parse_type st in + let n = expect_ident st in + let init = + match peek st with + | TAssign -> + expect st TAssign; + Some (parse_expr st) + | _ -> None + in + expect st TSemicolon; + VarDecl (ty, n, init) + | _ -> + let e = parse_expr st in + expect st TSemicolon; + Expr e + +and parse_expr st = parse_assignment st + +and parse_assignment st = + let lhs = parse_or st in + match peek st with + | TAssign -> + expect st TAssign; + let rhs = parse_assignment st in + Assign (lhs, rhs) + | _ -> lhs + +and parse_or st = + let rec loop left = + match peek st with + | TOrOr -> + expect st TOrOr; + loop (Binop (Or, left, parse_and st)) + | _ -> left + in + loop (parse_and st) + +and parse_and st = + let rec loop left = + match peek st with + | TAndAnd -> + expect st TAndAnd; + loop (Binop (And, left, parse_equality st)) + | _ -> left + in + loop (parse_equality st) + +and parse_equality st = + let rec loop left = + match peek st with + | TEqEq -> + expect st TEqEq; + loop (Binop (Eq, left, parse_rel st)) + | TNe -> + expect st TNe; + loop (Binop (Ne, left, parse_rel st)) + | _ -> left + in + loop (parse_rel st) + +and parse_rel st = + let rec loop left = + match peek st with + | TLt -> + expect st TLt; + loop (Binop (Lt, left, parse_add st)) + | TLe -> + expect st TLe; + loop (Binop (Le, left, parse_add st)) + | TGt -> + expect st TGt; + loop (Binop (Gt, left, parse_add st)) + | TGe -> + expect st TGe; + loop (Binop (Ge, left, parse_add st)) + | _ -> left + in + loop (parse_add st) + +and parse_add st = + let rec loop left = + match peek st with + | TPlus -> + expect st TPlus; + loop (Binop (Add, left, parse_mul st)) + | TMinus -> + expect st TMinus; + loop (Binop (Sub, left, parse_mul st)) + | _ -> left + in + loop (parse_mul st) + +and parse_mul st = + let rec loop left = + match peek st with + | TStar -> + expect st TStar; + loop (Binop (Mul, left, parse_unary st)) + | TSlash -> + expect st TSlash; + loop (Binop (Div, left, parse_unary st)) + | TPercent -> + expect st TPercent; + loop (Binop (Mod, left, parse_unary st)) + | _ -> left + in + loop (parse_unary st) + +and parse_unary st = + match peek st with + | TMinus -> + expect st TMinus; + Unop (Neg, parse_unary st) + | TBang -> + expect st TBang; + Unop (Not, parse_unary st) + | _ -> parse_postfix st + +and parse_postfix st = + let rec loop e = + match peek st with + | TLParen -> + expect st TLParen; + let args = parse_args st in + expect st TRParen; + loop (Call (e, args)) + | TLBracket -> + expect st TLBracket; + let idx = parse_expr st in + expect st TRBracket; + loop (ArrayGet (e, idx)) + | TDot -> + expect st TDot; + let fld = expect_ident st in + loop (StructGet (e, fld)) + | _ -> e + in + loop (parse_primary st) + +and parse_args st = + match peek st with + | TRParen -> [] + | _ -> + let rec loop acc = + let e = parse_expr st in + match peek st with + | TComma -> + expect st TComma; + loop (e :: acc) + | _ -> List.rev (e :: acc) + in + loop [] + +and parse_primary st = + match consume st with + | TIntLit n -> IntLit n + | TTrue -> BoolLit true + | TFalse -> BoolLit false + | TIdent s -> Var s + | TLParen -> + let e = parse_expr st in + expect st TRParen; + e + | _ -> raise (Parse_error "expected expression") + +let parse_string src = + try + let st = mk_state (Lexer.lex src) in + Ok (parse_program st) + with + | Lexer.Lex_error msg -> Error ("lex error: " ^ msg) + | Parse_error msg -> Error ("parse error: " ^ msg) diff --git a/lib/parser.mli b/lib/parser.mli new file mode 100644 index 0000000..53d76dd --- /dev/null +++ b/lib/parser.mli @@ -0,0 +1 @@ +val parse_string : string -> (Ast.program, string) result diff --git a/lib/spooky.ml b/lib/spooky.ml index e0f55e0..dede2b7 100644 --- a/lib/spooky.ml +++ b/lib/spooky.ml @@ -1,886 +1,23 @@ -module StringMap = Map.Make (String) - -exception Parse_error of string -exception Type_error of string - -type typ = - | TInt - | TBool - | TVoid - | TStruct of string - | TArray of typ - -type binop = - | Add - | Sub - | Mul - | Div - | Mod - | And - | Or - | Eq - | Ne - | Lt - | Le - | Gt - | Ge - -type unop = Neg | Not - -type expr = - | IntLit of int - | BoolLit of bool - | Var of string - | Binop of binop * expr * expr - | Unop of unop * expr - | Assign of expr * expr - | Call of expr * expr list - | ArrayGet of expr * expr - | StructGet of expr * string - -type stmt = - | VarDecl of typ * string * expr option - | Expr of expr - | If of expr * stmt list * stmt list - | ForEach of typ * string * expr * stmt list - | Return of expr option - | Block of stmt list - -type func = { - name : string; - params : (typ * string) list; - ret : typ; - body : stmt list; -} - -type struct_def = { - sname : string; - fields : (typ * string) list; -} - -type top = - | TopStruct of struct_def - | TopFunc of func - | TopGlobalVar of typ * string * expr option - -type program = top list - -let string_of_typ = - let rec go = function - | TInt -> "int" - | TBool -> "bool" - | TVoid -> "void" - | TStruct n -> "struct " ^ n - | TArray t -> go t ^ "[]" - in - go - -let string_of_binop = function - | Add -> "+" - | Sub -> "-" - | Mul -> "*" - | Div -> "/" - | Mod -> "%" - | And -> "&&" - | Or -> "||" - | Eq -> "==" - | Ne -> "!=" - | Lt -> "<" - | Le -> "<=" - | Gt -> ">" - | Ge -> ">=" - -let string_of_unop = function Neg -> "-" | Not -> "!" - -let rec string_of_expr = function - | IntLit n -> Printf.sprintf "IntLit(%d)" n - | BoolLit b -> Printf.sprintf "BoolLit(%b)" b - | Var v -> Printf.sprintf "Var(%s)" v - | Binop (op, a, b) -> - Printf.sprintf "Binop(%s, %s, %s)" (string_of_binop op) (string_of_expr a) (string_of_expr b) - | Unop (op, e) -> Printf.sprintf "Unop(%s, %s)" (string_of_unop op) (string_of_expr e) - | Assign (lhs, rhs) -> Printf.sprintf "Assign(%s, %s)" (string_of_expr lhs) (string_of_expr rhs) - | Call (callee, args) -> - let args_s = String.concat ", " (List.map string_of_expr args) in - Printf.sprintf "Call(%s, [%s])" (string_of_expr callee) args_s - | ArrayGet (arr, idx) -> Printf.sprintf "ArrayGet(%s, %s)" (string_of_expr arr) (string_of_expr idx) - | StructGet (obj, fld) -> Printf.sprintf "StructGet(%s, %s)" (string_of_expr obj) fld - -let indent n = String.make (2 * n) ' ' - -let rec string_of_stmt ?(level = 0) st = - let i = indent level in - match st with - | VarDecl (t, n, None) -> Printf.sprintf "%sVarDecl(%s %s)" i (string_of_typ t) n - | VarDecl (t, n, Some e) -> - Printf.sprintf "%sVarDecl(%s %s = %s)" i (string_of_typ t) n (string_of_expr e) - | Expr e -> Printf.sprintf "%sExpr(%s)" i (string_of_expr e) - | Return None -> Printf.sprintf "%sReturn" i - | Return (Some e) -> Printf.sprintf "%sReturn(%s)" i (string_of_expr e) - | Block body -> - let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in - if String.equal body_s "" then Printf.sprintf "%sBlock" i - else Printf.sprintf "%sBlock\n%s" i body_s - | If (cond, tbranch, ebranch) -> - let then_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) tbranch) in - let else_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) ebranch) in - if ebranch = [] then Printf.sprintf "%sIf(%s)\n%s" i (string_of_expr cond) then_s - else Printf.sprintf "%sIf(%s)\n%s\n%sElse\n%s" i (string_of_expr cond) then_s i else_s - | ForEach (it_t, it_name, iterable, body) -> - let body_s = String.concat "\n" (List.map (string_of_stmt ~level:(level + 1)) body) in - Printf.sprintf "%sForEach(%s %s in %s)\n%s" i (string_of_typ it_t) it_name (string_of_expr iterable) - body_s - -let string_of_top = function - | TopStruct s -> - let fields = - s.fields |> List.map (fun (t, n) -> Printf.sprintf " Field(%s %s)" (string_of_typ t) n) - |> String.concat "\n" - in - if String.equal fields "" then Printf.sprintf "Struct %s" s.sname - else Printf.sprintf "Struct %s\n%s" s.sname fields - | TopGlobalVar (t, n, init) -> - (match init with - | None -> Printf.sprintf "GlobalVar(%s %s)" (string_of_typ t) n - | Some e -> Printf.sprintf "GlobalVar(%s %s = %s)" (string_of_typ t) n (string_of_expr e)) - | TopFunc f -> - let params = - f.params - |> List.map (fun (t, n) -> Printf.sprintf "%s %s" (string_of_typ t) n) - |> String.concat ", " - in - let body = String.concat "\n" (List.map (string_of_stmt ~level:1) f.body) in - if String.equal body "" then Printf.sprintf "Function %s(%s) -> %s" f.name params (string_of_typ f.ret) - else Printf.sprintf "Function %s(%s) -> %s\n%s" f.name params (string_of_typ f.ret) body - -let string_of_program (prog : program) = prog |> List.map string_of_top |> String.concat "\n\n" - -let rec equal_typ a b = - match (a, b) with - | TInt, TInt | TBool, TBool | TVoid, TVoid -> true - | TStruct x, TStruct y -> String.equal x y - | TArray x, TArray y -> equal_typ x y - | _ -> false - -type token = - | TIntKw - | TBoolKw - | TVoidKw - | TStructKw - | TIf - | TElse - | TFor - | TEach - | TForEach - | TIn - | TReturn - | TTrue - | TFalse - | TIdent of string - | TIntLit of int - | TLParen - | TRParen - | TLBrace - | TRBrace - | TLBracket - | TRBracket - | TSemicolon - | TComma - | TDot - | TAssign - | TPlus - | TMinus - | TStar - | TSlash - | TPercent - | TAndAnd - | TOrOr - | TBang - | TEqEq - | TNe - | TLt - | TLe - | TGt - | TGe - | TEOF - -let is_space = function ' ' | '\t' | '\r' | '\n' -> true | _ -> false - -let is_digit c = c >= '0' && c <= '9' - -let is_ident_start c = - (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c = '_' - -let is_ident_char c = is_ident_start c || is_digit c - -let keyword_or_ident s = - match s with - | "int" -> TIntKw - | "bool" -> TBoolKw - | "void" -> TVoidKw - | "struct" -> TStructKw - | "if" -> TIf - | "else" -> TElse - | "for" -> TFor - | "each" -> TEach - | "foreach" -> TForEach - | "in" -> TIn - | "return" -> TReturn - | "true" -> TTrue - | "false" -> TFalse - | _ -> TIdent s - -let lex (src : string) : token list = - let n = String.length src in - let rec skip_line_comment i = - if i >= n then i - else if src.[i] = '\n' then i + 1 - else skip_line_comment (i + 1) - in - let rec skip_block_comment i = - if i + 1 >= n then raise (Parse_error "unterminated block comment") - else if src.[i] = '*' && src.[i + 1] = '/' then i + 2 - else skip_block_comment (i + 1) - in - let rec read_number i j = - if j < n && is_digit src.[j] then read_number i (j + 1) - else - let s = String.sub src i (j - i) in - (TIntLit (int_of_string s), j) - in - let rec read_ident i j = - if j < n && is_ident_char src.[j] then read_ident i (j + 1) - else - let s = String.sub src i (j - i) in - (keyword_or_ident s, j) - in - let rec loop i acc = - if i >= n then List.rev (TEOF :: acc) - else if is_space src.[i] then loop (i + 1) acc - else - match src.[i] with - | '/' when i + 1 < n && src.[i + 1] = '/' -> loop (skip_line_comment (i + 2)) acc - | '/' when i + 1 < n && src.[i + 1] = '*' -> loop (skip_block_comment (i + 2)) acc - | '(' -> loop (i + 1) (TLParen :: acc) - | ')' -> loop (i + 1) (TRParen :: acc) - | '{' -> loop (i + 1) (TLBrace :: acc) - | '}' -> loop (i + 1) (TRBrace :: acc) - | '[' -> loop (i + 1) (TLBracket :: acc) - | ']' -> loop (i + 1) (TRBracket :: acc) - | ';' -> loop (i + 1) (TSemicolon :: acc) - | ',' -> loop (i + 1) (TComma :: acc) - | '.' -> loop (i + 1) (TDot :: acc) - | '+' -> loop (i + 1) (TPlus :: acc) - | '-' -> loop (i + 1) (TMinus :: acc) - | '*' -> loop (i + 1) (TStar :: acc) - | '%' -> loop (i + 1) (TPercent :: acc) - | '/' -> loop (i + 1) (TSlash :: acc) - | '!' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TNe :: acc) - | '!' -> loop (i + 1) (TBang :: acc) - | '=' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TEqEq :: acc) - | '=' -> loop (i + 1) (TAssign :: acc) - | '&' when i + 1 < n && src.[i + 1] = '&' -> loop (i + 2) (TAndAnd :: acc) - | '|' when i + 1 < n && src.[i + 1] = '|' -> loop (i + 2) (TOrOr :: acc) - | '<' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TLe :: acc) - | '<' -> loop (i + 1) (TLt :: acc) - | '>' when i + 1 < n && src.[i + 1] = '=' -> loop (i + 2) (TGe :: acc) - | '>' -> loop (i + 1) (TGt :: acc) - | c when is_digit c -> - let tok, j = read_number i (i + 1) in - loop j (tok :: acc) - | c when is_ident_start c -> - let tok, j = read_ident i (i + 1) in - loop j (tok :: acc) - | c -> - let msg = Printf.sprintf "unexpected character: %c" c in - raise (Parse_error msg) - in - loop 0 [] - -type parser_state = { - toks : token array; - mutable i : int; -} - -let mk_state toks = { toks = Array.of_list toks; i = 0 } - -let peek st = if st.i < Array.length st.toks then st.toks.(st.i) else TEOF - -let consume st = - let t = peek st in - st.i <- st.i + 1; - t - -let expect st tok = - match (tok, consume st) with - | TLParen, TLParen - | TRParen, TRParen - | TLBrace, TLBrace - | TRBrace, TRBrace - | TLBracket, TLBracket - | TRBracket, TRBracket - | TSemicolon, TSemicolon - | TComma, TComma - | TDot, TDot - | TAssign, TAssign - | TPlus, TPlus - | TMinus, TMinus - | TStar, TStar - | TSlash, TSlash - | TPercent, TPercent - | TAndAnd, TAndAnd - | TOrOr, TOrOr - | TBang, TBang - | TEqEq, TEqEq - | TNe, TNe - | TLt, TLt - | TLe, TLe - | TGt, TGt - | TGe, TGe - | TIf, TIf - | TElse, TElse - | TForEach, TForEach - | TFor, TFor - | TEach, TEach - | TIn, TIn - | TReturn, TReturn - | TIntKw, TIntKw - | TBoolKw, TBoolKw - | TVoidKw, TVoidKw - | TStructKw, TStructKw - | TEOF, TEOF -> () - | _ -> raise (Parse_error "unexpected token") - -let expect_ident st = - match consume st with - | TIdent s -> s - | _ -> raise (Parse_error "expected identifier") - -let starts_type = function TIntKw | TBoolKw | TVoidKw | TStructKw -> true | _ -> false - -let rec parse_type st = - let base = - match consume st with - | TIntKw -> TInt - | TBoolKw -> TBool - | TVoidKw -> TVoid - | TStructKw -> TStruct (expect_ident st) - | _ -> raise (Parse_error "expected type") - in - let rec arrays t = - match peek st with - | TLBracket -> - expect st TLBracket; - expect st TRBracket; - arrays (TArray t) - | _ -> t - in - arrays base - -let rec parse_program st = - let rec loop acc = - match peek st with - | TEOF -> List.rev acc - | _ -> loop (parse_top st :: acc) - in - loop [] - -and parse_top st = - match peek st with - | TStructKw -> - expect st TStructKw; - let sname = expect_ident st in - (match peek st with - | TLBrace -> - expect st TLBrace; - let rec fields acc = - match peek st with - | TRBrace -> List.rev acc - | _ -> - let t = parse_type st in - let n = expect_ident st in - expect st TSemicolon; - fields ((t, n) :: acc) - in - let fs = fields [] in - expect st TRBrace; - expect st TSemicolon; - TopStruct { sname; fields = fs } - | _ -> - let ty = TStruct sname in - parse_top_after_type st ty) - | _ -> - let ty = parse_type st in - parse_top_after_type st ty - -and parse_top_after_type st ty = - let name = expect_ident st in - match peek st with - | TLParen -> - expect st TLParen; - let params = parse_params st in - expect st TRParen; - let body = parse_stmt_as_block st in - TopFunc { name; params; ret = ty; body } - | _ -> - let init = - match peek st with - | TAssign -> - expect st TAssign; - Some (parse_expr st) - | _ -> None - in - expect st TSemicolon; - TopGlobalVar (ty, name, init) - -and parse_params st = - match peek st with - | TRParen -> [] - | _ -> - let rec loop acc = - let t = parse_type st in - let n = expect_ident st in - match peek st with - | TComma -> - expect st TComma; - loop ((t, n) :: acc) - | _ -> List.rev ((t, n) :: acc) - in - loop [] - -and parse_stmt_as_block st = - match peek st with - | TLBrace -> - expect st TLBrace; - let rec loop acc = - match peek st with - | TRBrace -> - expect st TRBrace; - List.rev acc - | _ -> loop (parse_stmt st :: acc) - in - loop [] - | _ -> [ parse_stmt st ] - -and parse_stmt st = - match peek st with - | TLBrace -> Block (parse_stmt_as_block st) - | TIf -> - expect st TIf; - expect st TLParen; - let cond = parse_expr st in - expect st TRParen; - let then_body = parse_stmt_as_block st in - let else_body = - match peek st with - | TElse -> - expect st TElse; - parse_stmt_as_block st - | _ -> [] - in - If (cond, then_body, else_body) - | TForEach | TFor -> - (match peek st with - | TForEach -> expect st TForEach - | TFor -> - expect st TFor; - expect st TEach - | _ -> ()); - expect st TLParen; - let it_t = parse_type st in - let it_name = expect_ident st in - expect st TIn; - let iterable = parse_expr st in - expect st TRParen; - let body = parse_stmt_as_block st in - ForEach (it_t, it_name, iterable, body) - | TReturn -> - expect st TReturn; - let v = - match peek st with - | TSemicolon -> None - | _ -> Some (parse_expr st) - in - expect st TSemicolon; - Return v - | t when starts_type t -> - let ty = parse_type st in - let n = expect_ident st in - let init = - match peek st with - | TAssign -> - expect st TAssign; - Some (parse_expr st) - | _ -> None - in - expect st TSemicolon; - VarDecl (ty, n, init) - | _ -> - let e = parse_expr st in - expect st TSemicolon; - Expr e - -and parse_expr st = parse_assignment st - -and parse_assignment st = - let lhs = parse_or st in - match peek st with - | TAssign -> - expect st TAssign; - let rhs = parse_assignment st in - Assign (lhs, rhs) - | _ -> lhs - -and parse_or st = - let rec loop left = - match peek st with - | TOrOr -> - expect st TOrOr; - loop (Binop (Or, left, parse_and st)) - | _ -> left - in - loop (parse_and st) - -and parse_and st = - let rec loop left = - match peek st with - | TAndAnd -> - expect st TAndAnd; - loop (Binop (And, left, parse_equality st)) - | _ -> left - in - loop (parse_equality st) - -and parse_equality st = - let rec loop left = - match peek st with - | TEqEq -> - expect st TEqEq; - loop (Binop (Eq, left, parse_rel st)) - | TNe -> - expect st TNe; - loop (Binop (Ne, left, parse_rel st)) - | _ -> left - in - loop (parse_rel st) - -and parse_rel st = - let rec loop left = - match peek st with - | TLt -> - expect st TLt; - loop (Binop (Lt, left, parse_add st)) - | TLe -> - expect st TLe; - loop (Binop (Le, left, parse_add st)) - | TGt -> - expect st TGt; - loop (Binop (Gt, left, parse_add st)) - | TGe -> - expect st TGe; - loop (Binop (Ge, left, parse_add st)) - | _ -> left - in - loop (parse_add st) - -and parse_add st = - let rec loop left = - match peek st with - | TPlus -> - expect st TPlus; - loop (Binop (Add, left, parse_mul st)) - | TMinus -> - expect st TMinus; - loop (Binop (Sub, left, parse_mul st)) - | _ -> left - in - loop (parse_mul st) - -and parse_mul st = - let rec loop left = - match peek st with - | TStar -> - expect st TStar; - loop (Binop (Mul, left, parse_unary st)) - | TSlash -> - expect st TSlash; - loop (Binop (Div, left, parse_unary st)) - | TPercent -> - expect st TPercent; - loop (Binop (Mod, left, parse_unary st)) - | _ -> left - in - loop (parse_unary st) - -and parse_unary st = - match peek st with - | TMinus -> - expect st TMinus; - Unop (Neg, parse_unary st) - | TBang -> - expect st TBang; - Unop (Not, parse_unary st) - | _ -> parse_postfix st - -and parse_postfix st = - let rec loop e = - match peek st with - | TLParen -> - expect st TLParen; - let args = parse_args st in - expect st TRParen; - loop (Call (e, args)) - | TLBracket -> - expect st TLBracket; - let idx = parse_expr st in - expect st TRBracket; - loop (ArrayGet (e, idx)) - | TDot -> - expect st TDot; - let fld = expect_ident st in - loop (StructGet (e, fld)) - | _ -> e - in - loop (parse_primary st) - -and parse_args st = - match peek st with - | TRParen -> [] - | _ -> - let rec loop acc = - let e = parse_expr st in - match peek st with - | TComma -> - expect st TComma; - loop (e :: acc) - | _ -> List.rev (e :: acc) - in - loop [] - -and parse_primary st = - match consume st with - | TIntLit n -> IntLit n - | TTrue -> BoolLit true - | TFalse -> BoolLit false - | TIdent s -> Var s - | TLParen -> - let e = parse_expr st in - expect st TRParen; - e - | _ -> raise (Parse_error "expected expression") - -let parse_string src = - try - let st = mk_state (lex src) in - Ok (parse_program st) - with Parse_error msg -> Error msg - -type func_sig = { - fparams : typ list; - fret : typ; -} - -type tc_ctx = { - structs : (typ StringMap.t) StringMap.t; - funcs : func_sig StringMap.t; - globals : typ StringMap.t; -} - -let fail_type msg = raise (Type_error msg) - -let expect_type got want = - if not (equal_typ got want) then - fail_type (Printf.sprintf "type mismatch: got %s, expected %s" (string_of_typ got) (string_of_typ want)) - -let rec validate_type (structs : (typ StringMap.t) StringMap.t) (allow_void : bool) = function - | TVoid when allow_void -> () - | TVoid -> fail_type "void is not a valid variable type" - | TStruct n -> - if not (StringMap.mem n structs) then fail_type ("unknown struct type: " ^ n) - | TArray t -> - if equal_typ t TVoid then fail_type "array element type cannot be void"; - validate_type structs false t - | TInt | TBool -> () - -let collect_ctx (prog : program) : tc_ctx = - let rec collect tops structs funcs globals = - match tops with - | [] -> { structs; funcs; globals } - | TopStruct s :: tl -> - if StringMap.mem s.sname structs then fail_type ("duplicate struct: " ^ s.sname); - let fields = - List.fold_left - (fun acc (t, n) -> - if StringMap.mem n acc then fail_type ("duplicate field " ^ n ^ " in struct " ^ s.sname); - StringMap.add n t acc) - StringMap.empty s.fields - in - collect tl (StringMap.add s.sname fields structs) funcs globals - | TopFunc f :: tl -> - if StringMap.mem f.name funcs then fail_type ("duplicate function: " ^ f.name); - let sig_ = { fparams = List.map fst f.params; fret = f.ret } in - collect tl structs (StringMap.add f.name sig_ funcs) globals - | TopGlobalVar (t, n, _) :: tl -> - if StringMap.mem n globals then fail_type ("duplicate global variable: " ^ n); - collect tl structs funcs (StringMap.add n t globals) - in - collect prog StringMap.empty StringMap.empty StringMap.empty - -let lookup_var env x = - match StringMap.find_opt x env with - | Some t -> t - | None -> fail_type ("unknown variable: " ^ x) - -let lookup_struct_field ctx sname fname = - match StringMap.find_opt sname ctx.structs with - | None -> fail_type ("unknown struct: " ^ sname) - | Some fields -> - (match StringMap.find_opt fname fields with - | None -> fail_type ("unknown field " ^ fname ^ " on struct " ^ sname) - | Some t -> t) - -let rec infer_expr ctx env = function - | IntLit _ -> TInt - | BoolLit _ -> TBool - | Var x -> lookup_var env x - | Unop (Neg, e) -> - expect_type (infer_expr ctx env e) TInt; - TInt - | Unop (Not, e) -> - expect_type (infer_expr ctx env e) TBool; - TBool - | Binop (op, a, b) -> - let ta = infer_expr ctx env a in - let tb = infer_expr ctx env b in - (match op with - | Add | Sub | Mul | Div | Mod -> - expect_type ta TInt; - expect_type tb TInt; - TInt - | And | Or -> - expect_type ta TBool; - expect_type tb TBool; - TBool - | Lt | Le | Gt | Ge -> - expect_type ta TInt; - expect_type tb TInt; - TBool - | Eq | Ne -> - if not (equal_typ ta tb) then fail_type "equality operands must have same type"; - TBool) - | Assign (lhs, rhs) -> - (match lhs with Var _ | ArrayGet _ | StructGet _ -> () | _ -> fail_type "left side of assignment is not assignable"); - let tl = infer_expr ctx env lhs in - let tr = infer_expr ctx env rhs in - expect_type tr tl; - tl - | ArrayGet (arr, idx) -> - expect_type (infer_expr ctx env idx) TInt; - (match infer_expr ctx env arr with - | TArray t -> t - | t -> fail_type ("indexing requires array, got " ^ string_of_typ t)) - | StructGet (obj, fld) -> - (match infer_expr ctx env obj with - | TStruct sname -> lookup_struct_field ctx sname fld - | t -> fail_type ("field access requires struct, got " ^ string_of_typ t)) - | Call (callee, args) -> - let fname = - match callee with - | Var n -> n - | _ -> fail_type "only direct function calls are supported" - in - let sig_ = - match StringMap.find_opt fname ctx.funcs with - | Some s -> s - | None -> fail_type ("unknown function: " ^ fname) - in - if List.length args <> List.length sig_.fparams then - fail_type - (Printf.sprintf "function %s expects %d arguments, got %d" fname (List.length sig_.fparams) - (List.length args)); - List.iter2 (fun arg pty -> expect_type (infer_expr ctx env arg) pty) args sig_.fparams; - sig_.fret - -let rec check_stmt ctx ret env = function - | VarDecl (t, n, init) -> - validate_type ctx.structs false t; - (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t); - StringMap.add n t env - | Expr e -> - ignore (infer_expr ctx env e); - env - | If (cond, tbranch, ebranch) -> - expect_type (infer_expr ctx env cond) TBool; - ignore (check_block ctx ret env tbranch); - ignore (check_block ctx ret env ebranch); - env - | ForEach (it_t, it_name, iterable, body) -> - validate_type ctx.structs false it_t; - (match infer_expr ctx env iterable with - | TArray elem_t -> expect_type elem_t it_t - | t -> fail_type ("foreach expects array iterable, got " ^ string_of_typ t)); - let env' = StringMap.add it_name it_t env in - ignore (check_block ctx ret env' body); - env - | Return None -> - expect_type TVoid ret; - env - | Return (Some e) -> - expect_type (infer_expr ctx env e) ret; - env - | Block stmts -> - ignore (check_block ctx ret env stmts); - env - -and check_block ctx ret env stmts = List.fold_left (check_stmt ctx ret) env stmts - -let rec has_return_stmt = function - | Return _ -> true - | If (_, t, e) -> List.exists has_return_stmt t || List.exists has_return_stmt e - | ForEach (_, _, _, body) | Block body -> List.exists has_return_stmt body - | VarDecl _ | Expr _ -> false - -let check_program (prog : program) = - let ctx = collect_ctx prog in - StringMap.iter (fun _ t -> validate_type ctx.structs false t) ctx.globals; - StringMap.iter - (fun _ sig_ -> - List.iter (validate_type ctx.structs false) sig_.fparams; - validate_type ctx.structs true sig_.fret) - ctx.funcs; - List.iter - (function - | TopStruct s -> - List.iter (fun (t, _) -> validate_type ctx.structs false t) s.fields - | TopGlobalVar (t, _, init) -> - validate_type ctx.structs false t; - let env = ctx.globals in - (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t) - | TopFunc f -> - let env_with_globals = ctx.globals in - let env = - List.fold_left - (fun acc (t, n) -> - validate_type ctx.structs false t; - if StringMap.mem n acc then fail_type ("duplicate parameter name: " ^ n); - StringMap.add n t acc) - env_with_globals f.params - in - ignore (check_block ctx f.ret env f.body); - if (not (equal_typ f.ret TVoid)) && not (List.exists has_return_stmt f.body) then - fail_type ("non-void function " ^ f.name ^ " must return a value")) - prog - -let type_check (prog : program) = - try - check_program prog; - Ok () - with Type_error msg -> Error msg +module Ast = Ast +module Lexer = Lexer +module Parser = Parser +module Typechecker = Typechecker +module Generator_json = Generator_json +module Generator_c = Generator_c + +type program = Ast.program + +let parse_string = Parser.parse_string +let string_of_program = Ast.string_of_program +let type_check = Typechecker.type_check let parse_and_type_check src = match parse_string src with - | Error e -> Error ("Parse error: " ^ e) + | Error e -> Error e | Ok prog -> (match type_check prog with - | Error e -> Error ("Type error: " ^ e) - | Ok () -> Ok prog) + | Ok () -> Ok prog + | Error e -> Error ("type error: " ^ e)) + +let generate_json = Generator_json.generate +let generate_c = Generator_c.generate diff --git a/lib/spooky.mli b/lib/spooky.mli index c0b4748..3e79089 100644 --- a/lib/spooky.mli +++ b/lib/spooky.mli @@ -1,67 +1,15 @@ -type typ = - | TInt - | TBool - | TVoid - | TStruct of string - | TArray of typ +module Ast = Ast +module Lexer = Lexer +module Parser = Parser +module Typechecker = Typechecker +module Generator_json = Generator_json +module Generator_c = Generator_c -type binop = - | Add - | Sub - | Mul - | Div - | Mod - | And - | Or - | Eq - | Ne - | Lt - | Le - | Gt - | Ge +type program = Ast.program -type unop = Neg | Not - -type expr = - | IntLit of int - | BoolLit of bool - | Var of string - | Binop of binop * expr * expr - | Unop of unop * expr - | Assign of expr * expr - | Call of expr * expr list - | ArrayGet of expr * expr - | StructGet of expr * string - -type stmt = - | VarDecl of typ * string * expr option - | Expr of expr - | If of expr * stmt list * stmt list - | ForEach of typ * string * expr * stmt list - | Return of expr option - | Block of stmt list - -type func = { - name : string; - params : (typ * string) list; - ret : typ; - body : stmt list; -} - -type struct_def = { - sname : string; - fields : (typ * string) list; -} - -type top = - | TopStruct of struct_def - | TopFunc of func - | TopGlobalVar of typ * string * expr option - -type program = top list - -val string_of_typ : typ -> string -val string_of_program : program -> string val parse_string : string -> (program, string) result +val string_of_program : program -> string val type_check : program -> (unit, string) result val parse_and_type_check : string -> (program, string) result +val generate_json : program -> string +val generate_c : program -> string diff --git a/lib/typechecker.ml b/lib/typechecker.ml new file mode 100644 index 0000000..8af198b --- /dev/null +++ b/lib/typechecker.ml @@ -0,0 +1,208 @@ +open Ast + +module StringMap = Map.Make (String) + +exception Type_error of string + +type func_sig = { + fparams : typ list; + fret : typ; +} + +type tc_ctx = { + structs : (typ StringMap.t) StringMap.t; + funcs : func_sig StringMap.t; + globals : typ StringMap.t; +} + +let fail_type msg = raise (Type_error msg) + +let expect_type got want = + if not (equal_typ got want) then + fail_type + (Printf.sprintf "type mismatch: got %s, expected %s" (string_of_typ got) (string_of_typ want)) + +let rec validate_type (structs : (typ StringMap.t) StringMap.t) (allow_void : bool) = function + | TVoid when allow_void -> () + | TVoid -> fail_type "void is not a valid variable type" + | TStruct n -> + if not (StringMap.mem n structs) then fail_type ("unknown struct type: " ^ n) + | TArray t -> + if equal_typ t TVoid then fail_type "array element type cannot be void"; + validate_type structs false t + | TInt | TBool -> () + +let collect_ctx (prog : program) : tc_ctx = + let rec collect tops structs funcs globals = + match tops with + | [] -> { structs; funcs; globals } + | TopStruct s :: tl -> + if StringMap.mem s.sname structs then fail_type ("duplicate struct: " ^ s.sname); + let fields = + List.fold_left + (fun acc (t, n) -> + if StringMap.mem n acc then fail_type ("duplicate field " ^ n ^ " in struct " ^ s.sname); + StringMap.add n t acc) + StringMap.empty s.fields + in + collect tl (StringMap.add s.sname fields structs) funcs globals + | TopFunc f :: tl -> + if StringMap.mem f.name funcs then fail_type ("duplicate function: " ^ f.name); + let sig_ = { fparams = List.map fst f.params; fret = f.ret } in + collect tl structs (StringMap.add f.name sig_ funcs) globals + | TopGlobalVar (t, n, _) :: tl -> + if StringMap.mem n globals then fail_type ("duplicate global variable: " ^ n); + collect tl structs funcs (StringMap.add n t globals) + in + collect prog StringMap.empty StringMap.empty StringMap.empty + +let lookup_var env x = + match StringMap.find_opt x env with + | Some t -> t + | None -> fail_type ("unknown variable: " ^ x) + +let lookup_struct_field ctx sname fname = + match StringMap.find_opt sname ctx.structs with + | None -> fail_type ("unknown struct: " ^ sname) + | Some fields -> + (match StringMap.find_opt fname fields with + | None -> fail_type ("unknown field " ^ fname ^ " on struct " ^ sname) + | Some t -> t) + +let rec infer_expr ctx env = function + | IntLit _ -> TInt + | BoolLit _ -> TBool + | Var x -> lookup_var env x + | Unop (Neg, e) -> + expect_type (infer_expr ctx env e) TInt; + TInt + | Unop (Not, e) -> + expect_type (infer_expr ctx env e) TBool; + TBool + | Binop (op, a, b) -> + let ta = infer_expr ctx env a in + let tb = infer_expr ctx env b in + (match op with + | Add | Sub | Mul | Div | Mod -> + expect_type ta TInt; + expect_type tb TInt; + TInt + | And | Or -> + expect_type ta TBool; + expect_type tb TBool; + TBool + | Lt | Le | Gt | Ge -> + expect_type ta TInt; + expect_type tb TInt; + TBool + | Eq | Ne -> + if not (equal_typ ta tb) then fail_type "equality operands must have same type"; + TBool) + | Assign (lhs, rhs) -> + (match lhs with Var _ | ArrayGet _ | StructGet _ -> () | _ -> fail_type "left side of assignment is not assignable"); + let tl = infer_expr ctx env lhs in + let tr = infer_expr ctx env rhs in + expect_type tr tl; + tl + | ArrayGet (arr, idx) -> + expect_type (infer_expr ctx env idx) TInt; + (match infer_expr ctx env arr with + | TArray t -> t + | t -> fail_type ("indexing requires array, got " ^ string_of_typ t)) + | StructGet (obj, fld) -> + (match infer_expr ctx env obj with + | TStruct sname -> lookup_struct_field ctx sname fld + | t -> fail_type ("field access requires struct, got " ^ string_of_typ t)) + | Call (callee, args) -> + let fname = + match callee with + | Var n -> n + | _ -> fail_type "only direct function calls are supported" + in + let sig_ = + match StringMap.find_opt fname ctx.funcs with + | Some s -> s + | None -> fail_type ("unknown function: " ^ fname) + in + if List.length args <> List.length sig_.fparams then + fail_type + (Printf.sprintf "function %s expects %d arguments, got %d" fname (List.length sig_.fparams) + (List.length args)); + List.iter2 (fun arg pty -> expect_type (infer_expr ctx env arg) pty) args sig_.fparams; + sig_.fret + +let rec check_stmt ctx ret env = function + | VarDecl (t, n, init) -> + validate_type ctx.structs false t; + (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t); + StringMap.add n t env + | Expr e -> + ignore (infer_expr ctx env e); + env + | If (cond, tbranch, ebranch) -> + expect_type (infer_expr ctx env cond) TBool; + ignore (check_block ctx ret env tbranch); + ignore (check_block ctx ret env ebranch); + env + | ForEach (it_t, it_name, iterable, body) -> + validate_type ctx.structs false it_t; + (match infer_expr ctx env iterable with + | TArray elem_t -> expect_type elem_t it_t + | t -> fail_type ("foreach expects array iterable, got " ^ string_of_typ t)); + let env' = StringMap.add it_name it_t env in + ignore (check_block ctx ret env' body); + env + | Return None -> + expect_type TVoid ret; + env + | Return (Some e) -> + expect_type (infer_expr ctx env e) ret; + env + | Block stmts -> + ignore (check_block ctx ret env stmts); + env + +and check_block ctx ret env stmts = List.fold_left (check_stmt ctx ret) env stmts + +let rec has_return_stmt = function + | Return _ -> true + | If (_, t, e) -> List.exists has_return_stmt t || List.exists has_return_stmt e + | ForEach (_, _, _, body) | Block body -> List.exists has_return_stmt body + | VarDecl _ | Expr _ -> false + +let check_program (prog : program) = + let ctx = collect_ctx prog in + StringMap.iter (fun _ t -> validate_type ctx.structs false t) ctx.globals; + StringMap.iter + (fun _ sig_ -> + List.iter (validate_type ctx.structs false) sig_.fparams; + validate_type ctx.structs true sig_.fret) + ctx.funcs; + List.iter + (function + | TopStruct s -> + List.iter (fun (t, _) -> validate_type ctx.structs false t) s.fields + | TopGlobalVar (t, _, init) -> + validate_type ctx.structs false t; + let env = ctx.globals in + (match init with None -> () | Some e -> expect_type (infer_expr ctx env e) t) + | TopFunc f -> + let env_with_globals = ctx.globals in + let env = + List.fold_left + (fun acc (t, n) -> + validate_type ctx.structs false t; + if StringMap.mem n acc then fail_type ("duplicate parameter name: " ^ n); + StringMap.add n t acc) + env_with_globals f.params + in + ignore (check_block ctx f.ret env f.body); + if (not (equal_typ f.ret TVoid)) && not (List.exists has_return_stmt f.body) then + fail_type ("non-void function " ^ f.name ^ " must return a value")) + prog + +let type_check (prog : program) = + try + check_program prog; + Ok () + with Type_error msg -> Error msg diff --git a/lib/typechecker.mli b/lib/typechecker.mli new file mode 100644 index 0000000..d88d573 --- /dev/null +++ b/lib/typechecker.mli @@ -0,0 +1 @@ +val type_check : Ast.program -> (unit, string) result