diff --git a/README.md b/README.md index 6d5b6c0..3536ccc 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ ![CI Status](https://github.com/ethanuppal/cs3110_compiler/actions/workflows/ci.yaml/badge.svg) > "x86 is simple trust me bro" -> Last updated: 2024-05-08 22:10:33.402786 +> Last updated: 2024-05-08 23:00:51.290592 ``` $ ./main -h @@ -26,16 +26,18 @@ Usage: ./main [-h|-v] $ ./main -v x86ISTMB v0.1.0 -Written by: Utku Melemeti, Jason Klein, Jeffrey Huang, Vijay Shanmugam, Ethan Uppal +Written by: Utku Melemeti, Ethan Uppal, Jeffrey Huang, Jason Klein, Vijay Shanmugam ``` ## Group -- Jason Klein (jak532) - Utku Melemetci (um44) +- Ethan Uppal (eu55) - Jeffrey Huang (jrh382) + +With some contributions from - Vijay Shanmugam (vrs29) -- Ethan Uppal (eu55) +- Jason Klein (jak532) ## Usage diff --git a/README.md.template b/README.md.template index 4b79a10..cfba0c7 100644 --- a/README.md.template +++ b/README.md.template @@ -14,11 +14,13 @@ $ ./main -v ## Group -- Jason Klein (jak532) - Utku Melemetci (um44) +- Ethan Uppal (eu55) - Jeffrey Huang (jrh382) + +With some contributions from - Vijay Shanmugam (vrs29) -- Ethan Uppal (eu55) +- Jason Klein (jak532) ## Usage diff --git a/lib/frontend/analysis.ml b/lib/frontend/analysis.ml index 4c6a25d..26fb590 100644 --- a/lib/frontend/analysis.ml +++ b/lib/frontend/analysis.ml @@ -1,5 +1,4 @@ open Ast -open Util type analysis_error_info = | GeneralInfo @@ -9,6 +8,7 @@ type analysis_error_info = | `InvalidSig of string * Type.t list | `DerefRValue of Type.t ] + | HaltInfo of { name : string } exception AnalyzerError of { @@ -53,6 +53,14 @@ let deref_rval_error ty ?(msg = "") ast = ast; } +let halt_error name ?(msg = "") ast = + AnalyzerError + { + info = HaltInfo { name }; + msg = (if msg = "" then None else Some msg); + ast; + } + (** [bind_var_to_type ctx name ty ast] binds [name] to have [ty] in [ctx] as called in analyzing node [ast]. @@ -66,7 +74,7 @@ let bind_name_to_type ctx name ty ast = analyzing node [ast]. @raise AnalyzerError if [name] is not bound in [ctx]. *) -let get_var_type ctx name ast = +let get_type_of_name ctx name ast = match Context.get ctx name with | Some ty -> ty | None -> raise (name_error name ~msg:"unbound variable" ast) @@ -96,6 +104,9 @@ let analyzer_error_to_string info msg _ = (Type.to_string ty) in start_str ^ ": " ^ rest_str + | HaltInfo { name } -> + "Halting error: function '" ^ name ^ "' does not return from all paths" + ^ msg_str let () = Printexc.register_printer (function @@ -112,7 +123,7 @@ let () = let rec infer_expr (ctx : Type.t Context.t) expr = let infer_expr_aux expr = match expr with - | Var var -> var.ty <- Some (get_var_type ctx var.name (Left expr)) + | Var var -> var.ty <- Some (get_type_of_name ctx var.name (Left expr)) | ConstInt _ -> () | ConstBool _ -> () | Infix infix -> ( @@ -151,18 +162,33 @@ let rec infer_expr (ctx : Type.t Context.t) expr = raise (deref_rval_error rhs_ty (Left expr)); prefix.ty <- Some (Type.Pointer rhs_ty) | _ -> raise_error ()) + | Call call -> + let arg_tys = List.map (infer_expr ctx) call.args in + let exp_ty = get_type_of_name ctx call.name (Left expr) in + let exp_params, exp_return = + match exp_ty with + | FunctionType { params; return } -> (params, return) + | _ -> + raise + (name_error call.name ~msg:"only functions can be called" + (Left expr)) + in + if exp_params <> arg_tys then + raise (type_sig_error call.name arg_tys (Left expr)); + call.ty <- Some exp_return in infer_expr_aux expr; Option.get (type_of_expr expr) (* TODO: add Terminal and Nonterminal checks *) -(** [infer_stmt ctx stmt] is the type [stmt] will be assigned a type based on - [ctx]. +(** [infer_stmt ctx return_ctx stmt] is the type [stmt] will be assigned a type + based on [ctx] when the environment intends a return type of [return_ctx]. @raise AnalyzerError on failure. *) -let rec infer_stmt (ctx : Type.t Context.t) stmt = - (match stmt with +let rec infer_stmt (ctx : Type.t Context.t) return_ctx (stmt : stmt) : + Type.stmt_type = + match stmt with | Declaration { name; hint; expr } -> let expr_ty = infer_expr ctx expr in (match hint with @@ -172,8 +198,11 @@ let rec infer_stmt (ctx : Type.t Context.t) stmt = raise (type_mismatch_error hint_ty expr_ty ~msg:"in let statement" (Right stmt))); - bind_name_to_type ctx name expr_ty (Right stmt) - | Print expr -> infer_expr ctx expr |> ignore + bind_name_to_type ctx name expr_ty (Right stmt); + Nonterminal + | Print expr -> + infer_expr ctx expr |> ignore; + Nonterminal | Function _ -> raise (general_error ~msg:"functions can only be written at top level" @@ -184,22 +213,40 @@ let rec infer_stmt (ctx : Type.t Context.t) stmt = raise (type_mismatch_error Type.bool_prim_type cond_ty ~msg:"in if statement condition" (Right stmt)); - infer_body ctx body + infer_body ctx return_ctx body | Assignment (name, expr) -> - let exp_ty = get_var_type ctx name (Right stmt) in + let exp_ty = get_type_of_name ctx name (Right stmt) in let expr_ty = infer_expr ctx expr in if exp_ty <> expr_ty then raise (type_mismatch_error exp_ty expr_ty - ~msg:"variable types cannot be modified" (Right stmt)) - | Call _ -> failwith "not impl"); - Type.Nonterminal + ~msg:"variable types cannot be modified" (Right stmt)); + Nonterminal + | ExprStatement expr -> + ignore (infer_expr ctx expr); + Nonterminal + | Return expr_opt -> + let expr_ty = + match expr_opt with + | None -> Type.unit_prim_type + | Some expr -> infer_expr ctx expr + in + if return_ctx <> expr_ty then + raise + (type_mismatch_error return_ctx expr_ty ~msg:"invalid return type" + (Right stmt)); + Terminal (* TODO: final statement always needs to be Terminal *) -and infer_body ctx stmts = +and infer_body ctx return_ctx stmts = Context.push ctx; - List.iter (infer_stmt ctx >> ignore) stmts; - Context.pop ctx + let ty = + List.fold_left + (fun _ stmt -> infer_stmt ctx return_ctx stmt) + Nonterminal stmts + in + Context.pop ctx; + ty let infer prog = let ctx : Type.t Context.t = Context.make () in @@ -208,13 +255,20 @@ let infer prog = |> List.iter (fun stmt -> match stmt with | Function { name; params; return; body } -> - let fun_ty = Type.FunctionType { params; return } in + let fun_ty = + Type.FunctionType { params = List.map snd params; return } + in bind_name_to_type ctx name fun_ty (Right stmt); Context.push ctx; List.iter (fun (name, ty) -> bind_name_to_type ctx name ty (Right stmt)) params; - infer_body ctx body; + let body = + if return = Type.unit_prim_type then body @ [ Return None ] + else body + in + if infer_body ctx return body <> Terminal then + raise (halt_error name (Right stmt)); Context.pop ctx | _ -> raise diff --git a/lib/frontend/analysis.mli b/lib/frontend/analysis.mli index 77456d2..e171946 100644 --- a/lib/frontend/analysis.mli +++ b/lib/frontend/analysis.mli @@ -6,6 +6,7 @@ type analysis_error_info = | `InvalidSig of string * Type.t list | `DerefRValue of Type.t ] + | HaltInfo of { name : string } exception AnalyzerError of { diff --git a/lib/frontend/ast.ml b/lib/frontend/ast.ml index 8b3fa10..8b3ee68 100644 --- a/lib/frontend/ast.ml +++ b/lib/frontend/ast.ml @@ -30,6 +30,11 @@ type expr = rhs : expr; mutable ty : Type.t option; } + | Call of { + name : string; + args : expr list; + mutable ty : Type.t option; + } (** A statement can be executed. *) and stmt = @@ -37,9 +42,7 @@ and stmt = cond : expr; body : stmt list; } - | Call of string - (* tbd better function support ia ExpressionStatement need to add in stuff - baout returns and stuf lol*) + | ExprStatement of expr | Declaration of { name : string; hint : Type.t option; @@ -53,6 +56,7 @@ and stmt = body : stmt list; } | Print of expr + | Return of expr option (** A program is a series of statements. *) type prog = stmt list @@ -65,6 +69,7 @@ let type_of_expr = function | ConstBool _ -> Some Type.bool_prim_type | Infix { lhs = _; op = _; rhs = _; ty } -> ty | Prefix { op = _; rhs = _; ty } -> ty + | Call { name = _; args = _; ty } -> ty (** [expr_is_const expr] if and only if [expr] is a constant (i.e., cannot have an address taken of it). *) @@ -91,12 +96,14 @@ let rec expr_to_string = function ^ expr_to_string rhs ^ ")" | Prefix { op; rhs; ty = _ } -> "(" ^ op_to_string op ^ expr_to_string rhs ^ ")" + | Call { name; args; ty = _ } -> + name ^ "(" ^ (args |> List.map expr_to_string |> String.concat ", ") ^ ")" let stmt_to_string = let add_indent = String.make 4 ' ' in let rec stmt_to_string_aux indent stmt = let make_string = function - | Call name -> name ^ "()" + | ExprStatement expr -> expr_to_string expr | Declaration { name; hint; expr } -> let expr_type = type_of_expr expr in let display_type = if expr_type = None then hint else expr_type in @@ -116,14 +123,18 @@ let stmt_to_string = ^ (body |> List.map (stmt_to_string_aux (indent ^ add_indent)) |> String.concat "") - ^ "}" + ^ indent ^ "}" | Print expr -> "print " ^ expr_to_string expr | If { cond; body } -> "if " ^ expr_to_string cond ^ " {\n" ^ (body |> List.map (stmt_to_string_aux (indent ^ add_indent)) |> String.concat "") - ^ "}" + ^ indent ^ "}" + | Return expr_opt -> ( + match expr_opt with + | None -> "return" + | Some expr -> "return " ^ expr_to_string expr) in indent ^ make_string stmt ^ "\n" in diff --git a/lib/frontend/ir_gen.ml b/lib/frontend/ir_gen.ml index 342ba7f..04fedc0 100644 --- a/lib/frontend/ir_gen.ml +++ b/lib/frontend/ir_gen.ml @@ -44,12 +44,12 @@ let rec generate_expr ctx cfg block expr = in Basic_block.add_ir block ir_instr; Operand.make_var result + | Call _ -> failwith "no calls in ir gen" (** [generate_stmt ctx cfg block stmt] adds IR for [stmt] (and potentially more blocks) onto [block] in [cfg], and returns the block that program flow should continue from. *) let rec generate_stmt ctx cfg block = function - | Call _ -> failwith "not implemented" | Declaration { expr; name; _ } -> (* IR for this could probably be improved but it's fine *) let result = generate_expr ctx cfg block expr in @@ -85,6 +85,10 @@ let rec generate_stmt ctx cfg block = function let to_print = generate_expr ctx cfg block expr in Basic_block.add_ir block (Ir.DebugPrint to_print); block + | ExprStatement expr -> + ignore (generate_expr ctx cfg block expr); + block + | Return _ -> failwith "ir gen need to gen return" and generate_stmt_lst ctx cfg block lst = let block_ref = ref block in diff --git a/lib/frontend/lexer.mll b/lib/frontend/lexer.mll index a583157..85978c8 100644 --- a/lib/frontend/lexer.mll +++ b/lib/frontend/lexer.mll @@ -32,6 +32,7 @@ rule read = parse | "if" { IF } | "else" { ELSE } | "while" { WHILE } +| "return" { RETURN } | "true" { CONST_TRUE } | "false" { CONST_FALSE } | ['a'-'z' 'A'-'Z' '_'] ['0'-'9' 'a'-'z' 'A'-'Z' '_']* { IDEN (Lexing.lexeme lexbuf) } diff --git a/lib/frontend/parser.mly b/lib/frontend/parser.mly index ef303fd..847dc8b 100644 --- a/lib/frontend/parser.mly +++ b/lib/frontend/parser.mly @@ -7,7 +7,7 @@ %token IDEN %token PLUS MINUS TIMES DIVIDE MOD EQUALS BITAND %token LPAR RPAR LBRACE RBRACE COLON ARROW COMMA -%token PRINT ASSIGN LET FUNC IF ELSE WHILE +%token PRINT ASSIGN LET FUNC IF ELSE WHILE RETURN %token NEWLINE EOF %token INT_TYPE BOOL_TYPE @@ -49,6 +49,7 @@ expr: | PLUS expr { Prefix {op = Plus; rhs = $2; ty = None} } | MINUS expr { Prefix {op = Minus; rhs = $2; ty = None} } | TIMES expr { Prefix {op = Times; rhs = $2; ty = None} } + | name = IDEN; LPAR; args = separated_list(COMMA, expr); RPAR { Call { name; args; ty = None }} body_till_rbrace: | NEWLINE body_till_rbrace { $2 } @@ -64,9 +65,10 @@ return_type: stmt: | IF expr LBRACE body_till_rbrace { If {cond = $2; body = $4 } } - | IDEN LPAR RPAR { Call $1 } | LET IDEN COLON ty ASSIGN expr { Declaration {name = $2; hint = Some ($4); expr = $6} } | LET IDEN ASSIGN expr { Declaration {name = $2; hint = None; expr = $4} } | IDEN ASSIGN expr { Assignment ($1, $3) } | FUNC; name = IDEN; LPAR; params = separated_list(COMMA, param); RPAR; return_opt = option(return_type); LBRACE; body = body_till_rbrace { Function {name; params; return = if return_opt = None then Type.unit_prim_type else Option.get (return_opt); body} } | PRINT expr { Print $2 } + | RETURN; return_opt = option(expr) { Return (return_opt) } + | expr { ExprStatement $1 } diff --git a/lib/frontend/type.ml b/lib/frontend/type.ml index 89b842e..cdc4842 100644 --- a/lib/frontend/type.ml +++ b/lib/frontend/type.ml @@ -1,5 +1,3 @@ -open Util - module Primitive = struct (** [t] represents a primitive type. *) type t = @@ -24,7 +22,7 @@ type t = | Pointer of t | Var of string | FunctionType of { - params : (string * t) list; + params : t list; return : t; } @@ -35,7 +33,7 @@ let rec to_string = function | Var tvar -> tvar | FunctionType { params; return } -> "(" - ^ (params |> List.map (snd >> to_string) |> String.concat ", ") + ^ (params |> List.map to_string |> String.concat ", ") ^ ") -> " ^ to_string return let int_prim_type = Prim Int diff --git a/lib/user/meta.ml b/lib/user/meta.ml index 59da297..d57edf9 100644 --- a/lib/user/meta.ml +++ b/lib/user/meta.ml @@ -25,9 +25,9 @@ let get : t = authors = [ "Utku Melemeti"; - "Jason Klein"; + "Ethan Uppal"; "Jeffrey Huang"; + "Jason Klein"; "Vijay Shanmugam"; - "Ethan Uppal"; ]; } diff --git a/test/snapshots/type/return0.in b/test/snapshots/type/return0.in new file mode 100644 index 0000000..8a43491 --- /dev/null +++ b/test/snapshots/type/return0.in @@ -0,0 +1,3 @@ +func main() { + return +} diff --git a/test/snapshots/type/return0.out b/test/snapshots/type/return0.out new file mode 100644 index 0000000..97eef96 --- /dev/null +++ b/test/snapshots/type/return0.out @@ -0,0 +1,3 @@ +func main() -> Unit { + return +} diff --git a/test/snapshots/type/return1.in b/test/snapshots/type/return1.in new file mode 100644 index 0000000..14ae60b --- /dev/null +++ b/test/snapshots/type/return1.in @@ -0,0 +1,3 @@ +func main() -> Int { + +} diff --git a/test/snapshots/type/return1.out b/test/snapshots/type/return1.out new file mode 100644 index 0000000..46830c7 --- /dev/null +++ b/test/snapshots/type/return1.out @@ -0,0 +1 @@ +Halting error: function 'main' does not return from all paths diff --git a/test/snapshots/type/return2.in b/test/snapshots/type/return2.in new file mode 100644 index 0000000..5810e01 --- /dev/null +++ b/test/snapshots/type/return2.in @@ -0,0 +1,3 @@ +func main() -> Int { + return +} diff --git a/test/snapshots/type/return2.out b/test/snapshots/type/return2.out new file mode 100644 index 0000000..9cb8108 --- /dev/null +++ b/test/snapshots/type/return2.out @@ -0,0 +1 @@ +Type error: invalid return type: expected Int but received Unit diff --git a/test/snapshots/type/return3.in b/test/snapshots/type/return3.in new file mode 100644 index 0000000..2b311b5 --- /dev/null +++ b/test/snapshots/type/return3.in @@ -0,0 +1,3 @@ +func main() -> Int { + return 0 +} diff --git a/test/snapshots/type/return3.out b/test/snapshots/type/return3.out new file mode 100644 index 0000000..2b311b5 --- /dev/null +++ b/test/snapshots/type/return3.out @@ -0,0 +1,3 @@ +func main() -> Int { + return 0 +} diff --git a/test/snapshots/type/return4.in b/test/snapshots/type/return4.in new file mode 100644 index 0000000..061e409 --- /dev/null +++ b/test/snapshots/type/return4.in @@ -0,0 +1,4 @@ +func main() { + if true { + } +} diff --git a/test/snapshots/type/return4.out b/test/snapshots/type/return4.out new file mode 100644 index 0000000..450eff7 --- /dev/null +++ b/test/snapshots/type/return4.out @@ -0,0 +1,4 @@ +func main() -> Unit { + if true { + } +} diff --git a/test/snapshots/type/return5.in b/test/snapshots/type/return5.in new file mode 100644 index 0000000..b0beaf5 --- /dev/null +++ b/test/snapshots/type/return5.in @@ -0,0 +1,6 @@ +func main() -> Int { + // last statement here is terminal, since there is no 'else' branch we don't have a problem saying it halts + if true { + return 0 + } +} diff --git a/test/snapshots/type/return5.out b/test/snapshots/type/return5.out new file mode 100644 index 0000000..33b5e3d --- /dev/null +++ b/test/snapshots/type/return5.out @@ -0,0 +1,5 @@ +func main() -> Int { + if true { + return 0 + } +}