From 7870b9f816b095164d89a7ea07a9bc29bf8af875 Mon Sep 17 00:00:00 2001 From: Son Ho Date: Tue, 25 Jan 2022 09:38:01 +0100 Subject: Implement SymbolicToPure.translate_fun_sig --- src/Collections.ml | 24 +++++ src/Interpreter.ml | 16 ++-- src/Pure.ml | 88 ++++++++++++++--- src/SymbolicAst.ml | 12 ++- src/SymbolicToPure.ml | 260 +++++++++++++++++++++++++++++++++++++++++++------- src/main.ml | 3 +- 6 files changed, 344 insertions(+), 59 deletions(-) diff --git a/src/Collections.ml b/src/Collections.ml index ee998546..ee088a9d 100644 --- a/src/Collections.ml +++ b/src/Collections.ml @@ -61,6 +61,10 @@ end module type Map = sig include Map.S + val add_list : (key * 'a) list -> 'a t -> 'a t + + val of_list : (key * 'a) list -> 'a t + val to_string : string option -> ('a -> string) -> 'a t -> string (** "Simple" pretty printing function. @@ -80,6 +84,10 @@ module MakeMap (Ord : OrderedType) : Map with type key = Ord.t = struct module Map = Map.Make (Ord) include Map + let add_list bl m = List.fold_left (fun s (key, e) -> add key e s) m bl + + let of_list bl = add_list bl empty + let to_string indent_opt a_to_string m = let indent, break = match indent_opt with Some indent -> (indent, "\n") | None -> ("", " ") @@ -121,6 +129,10 @@ end module type Set = sig include Set.S + val add_list : elt list -> t -> t + + val of_list : elt list -> t + val to_string : string option -> t -> string (** "Simple" pretty printing function. @@ -140,6 +152,10 @@ module MakeSet (Ord : OrderedType) : Set with type elt = Ord.t = struct module Set = Set.Make (Ord) include Set + let add_list bl s = List.fold_left (fun s e -> add e s) s bl + + let of_list bl = add_list bl empty + let to_string indent_opt m = let indent, break = match indent_opt with Some indent -> (indent, "\n") | None -> ("", " ") @@ -246,6 +262,10 @@ module type MapInj = sig val add_seq : (key * elem) Seq.t -> t -> t val of_seq : (key * elem) Seq.t -> t + + val add_list : (key * elem) list -> t -> t + + val of_list : (key * elem) list -> t end (** See [MapInj] *) @@ -361,4 +381,8 @@ module MakeMapInj (Key : OrderedType) (Elem : OrderedType) : add_seq s m let of_seq s = add_seq s empty + + let add_list ls m = List.fold_left (fun m (key, elem) -> add key elem m) m ls + + let of_list ls = add_list ls empty end diff --git a/src/Interpreter.ml b/src/Interpreter.ml index 2c4d86d2..a9a44b0f 100644 --- a/src/Interpreter.ml +++ b/src/Interpreter.ml @@ -2,6 +2,7 @@ module L = Logging module T = Types module A = CfimAst module M = Modules +module SA = SymbolicAst open Cps open InterpreterUtils open InterpreterStatements @@ -170,8 +171,8 @@ module Test = struct List.iter test_unit_fun unit_funs (** Execute the symbolic interpreter on a function. *) - let test_function_symbolic (config : C.partial_config) (m : M.cfim_module) - (fid : A.FunDefId.id) : unit = + let test_function_symbolic (config : C.partial_config) (synthesize : bool) + (m : M.cfim_module) (fid : A.FunDefId.id) : unit = (* Retrieve the function declaration *) let fdef = A.FunDefId.nth m.functions fid in @@ -185,10 +186,11 @@ module Test = struct (* Create the continuation to check the function's result *) let cf_check res _ = match res with - | Return | Panic -> + | Return -> if synthesize then raise Errors.Unimplemented else None + | Panic -> (* Note that as we explore all the execution branches, one of * the executions can lead to a panic *) - None + if synthesize then Some SA.Panic else None | _ -> failwith ("Unit test failed (symbolic execution) on: " @@ -205,8 +207,8 @@ module Test = struct TODO: for now we ignore the functions which contain loops, because they are not supported by the symbolic interpreter. *) - let test_functions_symbolic (config : C.partial_config) (m : M.cfim_module) : - unit = + let test_functions_symbolic (config : C.partial_config) (synthesize : bool) + (m : M.cfim_module) : unit = let no_loop_funs = List.filter (fun f -> not (CfimAstUtils.fun_def_has_loops f)) m.functions in @@ -214,7 +216,7 @@ module Test = struct (* Execute the function - note that as the symbolic interpreter explores * all the path, some executions are expected to "panic": we thus don't * check the return value *) - test_function_symbolic config m def.A.def_id + test_function_symbolic config synthesize m def.A.def_id in List.iter test_fun no_loop_funs end diff --git a/src/Pure.ml b/src/Pure.ml index fcf8e6f0..6549d3fa 100644 --- a/src/Pure.ml +++ b/src/Pure.ml @@ -9,6 +9,7 @@ module RegionId = T.RegionId module VariantId = T.VariantId module FieldId = T.FieldId module SymbolicValueId = V.SymbolicValueId +module FunDefId = A.FunDefId module SynthPhaseId = IdGen () (** We give an identifier to every phase of the synthesis (forward, backward @@ -17,6 +18,9 @@ module SynthPhaseId = IdGen () module BackwardFunId = IdGen () (** Every backward function has its identifier *) +module VarId = IdGen () +(** Pay attention to the fact that we also define a [VarId] module in Values *) + type ty = | Adt of T.type_id * ty list (** [Adt] encodes ADTs, tuples and assumed types. @@ -26,7 +30,6 @@ type ty = | TypeVar of TypeVarId.id | Bool | Char - | Never | Integer of T.integer_type | Str | Array of ty (* TODO: there should be a constant with the array *) @@ -64,10 +67,7 @@ type symbolic_value = { *) } -type value = - | Concrete of constant_value - | Adt of adt_value - | Symbolic of symbolic_value +type value = Concrete of constant_value | Adt of adt_value and adt_value = { variant_id : (VariantId.id option[@opaque]); @@ -76,13 +76,27 @@ and adt_value = { and typed_value = { value : value; ty : ty } +type var = { id : VarId.id; ty : ty } +(** Because we introduce a lot of temporary variables, the list of variables + is not fixed: we thus must carry all its information with the variable + itself + *) + +(** Sometimes, when introducing several variables in an assignment (because + deconstructing a tuple) we can ignore some of the values: in such situation + we introduce dummy variables (extracted to "_"). + *) +type var_or_dummy = Var of var | Dummy + type projection_elem = { pkind : E.field_proj_kind; field_id : FieldId.id } type projection = projection_elem list -type operand = typed_value +type place = { var : VarId.id; projection : projection } + +type operand = Value of typed_value | Place of place -type assertion = { cond : operand; expected : bool } +(* type assertion = { cond : operand; expected : bool } *) type fun_id = | Local of A.FunDefId.id * BackwardFunId.id option @@ -94,20 +108,45 @@ type fun_id = type call = { func : fun_id; type_params : ty list; args : operand list } -type let_bindings = { call : call; bindings : symbolic_value list } +type left_value = unit +(** TODO: assignment left value *) + +type let_bindings = + | Call of var_or_dummy list * call + (** The called function and the tuple of returned values *) + | Assignment of var * place + (** Variable assignment: the introduced variable and the place we read *) + | ExpandEnum of var_or_dummy list * TypeDefId.id * VariantId.id * place + (** When expanding an enumeration with exactly one variant, we first + introduce something like this (with [ExpandEnum]): + ``` + let Cons x tl = ls in + ... + ``` + + Later, depending on the language we extract to, we can eventually + update it to something like this (for F*, for instance): + ``` + let x = Cons?.v ls in + let tl = Cons?.tl ls in + ... + ``` + + Note that we prefer not handling this case through a match. + TODO: actually why not encoding it as a match internally, then + generating the `let Cons ... = ... in ...` if we check there + is exactly one variant?... + *) (** **Rk.:** here, [expression] is not at all equivalent to the expressions used in CFIM. They are lambda-calculus expressions, and are thus actually more general than the CFIM statements, in a sense. *) type expression = + | Return of operand + | Panic | Let of let_bindings * expression (** Let bindings include the let-bindings introduced because of function calls *) - | Assert of assertion - | Return of typed_value - | Panic of SynthPhaseId.id - | Nop - | Sequence of expression * expression | Switch of operand * switch_body and switch_body = @@ -120,3 +159,26 @@ and match_branch = { vars : symbolic_value list; branch : expression; } + +type fun_sig = { + type_params : type_var list; + inputs : ty list; + outputs : ty list; + (** The list of outputs. + + In case of a forward function, the list will have length = 1. + However, in case of backward function, the list may have length > 1. + If the length is > 1, it gets extracted to a tuple type. Followingly, + we could not use a list because we can encode tuples, but here we + want to account for the fact that we immediately deconstruct the tuple + upon calling the backward function (because the backward function is + called to update a set of values in the environment). + *) +} + +type fun_def = { + def_id : FunDefId.id; + name : name; + signature : fun_sig; + body : expression; +} diff --git a/src/SymbolicAst.ml b/src/SymbolicAst.ml index fc2b2fc4..f1939802 100644 --- a/src/SymbolicAst.ml +++ b/src/SymbolicAst.ml @@ -23,8 +23,14 @@ type call = { dest : V.symbolic_value; } +(** **Rk.:** here, [expression] is not at all equivalent to the expressions + used in CFIM: they are a first step towards lambda-calculus expressions. + *) type expression = - | Return + | Return of V.typed_value + (** The typed value stored in [Return] is the value contained in the return + variable upon returning + *) | Panic | FunCall of call * expression | EndAbstraction of V.abs * expression @@ -34,8 +40,8 @@ type expression = and expansion = | ExpandNoBranch of V.symbolic_expansion * expression (** A symbolic expansion which doesn't generate a branching. - Includes: expansion of borrows, structures, enumerations with one - variants... *) + Includes: expansion of borrows, structures, enumerations with + exactly one variant... *) | ExpandEnum of (T.VariantId.id option * V.symbolic_value list * expression) list (** A symbolic expansion of an ADT value which leads to branching (i.e., diff --git a/src/SymbolicToPure.ml b/src/SymbolicToPure.ml index 19fa85b3..69c22b09 100644 --- a/src/SymbolicToPure.ml +++ b/src/SymbolicToPure.ml @@ -6,8 +6,10 @@ module E = Expressions module A = CfimAst module M = Modules module S = SymbolicAst +module TA = TypesAnalysis open Pure +(** TODO: move this, it is not useful for symbolic -> pure *) type name = | FunName of A.FunDefId.id * V.BackwardFunctionId.id option | TypeName of T.TypeDefId.id @@ -38,8 +40,8 @@ module NameMap = Collections.MakeMapInj (NameOrderedType) (Id.NameOrderedType) generation with collision in mind, it is always good to have such checks. *) -let fun_to_name (fdef : A.fun_def) (bid : V.BackwardFunctionId.id option) : - Id.name = +let translate_fun_name (fdef : A.fun_def) (bid : V.BackwardFunctionId.id option) + : Id.name = let sg = fdef.signature in (* General function to generate a suffix for a region group * (i.e., an abstraction)*) @@ -93,22 +95,26 @@ let fun_to_name (fdef : A.fun_def) (bid : V.BackwardFunctionId.id option) : add_to_last fdef.name (** Generates a name for a type (simply reuses the name in the definition) *) -let type_to_name (def : T.type_def) : Id.name = def.T.name +let translate_type_name (def : T.type_def) : Id.name = def.T.name type type_context = { type_defs : type_def TypeDefId.Map.t } -type fun_context = unit -(** TODO *) +type fun_context = { fun_defs : fun_def FunDefId.Map.t } +(* TODO: do we really need that actually? *) type synth_ctx = { names : NameMap.t; + (* TODO: remove? *) type_context : type_context; fun_context : fun_context; declarations : M.declaration_group list; } -let rec translate_sty (ctx : synth_ctx) (ty : T.sty) : ty = - let translate = translate_sty ctx in +type bs_ctx = { types_infos : TA.type_infos } +(** Body synthesis context *) + +let rec translate_sty (ty : T.sty) : ty = + let translate = translate_sty in match ty with | T.Adt (type_id, regions, tys) -> (* Can't translate types with regions for now *) @@ -125,52 +131,236 @@ let rec translate_sty (ctx : synth_ctx) (ty : T.sty) : ty = | Slice ty -> Slice (translate ty) | Ref (_, rty, _) -> translate rty -let translate_field (ctx : synth_ctx) (f : T.field) : field = +let translate_field (f : T.field) : field = let field_name = f.field_name in - let field_ty = translate_sty ctx f.field_ty in + let field_ty = translate_sty f.field_ty in { field_name; field_ty } -let translate_fields (ctx : synth_ctx) (fl : T.field list) : field list = - List.map (translate_field ctx) fl +let translate_fields (fl : T.field list) : field list = + List.map translate_field fl -let translate_variant (ctx : synth_ctx) (v : T.variant) : variant = +let translate_variant (v : T.variant) : variant = let variant_name = v.variant_name in - let fields = translate_fields ctx v.fields in + let fields = translate_fields v.fields in { variant_name; fields } -let translate_variants (ctx : synth_ctx) (vl : T.variant list) : variant list = - List.map (translate_variant ctx) vl +let translate_variants (vl : T.variant list) : variant list = + List.map translate_variant vl (** Translate a type def kind to IM *) -let translate_type_def_kind (ctx : synth_ctx) (kind : T.type_def_kind) : - type_def_kind = +let translate_type_def_kind (kind : T.type_def_kind) : type_def_kind = match kind with - | T.Struct fields -> Struct (translate_fields ctx fields) - | T.Enum variants -> Enum (translate_variants ctx variants) + | T.Struct fields -> Struct (translate_fields fields) + | T.Enum variants -> Enum (translate_variants variants) (** Translate a type definition from IM TODO: this is not symbolic to pure but IM to pure. Still, I don't see the point of moving this definition for now. *) -let translate_type_def (ctx : synth_ctx) (def : T.type_def) : - synth_ctx * type_def = +let translate_type_def (def : T.type_def) : type_def = (* Translate *) let def_id = def.T.def_id in - let name = type_to_name def in + let name = translate_type_name def in (* Can't translate types with regions for now *) assert (def.region_params = []); let type_params = def.type_params in - let kind = translate_type_def_kind ctx def.T.kind in - let def = { def_id; name; type_params; kind } in - (* Insert in the context *) - (* type context *) - let type_context = ctx.type_context in - let type_defs = type_context.type_defs in - let type_defs = TypeDefId.Map.add def_id def type_defs in - let type_context = { type_defs } in - (* names map *) - let names = NameMap.add (TypeName def_id) name ctx.names in - (* update the fields *) - let ctx = { ctx with type_context; names } in - (ctx, def) + let kind = translate_type_def_kind def.T.kind in + { def_id; name; type_params; kind } + +(** Translate a type, seen as an input/output of a forward function + (preserve all borrows, etc.) +*) + +let rec translate_fwd_ty (ctx : bs_ctx) (ty : 'r T.ty) : ty = + let translate = translate_fwd_ty ctx in + match ty with + | T.Adt (type_id, regions, tys) -> + (* Can't translate types with regions for now *) + assert (regions = []); + (* No general parametricity for now *) + assert (not (List.exists (TypesUtils.ty_has_borrows ctx.types_infos) tys)); + (* Translate the type parameters *) + let tys = List.map translate tys in + Adt (type_id, tys) + | TypeVar vid -> TypeVar vid + | Bool -> Bool + | Char -> Char + | Never -> failwith "Unreachable" + | Integer int_ty -> Integer int_ty + | Str -> Str + | Array ty -> + assert (not (TypesUtils.ty_has_borrows ctx.types_infos ty)); + Array (translate ty) + | Slice ty -> + assert (not (TypesUtils.ty_has_borrows ctx.types_infos ty)); + Slice (translate ty) + | Ref (_, rty, _) -> translate rty + +(** Translate a type, when some regions may have ended. + + We return an option, because the translated type may be empty. + + [inside_mut]: are we inside a mutable borrow? + *) +let rec translate_back_ty (ctx : bs_ctx) (keep_region : 'r -> bool) + (inside_mut : bool) (ty : 'r T.ty) : ty option = + let translate = translate_back_ty ctx keep_region inside_mut in + (* A small helper for "leave" types *) + let wrap ty = if inside_mut then Some ty else None in + match ty with + | T.Adt (type_id, regions, tys) -> ( + match type_id with + | T.AdtId _ | Assumed _ -> + (* Don't accept ADTs (which are not tuples) with borrows for now *) + assert (not (TypesUtils.ty_has_borrows ctx.types_infos ty)); + None + | T.Tuple -> ( + (* Tuples can contain borrows (which we eliminated) *) + let tys_t = List.filter_map translate tys in + match tys_t with [] -> None | _ -> Some (Adt (T.Tuple, tys_t)))) + | TypeVar vid -> wrap (TypeVar vid) + | Bool -> wrap Bool + | Char -> wrap Char + | Never -> failwith "Unreachable" + | Integer int_ty -> wrap (Integer int_ty) + | Str -> wrap Str + | Array ty -> ( + assert (not (TypesUtils.ty_has_borrows ctx.types_infos ty)); + match translate ty with None -> None | Some ty -> Some (Array ty)) + | Slice ty -> ( + assert (not (TypesUtils.ty_has_borrows ctx.types_infos ty)); + match translate ty with None -> None | Some ty -> Some (Slice ty)) + | Ref (r, rty, rkind) -> ( + match rkind with + | T.Shared -> + (* Ignore shared references, unless we are below a mutable borrow *) + if inside_mut then translate rty else None + | T.Mut -> + (* Dive in, remembering the fact that we are inside a mutable borrow *) + let inside_mut = true in + if keep_region r then translate_back_ty ctx keep_region inside_mut rty + else None) + +(** Small utility: list the transitive parents of a region var group. + We don't do that in an efficient manner, but it doesn't matter. + *) +let rec list_parent_region_groups (def : A.fun_def) (gid : T.RegionGroupId.id) : + T.RegionGroupId.Set.t = + let rg = T.RegionGroupId.nth def.signature.regions_hierarchy gid in + let parents = + List.fold_left + (fun s gid -> + (* Compute the parents *) + let parents = list_parent_region_groups def gid in + (* Parents U current region *) + let parents = T.RegionGroupId.Set.add gid parents in + (* Make the union with the accumulator *) + T.RegionGroupId.Set.union s parents) + T.RegionGroupId.Set.empty rg.parents + in + parents + +let translate_fun_sig (ctx : bs_ctx) (def : A.fun_def) + (bid : V.BackwardFunctionId.id option) : fun_sig = + let sg = def.signature in + (* Retrieve the list of parent backward functions *) + let gid, parents = + match bid with + | None -> (None, T.RegionGroupId.Set.empty) + | Some bid -> + let gid = T.RegionGroupId.of_int (V.BackwardFunctionId.to_int bid) in + let parents = list_parent_region_groups def gid in + (Some gid, parents) + in + (* List the inputs for: + * - the forward function + * - the parent backward functions, in proper order + * - the current backward function (if it is a backward function) + *) + let fwd_inputs = List.map (translate_fwd_ty ctx) sg.inputs in + (* For the backward functions: for now we don't supported nested borrows, + * so just check that there aren't parent regions *) + assert (T.RegionGroupId.Set.is_empty parents); + (* Small helper to translate types for backward functions *) + let translate_back_ty_for_gid (gid : T.RegionGroupId.id) : T.sty -> ty option + = + let rg = T.RegionGroupId.nth sg.regions_hierarchy gid in + let regions = T.RegionVarId.Set.of_list rg.regions in + let keep_region r = + match r with + | T.Static -> raise Unimplemented + | T.Var r -> T.RegionVarId.Set.mem r regions + in + let inside_mut = false in + translate_back_ty ctx keep_region inside_mut + in + (* Compute the additinal inputs for the current function, if it is a backward + * function *) + let back_inputs = + match gid with + | None -> [] + | Some gid -> + (* For now, we don't allow nested borrows, so the additional inputs to the + * backward function can only come from borrows that were returned like + * in (for the backward function we introduce for 'a): + * ``` + * fn f<'a>(...) -> &'a mut u32; + * ``` + * Upon ending the abstraction for 'a, we need to get back the borrow + * the function returned. + *) + List.filter_map (translate_back_ty_for_gid gid) [ sg.output ] + in + let inputs = List.append fwd_inputs back_inputs in + (* Outputs *) + let outputs : ty list = + match gid with + | None -> + (* This is a forward function: there is one output *) + [ translate_fwd_ty ctx sg.output ] + | Some gid -> + (* This is a backward function: there might be several outputs. + * The outputs are the borrows inside the regions of the abstractions + * and which are present in the input values. For instance, see: + * ``` + * fn f<'a>(x : 'a mut u32) -> ...; + * ``` + * Upon ending the abstraction for 'a, we give back the borrow which + * was consumed through the `x` parameter. + *) + List.filter_map (translate_back_ty_for_gid gid) sg.inputs + in + (* Type parameters *) + let type_params = sg.type_params in + (* Return *) + { type_params; inputs; outputs } + +let translate_typed_value (v : V.typed_value) (ctx : bs_ctx) : + bs_ctx * typed_value = + raise Unimplemented + +let rec translate_expression (def : A.fun_def) + (bid : V.BackwardFunctionId.id option) (body : S.expression) (ctx : bs_ctx) + : expression = + match body with + | S.Return v -> + let _, v = translate_typed_value v ctx in + Return (Value v) + | Panic -> Panic + | FunCall (call, e) -> raise Unimplemented + | EndAbstraction (abs, e) -> raise Unimplemented + | Expansion (sv, exp) -> raise Unimplemented + | Meta (_, e) -> + (* We ignore the meta information *) + translate_expression def bid e ctx + +let translate_fun_def (types_infos : TA.type_infos) (def : A.fun_def) + (bid : V.BackwardFunctionId.id option) (body : S.expression) : fun_def = + let bs_ctx = { types_infos } in + (* Translate the function *) + let def_id = def.A.def_id in + let name = translate_fun_name def bid in + let signature = translate_fun_sig bs_ctx def bid in + let body = translate_expression def bid body bs_ctx in + { def_id; name; signature; body } diff --git a/src/main.ml b/src/main.ml index 67db362e..492470c7 100644 --- a/src/main.ml +++ b/src/main.ml @@ -77,4 +77,5 @@ let () = I.Test.test_unit_functions config m; (* Evaluate the symbolic interpreter on the functions *) - I.Test.test_functions_symbolic config m + let synthesize = true in + I.Test.test_functions_symbolic config synthesize m -- cgit v1.2.3