(** This file contains the passes we apply on the AST *before* calling the
    (concrete/symbolic) interpreter on it. *)

open Types
open Expressions
open LlbcAst
open Utils
open LlbcAstUtils
open Errors

let log = Logging.pre_passes_log

(** Apply [update] to the body statement of [f], if [f] has a body.

    A function declaration without a body is returned with its body still set
    to [None]. *)
let map_fun_body (update : statement -> statement) (f : fun_decl) : fun_decl =
  let body =
    match f.body with
    | Some body -> Some { body with body = update body.body }
    | None -> None
  in
  { f with body }

(** Log (at debug level) the function declaration before and after the pass
    named [pass_name] was applied.

    The message is built lazily: the declarations are only printed if the
    logger actually forces the message. *)
let log_before_after (crate : crate) (pass_name : string) (before : fun_decl)
    (after : fun_decl) : unit =
  log#ldebug
    (lazy
      ("Before/after [" ^ pass_name ^ "]:\n"
      ^ Print.Crate.crate_fun_decl_to_string crate before
      ^ "\n\n"
      ^ Print.Crate.crate_fun_decl_to_string crate after
      ^ "\n"))

(** Rustc inserts a lot of drops before the assignments.

    We consider those drops are part of the assignment, and splitting the
    drop and the assignment is problematic for us because it can introduce
    [⊥] under borrows. For instance, we encountered situations like the
    following one:

    {[
      drop( *x ); // Illegal! Inserts a ⊥ under a borrow
      *x = move ...;
    ]}

    Rem.: we don't use this anymore *)
let filter_drop_assigns (f : fun_decl) : fun_decl =
  (* The visitor *)
  let obj =
    object (self)
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        match (st1.content, st2.content) with
        (* A drop immediately followed by an assignment, the assignment being
           either the whole second statement or the head of a sequence *)
        | Drop p1, Assign (p2, _)
        | Drop p1, Sequence ({ content = Assign (p2, _); meta = _ }, _) ->
            (* Only filter the drop if it targets the assigned place *)
            if p1 = p2 then (self#visit_statement env st2).content
            else super#visit_Sequence env st1 st2
        | _ -> super#visit_Sequence env st1 st2
    end
  in
  (* Map *)
  map_fun_body (fun st -> obj#visit_statement () st) f

(** This pass slightly restructures the control-flow to remove the need to
    merge branches during the symbolic execution in some quite common cases
    where doing a merge is actually not necessary and leads to an ugly
    translation.

    TODO: this is useless

    For instance, it performs the following transformation:
    {[
      if b {
        var@0 := &mut *x;
      }
      else {
        var@0 := move y;
      }
      return;

      ~~>

      if b {
        var@0 := &mut *x;
        return;
      }
      else {
        var@0 := move y;
        return;
      }
    ]}

    This way, the translated body doesn't have an intermediate assignment,
    for the `if ... then ... else ...` expression (together with a backward
    function).

    More precisely, we move (and duplicate) a statement happening after a
    branching inside the branches if:
    - this statement ends with [return] or [panic]
    - this statement is only made of a sequence of nops, assignments (with
      some restrictions on the rvalue), fake reads, drops (usually, returns
      will be followed by such statements) *)
let remove_useless_cf_merges (crate : crate) (f : fun_decl) : fun_decl =
  let f0 = f in
  (* Return [true] if the statement can be moved inside the branches of a
     switch.

     [must_end_with_exit]: we need this boolean because the inner statements
     (inside the encountered sequences) don't need to end with [return] or
     [panic], but all the paths inside the whole statement have to. *)
  let rec can_be_moved_aux (must_end_with_exit : bool) (st : statement) : bool
      =
    match st.content with
    | SetDiscriminant _
    | Assert _
    | Call _
    | Break _
    | Continue _
    | Switch _
    | Loop _ -> false
    | Assign (_, rv) -> (
        match rv with
        | Use _ | RvRef _ -> not must_end_with_exit
        | Aggregate (AggregatedAdt (TTuple, _, _), []) ->
            not must_end_with_exit
        | _ -> false)
    | FakeRead _ | Drop _ | Nop -> not must_end_with_exit
    | Panic | Return -> true
    | Sequence (st1, st2) ->
        (* Only the tail of the sequence has to end with an exit *)
        can_be_moved_aux false st1 && can_be_moved_aux must_end_with_exit st2
  in
  let can_be_moved = can_be_moved_aux true in

  (* The visitor *)
  let obj =
    object
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        match st1.content with
        | Switch switch ->
            if can_be_moved st2 then
              (* Duplicate [st2] at the end of every branch of the switch *)
              super#visit_Switch env (chain_statements_in_switch switch st2)
            else super#visit_Sequence env st1 st2
        | _ -> super#visit_Sequence env st1 st2
    end
  in

  (* Map *)
  let f = map_fun_body (fun st -> obj#visit_statement () st) f in
  log_before_after crate "remove_useless_cf_merges" f0 f;
  f

(** This pass restructures the control-flow by inserting all the statements
    which occur after loops *inside* the loops, thus removing the need to
    have breaks (we later check that we removed all the breaks).

    This is needed because of the way we perform the symbolic execution
    on the loops for now.

    Rem.: we check that there are no nested loops (all the breaks must break
    to the first outer loop, and the statements we insert inside the loops
    mustn't contain breaks themselves).

    For instance, it performs the following transformation:
    {[
      loop {
        if b {
          ...
          continue 0;
        }
        else {
          ...
          break 0;
        }
      };
      x := x + 1;
      return;

      ~~>

      loop {
        if b {
          ...
          continue 0;
        }
        else {
          ...
          x := x + 1;
          return;
        }
      };
    ]} *)
let remove_loop_breaks (crate : crate) (f : fun_decl) : fun_decl =
  let f0 = f in

  (* Check that a statement doesn't contain loops, breaks or continues *)
  let statement_has_no_loop_break_continue (st : statement) : bool =
    let obj =
      object
        inherit [_] iter_statement
        method! visit_Loop _ _ = raise Found
        method! visit_Break _ _ = raise Found
        method! visit_Continue _ _ = raise Found
      end
    in
    try
      obj#visit_statement () st;
      true
    with Found -> false
  in

  (* Replace a break statement with another statement (we check that the
     break statement breaks exactly one level, and that there are no nested
     loops).

     The visitor environment is a boolean which is [true] once we have
     entered a loop. *)
  let replace_breaks_with (st : statement) (nst : statement) : statement =
    let obj =
      object
        inherit [_] map_statement as super

        method! visit_statement entered_loop st =
          match st.content with
          | Loop loop ->
              cassert __FILE__ __LINE__ (not entered_loop) st.meta
                "Nested loops are not supported yet";
              { st with content = super#visit_Loop true loop }
          | Break i ->
              cassert __FILE__ __LINE__ (i = 0) st.meta
                "Breaks to outer loops are not supported yet";
              (* Keep the meta of the break, substitute the content *)
              { st with content = nst.content }
          | _ -> super#visit_statement entered_loop st
      end
    in
    obj#visit_statement false st
  in

  (* The visitor *)
  let obj =
    object
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        match st1.content with
        | Loop _ ->
            cassert __FILE__ __LINE__
              (statement_has_no_loop_break_continue st2)
              st2.meta "Sequences of loops are not supported yet";
            (* Move the statements following the loop in place of the breaks *)
            (replace_breaks_with st1 st2).content
        | _ -> super#visit_Sequence env st1 st2
    end
  in

  (* Map *)
  let f = map_fun_body (fun st -> obj#visit_statement () st) f in
  log_before_after crate "remove_loop_breaks" f0 f;
  f

(** Remove the use of shallow borrows from a function.

    In theory, this allows the code to do more things than what Rust allows,
    and in particular it would allow to modify the variant of an enumeration
    in a guard, while matching over this enumeration.

    In practice, this is not a soundness issue.

    **Soundness**:
    ==============
    For instance, let's consider the following Rust code:
    {[
      match ls : &mut List<u32> {
        Nil => return None,
        Cons(hd, tl) if *hd > 0 => return Some(hd),
        Cons(hd, tl) => ...,
      }
    ]}

    The Rust compiler enforces the fact that the guard doesn't modify the
    variant of [ls]. It does so by compiling to (approximately) the following
    MIR code:
    {[
      let d = discriminant( *ls);
      switch d {
        0 => ... // Nil case
        1 => { // Cons case
          // Introduce hd and tl
          hd := &mut ( *ls as Cons).0;
          tl := &mut ( *ls as Cons).1;

          // Evaluate the guard
          tmp := &shallow *ls; // Create a shallow borrow of ls
          b := *hd > 0;
          fake_read(tmp); // Make sure the shallow borrow lives until the end of the guard

          // We evaluated the guard: go to the proper branch
          if b then {
            ... // First Cons branch
          }
          else {
            ... // Second Cons branch
          }
        }
      }
    ]}

    Shallow borrows are a bit like shared borrows but with the following
    difference:
    - they do forbid modifying the value directly below the loan
    - but they allow modifying a strict subvalue
    For instance, above, for as long as [tmp] lives:
    - we can't change the variant of [*ls]
    - but we can update [hd] and [tl]

    On our side, we have to pay attention to two things:
    - Removing shallow borrows doesn't modify the behavior of the program.
      In practice, adding shallow borrows can lead to a MIR program being
      rejected, but it doesn't change this program's behavior.

      Regarding this, there is something important. At the top-level AST,
      if the guard modifies the variant (say, to [Nil]) and evaluates to
      [false], then we go to the second [Cons] branch, which doesn't really
      make sense (though it is not a soundness issue - for soundness, see
      next point).

      At the level of MIR, as the match has been desugared, there is no issue
      in modifying the variant of the scrutinee.

    - We have to make sure the evaluation is sound. In particular, modifying
      the variant of [*ls] should invalidate [hd] and [tl]. This is important
      for the Rust compiler to enforce this on its side. In the case of LLBC,
      we don't need additional constraints because modifying [*ls] will
      indeed invalidate [hd] and [tl].

      More specifically, at the beginning of the [Cons] branch and just after
      we introduced [hd] and [tl] we have the following environment:
      {[
        ... // l0 comes from somewhere - we omit the corresponding loan
        ls -> MB l0 (Cons (ML l1) (ML l2))
        hd -> MB l1 s1
        tl -> MB l2 s2
      ]}

      If we evaluate: [*ls := Nil], we get:
      {[
        ... // l0 comes from somewhere - we omit the corresponding loan
        ls -> MB l0 Nil
        hd -> ⊥ // invalidated
        tl -> ⊥ // invalidated
      ]}

    **Implementation**:
    ===================
    The pass is implemented as follows:
    - we look for all the variables which appear in patterns of the following
      shape and remove them:
      {[
        let x = &shallow ...;
        ...
      ]}
    - whenever we find such a variable [x], we remove all the subsequent
      occurrences of [fake_read(x)].

    We then check that [x] completely disappeared from the function body (for
    sanity). *)
let remove_shallow_borrows (crate : crate) (f : fun_decl) : fun_decl =
  let f0 = f in
  let filter_in_body (body : statement) : statement =
    (* The set of variables which were assigned a shallow borrow, and whose
       assignment we removed *)
    let filtered = ref VarId.Set.empty in

    let filter_visitor =
      object
        inherit [_] map_statement as super

        method! visit_Assign env p rv =
          match (p.projection, rv) with
          | [], RvRef (_, BShallow) ->
              (* Filter *)
              filtered := VarId.Set.add p.var_id !filtered;
              Nop
          | _ ->
              (* Don't filter *)
              super#visit_Assign env p rv

        method! visit_FakeRead env p =
          if p.projection = [] && VarId.Set.mem p.var_id !filtered then
            (* Filter *)
            Nop
          else super#visit_FakeRead env p
      end
    in

    (* Filter the variables *)
    let body = filter_visitor#visit_statement () body in

    (* Check that the filtered variables completely disappeared from the body *)
    let check_visitor =
      object
        inherit [_] iter_statement as super

        (* Remember the span of the statement we enter *)
        method! visit_statement _ st = super#visit_statement st.meta st

        method! visit_var_id meta id =
          cassert __FILE__ __LINE__
            (not (VarId.Set.mem id !filtered))
            meta
            "Filtered variables should have completely disappeared from the \
             body"
      end
    in
    check_visitor#visit_statement body.meta body;

    (* Return the updated body *)
    body
  in

  let f = map_fun_body filter_in_body f in
  log_before_after crate "remove_shallow_borrows" f0 f;
  f

(** Apply all the pre-passes to every function declaration of [crate].

    The passes are applied one after the other, each one over all the
    function declarations. If a pass fails on a function (by raising
    [CFailure]), we save an error and replace the body of this function with
    [None], so that the function body is ignored afterwards. *)
let apply_passes (crate : crate) : crate =
  let passes = [ remove_loop_breaks crate; remove_shallow_borrows crate ] in
  (* Attempt to apply a pass: if it fails we replace the body by [None] *)
  let apply_pass (pass : fun_decl -> fun_decl) (f : fun_decl) =
    try pass f
    with CFailure (_, _) ->
      (* The error was already registered, we don't need to register it twice.
         However, we replace the body of the function, and save an error to
         report to the user the fact that we will ignore the function body *)
      let fmt = Print.Crate.crate_to_fmt_env crate in
      let name = Print.name_to_string fmt f.name in
      save_error __FILE__ __LINE__ (Some f.item_meta.meta)
        ("Ignoring the body of '" ^ name ^ "' because of previous error");
      { f with body = None }
  in
  let fun_decls =
    List.fold_left
      (fun fl pass -> FunDeclId.Map.map (apply_pass pass) fl)
      crate.fun_decls passes
  in
  let crate = { crate with fun_decls } in
  log#ldebug
    (lazy ("After pre-passes:\n" ^ Print.Crate.crate_to_string crate ^ "\n"));
  crate