summaryrefslogtreecommitdiff
path: root/compiler/PrePasses.ml
blob: a46ef79ce7aa5c2e24ce6dbf4c64f1c0224467ad (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
(** This file contains passes we apply on the AST *before* calling the
    (concrete/symbolic) interpreter on it
 *)

open Types
open Expressions
open LlbcAst
open Utils
open LlbcAstUtils
open Errors

let log = Logging.pre_passes_log

(** Rustc inserts a lot of drops before the assignments.

    We consider those drops are part of the assignment, and splitting the
    drop and the assignment is problematic for us because it can introduce
    [⊥] under borrows. For instance, we encountered situations like the
    following one:
    
    {[
      drop( *x ); // Illegal! Inserts a ⊥ under a borrow
      *x = move ...;
    ]}

    Rem.: we don't use this anymore
 *)
let filter_drop_assigns (f : fun_decl) : fun_decl =
  (* Visitor which removes a [Drop p] when the statement coming right after
     it (either directly, or as the head of a sequence) is an assignment to
     the same place [p]. *)
  let visitor =
    object (self)
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        (* [true] iff [st1] drops exactly the place that [st2] starts by
           assigning *)
        let drop_is_redundant =
          match (st1.content, st2.content) with
          | Drop p1, Assign (p2, _)
          | Drop p1, Sequence ({ content = Assign (p2, _); meta = _ }, _) ->
              p1 = p2
          | _ -> false
        in
        if drop_is_redundant then
          (* Forget the drop: only keep (and explore) what follows *)
          (self#visit_statement env st2).content
        else super#visit_Sequence env st1 st2
    end
  in
  (* Rewrite the body, if there is one *)
  let body =
    match f.body with
    | None -> None
    | Some fbody ->
        Some { fbody with body = visitor#visit_statement () fbody.body }
  in
  { f with body }

(** This pass slightly restructures the control-flow to remove the need to
    merge branches during the symbolic execution in some quite common cases
    where doing a merge is actually not necessary and leads to an ugly translation.

    TODO: this is useless

    For instance, it performs the following transformation:
    {[
      if b {
          var@0 := &mut *x;
      }
      else {
          var@0 := move y;
      }
      return;

      ~~>

      if b {
          var@0 := &mut *x;
          return;
      }
      else {
          var@0 := move y;
          return;
      }
    ]}

    This way, the translated body doesn't have an intermediate assignment,
    for the `if ... then ... else ...` expression (together with a backward
    function).

    More precisely, we move (and duplicate) a statement happening after a branching
    inside the branches if:
    - this statement ends with [return] or [panic]
    - this statement is only made of a sequence of nops, assignments (with some
      restrictions on the rvalue), fake reads, drops (usually, returns will be
      followed by such statements)
 *)
let remove_useless_cf_merges (crate : crate) (f : fun_decl) : fun_decl =
  (* Keep the input declaration around: we print it in the debug log below *)
  let f0 = f in
  (* Decide whether a statement can be duplicated inside the branches of a
   * switch.
   *
   * [must_end_with_exit]: when [true], the statement has to terminate with
   * [return] or [panic]. Only the last statement of a sequence is subject to
   * this constraint: the statements which come before it are checked with
   * the flag set to [false].
   * *)
  let rec can_be_moved_aux (must_end_with_exit : bool) (st : statement) : bool =
    (* Statements which are accepted, but only when they are not in the
       final position *)
    let ok_if_not_last = not must_end_with_exit in
    match st.content with
    | Panic | Return -> true
    | Sequence (st1, st2) ->
        can_be_moved_aux false st1 && can_be_moved_aux must_end_with_exit st2
    | FakeRead _ | Drop _ | Nop -> ok_if_not_last
    | Assign (_, (Use _ | RvRef _)) -> ok_if_not_last
    | Assign (_, Aggregate (AggregatedAdt (TTuple, _, _), [])) ->
        (* Tuple aggregate without any operand *)
        ok_if_not_last
    | Assign (_, _) -> false
    | SetDiscriminant _ | Assert _ | Call _ | Break _ | Continue _ | Switch _
    | Loop _ ->
        false
  in
  let can_be_moved = can_be_moved_aux true in

  (* The visitor: whenever a switch is followed by a movable statement, push
     this statement at the end of the branches of the switch *)
  let visitor =
    object
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        match st1.content with
        | Switch switch when can_be_moved st2 ->
            super#visit_Switch env (chain_statements_in_switch switch st2)
        | _ -> super#visit_Sequence env st1 st2
    end
  in

  (* Rewrite the body, if there is one *)
  let body =
    match f.body with
    | None -> None
    | Some fbody ->
        Some { fbody with body = visitor#visit_statement () fbody.body }
  in
  let f = { f with body } in
  (* Log the declaration before and after the transformation *)
  log#ldebug
    (lazy
      (let before = Print.Crate.crate_fun_decl_to_string crate f0 in
       let after = Print.Crate.crate_fun_decl_to_string crate f in
       "Before/after [remove_useless_cf_merges]:\n" ^ before ^ "\n\n" ^ after
       ^ "\n"));
  f

(** This pass restructures the control-flow by inserting all the statements
    which occur after loops *inside* the loops, thus removing the need to
    have breaks (we later check that we removed all the breaks).

    This is needed because of the way we perform the symbolic execution
    on the loops for now.

    Rem.: we check that there are no nested loops (all the breaks must break
    to the first outer loop, and the statements we insert inside the loops
    mustn't contain breaks themselves).

    For instance, it performs the following transformation:
    {[
      loop {
        if b {
          ...
          continue 0;
        }
        else {
          ...
          break 0;
        }
      };
      x := x + 1;
      return;

      ~~>

      loop {
        if b {
          ...
          continue 0;
        }
        else {
          ...
          x := x + 1;
          return;
        }
      };
    ]}
 *)
let remove_loop_breaks (crate : crate) (f : fun_decl) : fun_decl =
  (* Keep the input declaration around: we print it in the debug log below *)
  let f0 = f in

  (* Return [true] iff the statement contains no loop, no break and no
     continue. The detection visitor raises [Found] on the first one. *)
  let statement_has_no_loop_break_continue (st : statement) : bool =
    let detector =
      object
        inherit [_] iter_statement
        method! visit_Loop _ _ = raise Found
        method! visit_Break _ _ = raise Found
        method! visit_Continue _ _ = raise Found
      end
    in
    match detector#visit_statement () st with
    | () -> true
    | exception Found -> false
  in

  (* Replace the break statements found in [st] with the content of [nst].

     We check that every break we encounter breaks exactly one level, and
     that there are no nested loops. The visitor environment is a boolean
     which is [true] once we have entered a loop.
  *)
  let replace_breaks_with (st : statement) (nst : statement) : statement =
    let replacer =
      object
        inherit [_] map_statement as super

        method! visit_statement in_loop s =
          match s.content with
          | Break depth ->
              cassert __FILE__ __LINE__ (depth = 0) s.meta
                "Breaks to outer loops are not supported yet";
              (* Keep the meta-information of the break, but substitute the
                 content *)
              { s with content = nst.content }
          | Loop inner ->
              cassert __FILE__ __LINE__ (not in_loop) s.meta
                "Nested loops are not supported yet";
              let content = super#visit_Loop true inner in
              { s with content }
          | _ -> super#visit_statement in_loop s
      end
    in
    replacer#visit_statement false st
  in

  (* The visitor: when a loop is followed by some statement, move this
     statement inside the loop, in place of the breaks *)
  let visitor =
    object
      inherit [_] map_statement as super

      method! visit_Sequence env st1 st2 =
        let starts_with_loop =
          match st1.content with Loop _ -> true | _ -> false
        in
        if starts_with_loop then (
          cassert __FILE__ __LINE__
            (statement_has_no_loop_break_continue st2)
            st2.meta "Sequences of loops are not supported yet";
          (replace_breaks_with st1 st2).content)
        else super#visit_Sequence env st1 st2
    end
  in

  (* Rewrite the body, if there is one *)
  let body =
    match f.body with
    | None -> None
    | Some fbody ->
        Some { fbody with body = visitor#visit_statement () fbody.body }
  in
  let f = { f with body } in
  (* Log the declaration before and after the transformation *)
  log#ldebug
    (lazy
      (let before = Print.Crate.crate_fun_decl_to_string crate f0 in
       let after = Print.Crate.crate_fun_decl_to_string crate f in
       "Before/after [remove_loop_breaks]:\n" ^ before ^ "\n\n" ^ after ^ "\n"));
  f

(** Remove the use of shallow borrows from a function.

    In theory, this allows the code to do more things than what Rust allows,
    and in particular it would allow to modify the variant of an enumeration
    in a guard, while matching over this enumeration.

    In practice, this is not a soundness issue.

    **Soundness**:
    ==============
    For instance, let's consider the following Rust code:
    {[
      match ls : &mut List<u32> {
        Nil => return None,
        Cons(hd, tl) if *hd > 0 => return Some(hd),
        Cons(hd, tl) => ...,
      }
    ]}

    The Rust compiler enforces the fact that the guard doesn't modify the
    variant of [ls]. It does so by compiling to (approximately) the following
    MIR code:
    {[
      let d = discriminant( *ls);
      switch d {
        0 => ... // Nil case
        1 => { // Cons case
          // Introduce hd and tl
          hd := &mut ( *ls as Cons).0;
          tl := &mut ( *ls as Cons).1;

          // Evaluate the guard
          tmp := &shallow *ls; // Create a shallow borrow of ls
          b := *hd > 0;
          fake_read(tmp); // Make sure the shallow borrow lives until the end of the guard

          // We evaluated the guard: go to the proper branch
          if b then {
            ... // First Cons branch
          }
          else {
            ... // Second Cons branch
          }
        }
      }
    ]}

    Shallow borrows are a bit like shared borrows but with the following
    difference:
    - they do forbid modifying the value directly below the loan
    - but they allow modifying a strict subvalue
    For instance, above, for as long as [tmp] lives:
    - we can't change the variant of [*ls]
    - but we can update [hd] and [tl]

    On our side, we have to pay attention to two things:
    - Removing shallow borrows doesn't modify the behavior of the program.
      In practice, adding shallow borrows can lead to a MIR program being
      rejected, but it doesn't change this program's behavior.

      Regarding this, there is something important. At the top-level AST,
      if the guard modifies the variant (say, to [Nil]) and evaluates to [false],
      then we go to the second [Cons] branch, which doesn't really make sense
      (though it is not a soundness issue - for soundness, see next point).

      At the level of MIR, as the match has been desugared, there is no issue
      in modifying the variant of the scrutinee.

    - We have to make sure the evaluation is sound. In particular, modifying
      the variant of [*ls] should invalidate [hd] and [tl]. This is important
      for the Rust compiler to enforce this on its side. In the case of LLBC,
      we don't need additional constraints because modifying [*ls] will
      indeed invalidate [hd] and [tl].

      More specifically, at the beginning of the [Cons] branch and just after
      we introduced [hd] and [tl] we have the following environment:
      {[
        ... // l0 comes from somewhere - we omit the corresponding loan
        ls -> MB l0 (Cons (ML l1) (ML l2))
        hd -> MB l1 s1
        tl -> MB l2 s2
      ]}

      If we evaluate: [*ls := Nil], we get:
      {[
        ... // l0 comes from somewhere - we omit the corresponding loan
        ls -> MB l0 Nil
        hd -> ⊥ // invalidated
        tl -> ⊥ // invalidated
      ]}

    **Implementation**:
    ===================
    The pass is implemented as follows:
    - we look for all the variables which appear in pattern of the following
      shape and remove them:
      {[
        let x = &shallow ...;
        ...
      ]}
    - whenever we find such a variable [x], we remove all the subsequent
      occurrences of [fake_read(x)].

    We then check that [x] completely disappeared from the function body (for
    sanity).
 *)
let remove_shallow_borrows (crate : crate) (f : fun_decl) : fun_decl =
  (* Keep the input declaration around: we print it in the debug log below *)
  let f0 = f in
  let filter_in_body (body : statement) : statement =
    (* The variables which were assigned a shallow borrow, and that we thus
       removed. Filled by the first visitor, read by both. *)
    let filtered = ref VarId.Set.empty in
    let is_filtered id = VarId.Set.mem id !filtered in

    (* First traversal: replace with [Nop] the assignments of a shallow borrow
       to a variable (no projection), together with the fake reads of the
       variables we removed so far. *)
    let filter_visitor =
      object
        inherit [_] map_statement as super

        method! visit_Assign env p rv =
          match rv with
          | RvRef (_, BShallow) when p.projection = [] ->
              (* Filter, and remember the variable *)
              filtered := VarId.Set.add p.var_id !filtered;
              Nop
          | _ ->
              (* Don't filter *)
              super#visit_Assign env p rv

        method! visit_FakeRead env p =
          let reads_filtered_var = p.projection = [] && is_filtered p.var_id in
          if reads_filtered_var then (* Filter *) Nop
          else super#visit_FakeRead env p
      end
    in
    let filtered_body = filter_visitor#visit_statement () body in

    (* Second traversal (sanity check): none of the variables we filtered
       should still appear in the body. *)
    let check_visitor =
      object
        inherit [_] iter_statement as super

        (* The environment is the meta-information of the statement we are
           currently exploring: we use it to report the errors *)
        method! visit_statement _ st = super#visit_statement st.meta st

        method! visit_var_id meta id =
          cassert __FILE__ __LINE__
            (not (is_filtered id))
            meta
            "Filtered variables should have completely disappeared from the \
             body"
      end
    in
    check_visitor#visit_statement filtered_body.meta filtered_body;

    (* Return the updated body *)
    filtered_body
  in

  (* Rewrite the body, if there is one *)
  let body =
    match f.body with
    | Some fbody -> Some { fbody with body = filter_in_body fbody.body }
    | None -> None
  in
  let f = { f with body } in
  (* Log the declaration before and after the transformation *)
  log#ldebug
    (lazy
      (let before = Print.Crate.crate_fun_decl_to_string crate f0 in
       let after = Print.Crate.crate_fun_decl_to_string crate f in
       "Before/after [remove_shallow_borrows]:\n" ^ before ^ "\n\n" ^ after
       ^ "\n"));
  f

(** Apply the pre-passes ([remove_loop_breaks], then [remove_shallow_borrows])
    to all the function declarations of a crate.

    If a pass raises [CFailure] on a function, we save an error and replace
    the body of this function with [None].
 *)
let apply_passes (crate : crate) : crate =
  (* Run one pass on one function: if it fails we erase the body *)
  let run_pass (pass : fun_decl -> fun_decl) (f : fun_decl) =
    match pass f with
    | updated -> updated
    | exception CFailure (_, _) ->
        (* The error was already registered, we don't need to register it
           twice. However, we replace the body of the function, and save an
           error to report to the user the fact that we will ignore the
           function body *)
        let fmt = Print.Crate.crate_to_fmt_env crate in
        let name = Print.name_to_string fmt f.name in
        save_error __FILE__ __LINE__ (Some f.meta)
          ("Ignoring the body of '" ^ name ^ "' because of previous error");
        { f with body = None }
  in
  (* Run one pass on all the declarations *)
  let run_on_all decls pass = FunDeclId.Map.map (run_pass pass) decls in
  (* The passes are applied one after the other, in the order of the list *)
  let passes = [ remove_loop_breaks crate; remove_shallow_borrows crate ] in
  let fun_decls = List.fold_left run_on_all crate.fun_decls passes in
  let crate = { crate with fun_decls } in
  log#ldebug
    (lazy ("After pre-passes:\n" ^ Print.Crate.crate_to_string crate ^ "\n"));
  crate