From 02b075a2f0be787860bf431e04b92cc450d9a888 Mon Sep 17 00:00:00 2001 From: Son Ho Date: Mon, 18 Sep 2023 12:29:08 +0200 Subject: Start writing a tutorial --- tests/lean/Tutorial.lean | 297 +++++++++++++++++++++++++++++++++++++++++++++++ tests/lean/lakefile.lean | 1 + 2 files changed, 298 insertions(+) create mode 100644 tests/lean/Tutorial.lean (limited to 'tests') diff --git a/tests/lean/Tutorial.lean b/tests/lean/Tutorial.lean new file mode 100644 index 00000000..2c37626e --- /dev/null +++ b/tests/lean/Tutorial.lean @@ -0,0 +1,297 @@ +/- A tutorial about using Lean to verify properties of programs generated by Aeneas -/ +import Base + +open Primitives +open Result + +namespace Tutorial + +/- # Simple Arithmetic Example -/ + +/- As a first step, we want to consider the function below which performs + simple arithmetic. There are several things to note. + -/ + +def mul2_add1 (x : U32) : Result U32 := do + let x1 ← x + x + let x2 ← x1 + (U32.ofInt 1) + ret x2 + +/- # Machine integers + Because Rust programs manipulate machine integers which occupy a fixed + size in memory, we model integers with types like [U32], which are simply + bounded integers. [U32.ofInt 1] is simply the machine integer constant 1. + + Because machine integers are bounded, arithmetic operations can fail, for instance + because of an overflow: this is the reason why the output of [mul2_add1] uses + the [Result] type. + + You can see a definition or its type by using the #print and #check commands. + It is also possible to jump to definitions (right-click + "Go to Definition" + in VS Code). + + For instance, you can see below that [U32] is defined in terms of a more generic + type [Scalar] (just move the cursor to the #print command below). + + -/ +#print U32 -- This shows the definition of [U32] + +#check mul2_add1 -- This shows the type of [mul2_add1] +#print mul2_add1 -- This show the full definition of [mul2_add1] + +/- # Syntax + We use a lightweight "do"-notation to write code which calls potentially failing + functions. In practice, all our function bodies start with a [do] keyword, + which indicates to Lean we want to use this lightweight syntax, and after + the do, instead of writing let-bindings as [let x1 := ...], we write them + as: [let x1 ← ...]. We also have lightweight notations for common operations + like the addition. + + *Remark:* in order to type the left-arrow [←] you can type: [\l]. Generally + speaking, your editor can tell you how to type the symbols you see in .lean + files. For instance in VS Code, you can simply hover your mouse over the + symbol and a pop-up window will open displaying all the information you need. + + For instance, in [let x1 ← x + x], the [x + x] expression desugars to + [Scalar.add x x] and the [let x1 ← ...] desugars to a call to [bind]. + + The definition of [bind x f] is worth investigating. It simply checks whether + [x : Result ...] successfully evaluated to some result, in which case it + calls [f] with this result, propagating the error otherwise. + -/ +#print Primitives.bind + +/- We show a desugared version of [mul2_add1] below: we remove the syntactic + sugar, and inline the definition of [bind] to make the matches over the + results explicit. + -/ +def mul2_add1_desugared (x : U32) : Result U32 := + match Scalar.add x x with + | ret x1 => -- Success case + match Scalar.add x1 (U32.ofInt 1) with + | ret x2 => ret x2 + | error => error + | error => error -- Propagating the errors + +/- Now that we have seen how [mul2_add1] is defined precisely, we can prove + simple properties about it. For instance, what about proving that it evaluates + to [2 * x + 1]? + + We advise writing specifications in a Hoare-logic style, that is with + preconditions (requirements which must be satisfied by the inputs upon + calling the function) and postconditions (properties that we know about + the output after the function call). + + In the case of [mul2_add1] we could state the theorem as follows. + -/ + +theorem mul2_add1_spec + -- The input + (x : U32) + /- The precondition (we give it the name "h" to be able to refer to it in the proof). + We simply state that [2 * x + 1] must not overflow. + + The ↑ notation (\u) is used to coerce values. Here, we coerce [x], which is + a bounded machine integer, to an unbounded mathematical integer. Note that + writing [↑ x] is the same as writing [x.val]. + -/ + (h : 2 * (↑ x) + 1 ≤ U32.max) + /- The postcondition -/ + : ∃ y, mul2_add1 x = ret y ∧ -- The call succeeds + ↑ y = 2 * (↑ x) + (1 : Int) -- The output has the expected value + := by + /- The proof -/ + -- Start by a call to the rewriting tactic to reveal the body of [mul2_add1] + rw [mul2_add1] + /- Here we use the fact that if [x + x] doesn't overflow, then the addition + succeeds and returns the value we expect, as given by the theorem [U32.add_spec]. + Doing this properly requires a few manipulations: we need to instantiate + the theorem, introduce it in the context, destruct it to introduce [x1], etc. + We automate this with the [progress] tactic: [progress with th as ⟨ x1 .. ⟩] + uses theorem [th], rename the output to [x1] and further decomposes the + postcondition of [th] (it is possible to provide more inputs to name the + assumptions introduced by the postcondition, for instance: [as ⟨ x1, h ⟩]). + + If you look at the goal after the call to [progress], you wil see there is + a new variable [x1] and an assumption stating that [↑ x1 = ↑ x + ↑ x]. + + Also, [U32.add_spec] has the precondition that the addition doesn't overflow. + In the present case, [progress] manages to prove this automatically by using + the fact that [2 * x + 1 < U32.max]. In case [progress] fails to prove a + precondition, it leaves it as a subgoal. + -/ + progress with U32.add_spec as ⟨ x1 .. ⟩ + /- We can call [progress] a second time for the second addition -/ + progress with U32.add_spec as ⟨ x2 .. ⟩ + /- We are now left with the remaining goal. We do this by calling the simplifier + then [scalar_tac], a tactic to solve linear arithmetic problems (i.e., + arithmetic problems in which there are not multiplications between two + variables): + -/ + simp at * + scalar_tac + +/- The proof above works, but it can actually be simplified a bit. In particular, + it is a bit tedious to specify that [progress] should use [U32.add_spec], while + in most situations the theorem to use is obvious by looking at the function. + + For this reason, we provide the possibility of registering theorems in a database + so that [progress] can automatically look them up. This is done by marking + theorems with custom attributes, like [pspec] below. + Theorems in the standard library like [U32.add_spec] have already been marked with such + attributes, meaning we don't need to tell [progress] to use them. + -/ +@[pspec] -- the [pspec] attribute saves the theorem in a database, for [progress] to use it +theorem mul2_add1_spec2 (x : U32) (h : 2 * (↑ x) + 1 ≤ U32.max) + : ∃ y, mul2_add1 x = ret y ∧ + ↑ y = 2 * (↑ x) + (1 : Int) + := by + rw [mul2_add1] + progress as ⟨ x1 .. ⟩ -- [progress] automatically lookups [U32.add_spec] + progress as ⟨ x2 .. ⟩ -- same + simp at *; scalar_tac + +/- Because we marked [mul2_add1_spec2] theorem with [pspec], [progress] can + now automatically look it up. For instance, below: + -/ +-- A dummy function which uses [mul2_add1] +def use_mul2_add1 (x : U32) (y : U32) : Result U32 := do + let x1 ← mul2_add1 x + x1 + y + +@[pspec] +theorem use_mul2_add1_spec (x : U32) (y : U32) (h : 2 * (↑ x) + 1 + ↑ y ≤ U32.max) : + ∃ z, use_mul2_add1 x y = ret z ∧ + ↑ z = 2 * (↑ x) + (1 : Int) + ↑ y := by + rw [use_mul2_add1] + -- Here we use [progress] on [mul2_add1] + progress as ⟨ x1 .. ⟩ + progress as ⟨ z .. ⟩ + simp at *; scalar_tac + +/- # Recursion -/ + +/- We can have a look at more complex examples, for example recursive functions. -/ + +/- A custom list type. + + Original Rust code: + ``` + pub enum CList { + CCons(T, Box>), + CNil, + } + ``` +-/ +inductive CList (T : Type) := +| CCons : T → CList T → CList T +| CNil : CList T + +-- Open the [CList] namespace, so that we can write [CCons] instead of [CList.CCons] +open CList + +/- A function accessing the ith element of a list. + + Original Rust code: + ``` + pub fn list_nth<'a, T>(l: &'a CList, i: u32) -> &'a T { + match l { + List::CCons(x, tl) => { + if i == 0 { + return x; + } else { + return list_nth(tl, i - 1); + } + } + List::CNil => { + panic!() + } + } + } + ``` + -/ +divergent def list_nth (T : Type) (l : CList T) (i : U32) : Result T := + match l with + | CCons x tl => + if i = U32.ofInt 0 + then Result.ret x + else do + let i1 ← i - (U32.ofInt 1) + list_nth T tl i1 + | CNil => Result.fail Error.panic + +/- Conversion to Lean's standard list type. + + Note that because we define the function as belonging to the namespace + [CList], we can use the notation [l.to_list] if [l] has type [CList ...]. + -/ +def CList.to_list {α : Type} (x : CList α) : List α := + match x with + | CNil => [] + | CCons hd tl => hd :: tl.to_list + +/- Let's prove that [list_nth] indeed accesses the ith element of the list. + + About the parameter [Inhabited T]: this tells us that we must have an instance of the + typeclass [Inhabited] for the type [T]. As of today we can only use [index] with + inhabited types, that is to say types which are not empty (i.e., for which it is + possible to construct a value - for instance, [Int] is inhabited because we can exhibit + the value [0: Int]). This is a technical detail. + -/ +theorem list_nth_spec {T : Type} [Inhabited T] (l : CList T) (i : U32) + -- Precondition: the index is in bounds + (h : (↑ i) < l.to_list.len) + -- Postcondition + : ∃ x, list_nth T l i = ret x ∧ + x = l.to_list.index (↑ i) + := by + -- Here we can to be careful when unfolding the body of [list_nth]: we could + -- use the [simp] tactic, but it will sometimes loop on recursive definitions. + rw [list_nth] + -- Let's simply follow the structure of the function + match l with + | CNil => + -- We can't get there: we can derive a contradiction from the precondition + -- First, let's simplify [to_list CNil] to [0] + simp [CList.to_list] at h + -- Proving we have a contrdiction: this is just linear arithmetic (note that + -- [U32] integers are unsigned, that is, they are greater than or equal to 0). + scalar_tac + | CCons hd tl => + -- Simplifying the match + simp only [] + -- Cases on i + -- The notation [hi : ...] allows us to introduce an assumption in the + -- context, to use the fact that in the branches we have [i = U32.ofInt 0] + -- and [¬ i = U32.ofInt 0] + if hi: i = U32.ofInt 0 then + -- We can finish the proof simply by using the simplier. + -- We decompose the proof into several calls on purpose, so that it is + -- easier to understand what is going on. + -- Simplify the condition and the [if then else] + simp [hi] + -- Proving the final equality + simp [CList.to_list] + else + -- The interesting branch + -- Simplify the condition and the [if then else] + simp [hi] + -- i0 := i - 1 + progress as ⟨ i1, hi1 ⟩ + -- Recursive call + simp [CList.to_list] at h + progress as ⟨ l1 ⟩ + -- Proving the postcondition + -- We need this to trigger the simplification of [index to.to_list i.val] + -- + -- Among other things, the call to [simp] below will apply the theorem + -- [List.index_nzero_cons], which has the precondition [i.val ≠ 0]. [simp] + -- can automatically use the assumptions/theorems we give it to prove + -- preconditions when applying rewriting lemmas. In the present case, + -- by giving it [*] as argument, we tell [simp] to use all the assumptions + -- to perform rewritings. In particular, it will use [i.val ≠ 0] to + -- apply [List.index_nzero_cons]. + have : i.val ≠ 0 := by scalar_tac + simp [CList.to_list, *] + +end Tutorial diff --git a/tests/lean/lakefile.lean b/tests/lean/lakefile.lean index cc63c48f..8acf6973 100644 --- a/tests/lean/lakefile.lean +++ b/tests/lean/lakefile.lean @@ -8,6 +8,7 @@ require Base from "../../backends/lean" package «tests» {} +@[default_target] lean_lib tutorial @[default_target] lean_lib betreeMain @[default_target] lean_lib constants @[default_target] lean_lib external -- cgit v1.2.3