From eeda2eb5df9d9ef8bb7faa6e2f4552555d2d7e39 Mon Sep 17 00:00:00 2001 From: Son Ho Date: Sat, 12 Feb 2022 18:51:40 +0100 Subject: Add comments --- tests/hashmap/Hashmap.Properties.fst | 134 +++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 21 deletions(-) (limited to 'tests/hashmap/Hashmap.Properties.fst') diff --git a/tests/hashmap/Hashmap.Properties.fst b/tests/hashmap/Hashmap.Properties.fst index bec35dd8..5f403c61 100644 --- a/tests/hashmap/Hashmap.Properties.fst +++ b/tests/hashmap/Hashmap.Properties.fst @@ -12,7 +12,34 @@ module InteractiveHelpers = FStar.InteractiveHelpers #set-options "--z3rlimit 50 --fuel 0 --ifuel 1" -/// The proofs actually caused a lot more trouble than expected, because: +/// The proofs actually caused a lot more trouble than expected, because of the +/// below points. All those are problems I already encountered in the past, but: +/// +/// - the fact that I spent 9 months mostly focusing on Aeneas made me forget them +/// a bit +/// - they seem exacerbated by the fact that they really matter when doing +/// functional correctness proofs, while Aeneas allows me to focus on the +/// functional behaviour of my programs. +/// +/// As a simple example, when I implemented linked lists (with loops) in Low* +/// for Noise*, most of the work consisted in making the Low* proofs work +/// (which was painful). +/// +/// There was a bit of functional reasoning (for which I already encountered the +/// below issues), but it was pretty simple and shadowed by the memory management +/// part. In the current situation, as we got rid of the memory management annoyance, +/// we could move on to the more complex hash maps where the functional correctness +/// proofs *actually* require some work, making extremely obvious the problems F* has +/// when dealing with this kind of proofs. 
+/// +/// Here, I would like to emphasize the fact that if hash maps *do* have interesting +/// functional properties to study, I don't believe those properties are *intrinsically* +/// complex. In particular, I am very eager to try to do the same proofs in Coq or +/// HOL4, which I believe are more suited to this kind of proofs, and see how things go. +/// I'm aware that those provers also suffer from drawbacks, but I believe those are +/// less severe than F* in the present case. +/// +/// The problems I encountered (once again, all this is well known): /// /// - we are blind when doing the proofs. After a very intensive use of F* I got /// used to it meaning I *can* do proofs in F*, but it still takes me a tremendous @@ -25,22 +52,51 @@ module InteractiveHelpers = FStar.InteractiveHelpers /// - F* is extremely bad at reasoning with quantifiers, which is made worse by /// the fact we are blind when making proofs. This forced me to be extremely /// careful about the way I wrote the specs/invariants (by writing "functional" -/// specs and invariants, mostly, so as not to manipulate quantifiers), ending -/// up proofs which are not written in the most natural and efficient manner. +/// specs and invariants, mostly, so as not to manipulate quantifiers). +/// /// In particular, I had to cut the proofs into many steps just for this reason, /// while if I had been able to properly use quantifiers (I tried: in many /// situations I manage to massage F* to make it work, but in the below proofs /// it was horrific) I would have proven many results in one go. /// +/// More specifically: the hash map has an invariant stating that all the keys +/// are pairwise disjoint. This invariant is extremely simple to write with +/// forall quantifiers and looks like the following: +/// `forall i j. 
i <> j ==> key_at i hm <> key_at j hm` +/// +/// If you can easily manipulate forall quantifiers, you can prove that the +/// invariant is maintained by, say, the insertion functions in one go. +/// +/// However here, because I couldn't make the quantification work (and I really +/// tried hard, because this is a very natural way of doing the proofs), I had +/// to resort to invariants written in terms of [pairwise_rel]. This is +/// extremely annoying, because then the process becomes: +/// - prove that the insertion, etc. functions refine some higher level functions +/// (that I have to introduce) +/// - prove that those higher level functions preserve the invariants +/// +/// All this results in a huge amount of intermediary lemmas and definitions... +/// Of course, I'm totally fine with introducing refinement steps when the +/// proofs are *actually* intrinsically complex, but here we are studying hash +/// maps, so come on!! +/// +/// - the abundance of intermediate definitions and lemmas causes a real problem +/// because we then have to remember them, find naming conventions (otherwise +/// it is a mess) and go look for them. All in all, it takes engineering time, +/// and it can quickly cause scaling issues... +/// /// - F* doesn't encode closures properly, the result being that it is very -/// awkward to reason about functions like `map` or `find`, because we have +/// awkward to reason about functions like [map] or [find], because we have /// to introduce auxiliary definitions for the parameters we give to those /// functions (if we use anonymous lambda functions, we're screwed by the /// encoding). +/// See all the definitions like [same_key], [binding_neq], etc. which clutter +/// the file and worsen the problem mentioned in the previous point. 
/// -/// - we can't prove intermediate results which require a recursive proofs +/// - we can't prove intermediate results which require a *recursive* proof /// inside of other proofs, meaning that whenever we need such a result we need /// to write an intermediate lemma, which is extremely cumbersome. +/// /// What is extremely frustrating is that in most situations, those intermediate /// lemmas are extremely simple to prove: they would simply need 2 or 3 tactic /// calls in Coq or HOL4, and in F* the proof is reduced to a recursive call. @@ -48,25 +104,36 @@ module InteractiveHelpers = FStar.InteractiveHelpers /// non-negligible time, which is made worse by the fact that, once again, /// we don't have proof contexts to stare at which would help figuring out /// how to write such lemmas. +/// +/// Simple example: see [for_all_binding_neq_find_lem]. This lemma states that: +/// "if a key is not in a map, then looking up this key returns None". +/// This lemma is used in *exactly* one place, and simply needs a recursive call. +/// Stating the lemma took a lot more time (and space) than proving it. +/// +/// - more generally, it can be difficult to figure out which intermediate results +/// to prove. In an interactive theorem prover based on tactics, it often happens +/// that we start proving the theorem we target, then get stuck on a proof obligation +/// for which we realize we need to prove an intermediate result. +/// +/// This process is a lot more difficult in F*, and I have to spend a lot of energy +/// figuring out what I *might* need in the future. While this is probably a good +/// habit, there are many situations where it is really a constraint: I'm often +/// reluctant before starting a new proof in F*, because I anticipate this very +/// annoying loop: try to prove something, get an unknown assertion failed error, +/// insert a lot of assertions or think *really* deeply to figure out what might +/// have happened, etc. 
All this seems a lot more natural when working with tactics. /// /// - the proofs often fail or succeed for extremely unpredictable reasons, and are -/// extremely hard to debug. See [hash_map_slots_s_nes] below: it is simply a -/// definition with a refinment. For some reason, at some places if we use this -/// type abbreviation some proofs break, meaning we have to write the unfolded -/// version. -/// I guess it has something to do with the fact that F*'s type inference yields -/// a different result, in combination with the poor support for subtyping. The -/// problem is that it is extremely hard to debug, and I definitely don't want -/// to waste time with this kind of boring, tedious proofs. +/// extremely hard to debug. /// -/// The result is that I had to poor a lot more thinking than I expected in the below -/// proofs, in particular to: -/// - write invariants and specs that I could *manage* in F* -/// - subdivide all the theorems into very small, modular lemmas that I could reason -/// about independently, in a small context, and with quick responses from F*. -/// -/// Finally, I strongly that in a theorem prover with tactics, most of the below proof -/// would have been extremely straightforward. +/// See [hash_map_slots_s_nes] below: it is simply a definition with a refinement. +/// For some reason, in some places if we use this type abbreviation some proofs +/// break, meaning we have to write the unfolded version instead. +/// +/// I guess this specific type has something to do with the fact that F*'s type +/// inference yields a different result, in combination with the poor support for +/// subtyping. The problem is that it is extremely hard to debug, and I definitely +/// don't want to waste any more time with this kind of boring, tedious proof. 
(*** List lemmas *) @@ -1914,6 +1981,31 @@ let rec hash_map_move_elements_s_flat_lem #t hm al = hash_map_move_elements_s_flat_lem hm' al' #pop-options +/// We need to prove that the invariants on the "low-level" representations of +/// the hash map imply the invariants on the "high-level" representations. +val slots_t_inv_implies_slots_s_inv + (#t : Type0) (slots : slots_t t{length slots <= usize_max}) : + Lemma (requires (slots_t_inv slots)) + (ensures (slots_s_inv (slots_t_v slots))) + +let slots_t_inv_implies_slots_s_inv #t slots = + + +let slots_s_inv (#t : Type0) (slots : slots_s t{length slots <= usize_max}) : Type0 = + forall(i:nat{i < length slots}). + {:pattern index slots i} + slot_s_inv (length slots) i (index slots i) + +let slots_t_inv (#t : Type0) (slots : slots_t t{length slots <= usize_max}) : Type0 = + forall(i:nat{i < length slots}). + {:pattern index slots i} + slot_t_inv (length slots) i (index slots i) + +hash_map_t_inv +hash_map_slot_s_inv + +val slots_t_inv_implies_assoc_list_lem (#t : Type0) (hm : hash_map_t) + (* let rec hash_map_move_elements_s_flat (#t : Type0) (ntable : hash_map_slots_s_nes t) -- cgit v1.2.3