From c2850c42a7df38f777d1a713dbcf528e57f46c7b Mon Sep 17 00:00:00 2001 From: Son Ho Date: Sat, 12 Feb 2022 17:50:09 +0100 Subject: Make progress on the move_elements proof and add some comments --- tests/hashmap/Hashmap.Properties.fst | 201 +++++++++++++++++++++++++++++++++-- 1 file changed, 190 insertions(+), 11 deletions(-) (limited to 'tests/hashmap/Hashmap.Properties.fst') diff --git a/tests/hashmap/Hashmap.Properties.fst b/tests/hashmap/Hashmap.Properties.fst index 89401c02..2c40f417 100644 --- a/tests/hashmap/Hashmap.Properties.fst +++ b/tests/hashmap/Hashmap.Properties.fst @@ -13,25 +13,51 @@ module InteractiveHelpers = FStar.InteractiveHelpers #set-options "--z3rlimit 50 --fuel 0 --ifuel 1" /// The proofs actually caused a lot more trouble than expected, because: +/// /// - we are blind when doing the proofs. After a very intensive use of F* I got -/// used to it meaning I can *do* proofs in F*, but it still takes me a tremendous +/// used to it meaning I *can* do proofs in F*, but it still takes me a tremendous /// amout of energy to visualize the context in my head and, for instance, /// properly instantiate the lemmas or insert the necessary assertions in the code. /// I actually often write assertions that I assume just to *check* that those /// assertions make the proofs pass and are thus indeed the ones I want to prove, /// which is something very specific to working with F*. +/// /// - F* is extremely bad at reasoning with quantifiers, which is made worse by /// the fact we are blind when making proofs. This forced me to be extremely /// careful about the way I wrote the specs/invariants (by writing "functional" -/// specs and invariants, mostly, so as not to manipulate quantifiers) +/// specs and invariants, mostly, so as not to manipulate quantifiers), ending +/// up proofs which are not written in the most natural and efficient manner. +/// In particular, I had to cut the proofs into many steps just for this reason, +/// while if I had been able to properly use quantifiers (I tried: in many +/// situations I manage to massage F* to make it work, but in the below proofs +/// it was horrific) I would have proven many results in one go. +/// /// - F* doesn't encode closures properly, the result being that it is very /// awkward to reason about functions like `map` or `find`, because we have /// to introduce auxiliary definitions for the parameters we give to those /// functions (if we use anonymous lambda functions, we're screwed by the /// encoding). -/// - we can't easily prove intermediate results which require a recursive proofs +/// +/// - we can't prove intermediate results which require a recursive proofs /// inside of other proofs, meaning that whenever we need such a result we need /// to write an intermediate lemma, which is extremely cumbersome. +/// What is extremely frustrating is that in most situations, those intermediate +/// lemmas are extremely simple to prove: they would simply need 2 or 3 tactic +/// calls in Coq or HOL4, and in F* the proof is reduced to a recursive call. +/// Isolating the lemma (i.e., writing its signature), however, takes some +/// non-negligible time, which is made worse by the fact that, once again, +/// we don't have proof contexts to stare at which would help figuring out +/// how to write such lemmas. +/// +/// - the proofs often fail or succeed for extremely unpredictable reasons, and are +/// extremely hard to debug. See [hash_map_slots_s_nes] below: it is simply a +/// definition with a refinment. For some reason, at some places if we use this +/// type abbreviation some proofs break, meaning we have to write the unfolded +/// version. +/// I guess it has something to do with the fact that F*'s type inference yields +/// a different result, in combination with the poor support for subtyping. The +/// problem is that it is extremely hard to debug, and I definitely don't want +/// to waste time with this kind of boring, tedious proofs. /// /// The result is that I had to poor a lot more thinking than I expected in the below /// proofs, in particular to: @@ -39,8 +65,8 @@ module InteractiveHelpers = FStar.InteractiveHelpers /// - subdivide all the theorems into very small, modular lemmas that I could reason /// about independently, in a small context, and with quick responses from F*. /// -/// Note that in a theorem prover with tactics, most of the below proof would have -/// been extremely straightforward. +/// Finally, I strongly that in a theorem prover with tactics, most of the below proof +/// would have been extremely straightforward. (*** List lemmas *) @@ -359,6 +385,9 @@ let slot_find (#t : Type0) (k : key) (slot : list (binding t)) : option t = | None -> None | Some (_, v) -> Some v +let assoc_list_find (#t : Type0) (k : key) (slot : assoc_list t) : option t = + slot_find k slot + let slot_t_find_s (#t : Type0) (k : key) (slot : list_t t) : option t = match find (same_key k) (list_t_v slot) with | None -> None @@ -367,6 +396,7 @@ let slot_t_find_s (#t : Type0) (k : key) (slot : list_t t) : option t = // This is a simpler version of the "find" function, which captures the essence // of what happens and operates on [hash_map_slots_s]. // TODO: useful? +// TODO: at some point I used hash_map_slots_s_nes and it broke proofs... let hash_map_slots_s_find (#t : Type0) (hm : hash_map_slots_s t{length hm <= usize_max /\ length hm > 0}) (k : key) : option t = @@ -374,6 +404,7 @@ let hash_map_slots_s_find let slot = index hm i in slot_find k slot +// TODO: at some point I used hash_map_slots_s_nes and it broke proofs... let hash_map_slots_s_len (#t : Type0) (hm : hash_map_slots_s t{length hm <= usize_max /\ length hm > 0}) : nat = @@ -432,7 +463,7 @@ type slot_t_inv_not_same_keys_f (#t : Type0) (i : usize) (slot : list_t t) = /// Invariants for the slots let slot_s_inv -(#t : Type0) (len : usize{len > 0}) (i : usize) (slot : list (binding t)) : bool = + (#t : Type0) (len : usize{len > 0}) (i : usize) (slot : list (binding t)) : bool = // All the bindings are in the proper slot for_all (same_hash_mod_key len i) slot && // All the keys are pairwise distinct @@ -1226,6 +1257,7 @@ let hash_map_insert_in_list_back_lem t len key value ls = /// We work on [hash_map_slots_s] (we use a higher-level view of the hash-map, but /// not too high). +// TODO: at some point I used hash_map_slots_s_nes and it broke proofs...x let hash_map_insert_no_resize_s (#t : Type0) (hm : hash_map_slots_s t{length hm <= usize_max /\ length hm > 0}) (key : usize) (value : t) : @@ -1260,7 +1292,7 @@ val hash_map_insert_no_resize_fwd_back_lem_s | Return hm, Return hm_v -> hash_map_t_inv hm /\ hash_map_t_slots_v hm == hm_v /\ - hash_map_slots_s_len hm_v = hash_map_t_len_s hm + hash_map_slots_s_len hm_v == hash_map_t_len_s hm | _ -> False end)) @@ -1334,6 +1366,46 @@ let hash_map_insert_no_resize_fwd_back_lem_s t self key value = end end +(**** insert_no_resize: invariants *) + +val hash_map_insert_no_resize_s_lem + (#t : Type0) (hm : hash_map_slots_s_nes t) + (key : usize) (value : t) : + Lemma + (requires ( + hash_map_slots_s_inv hm)) + (ensures ( + match hash_map_insert_no_resize_s hm key value with + | Fail -> + // Can fail only if we need to create a new binding in + // an already saturated map + hash_map_slots_s_len hm = usize_max /\ + None? (hash_map_slots_s_find hm key) + | Return hm' -> + // The invariant is preserved + hash_map_slots_s_inv hm' /\ + // [key] maps to [value] + hash_map_slots_s_find hm' key == Some value /\ + // The other bindings are preserved + (forall k'. k' <> key ==> hash_map_slots_s_find hm' k' == hash_map_slots_s_find hm k') /\ + // The length is incremented, iff we inserted a new key + (match hash_map_slots_s_find hm key with + | None -> hash_map_slots_s_len hm' = hash_map_slots_s_len hm + 1 + | Some _ -> hash_map_slots_s_len hm' = hash_map_slots_s_len hm))) + +let hash_map_insert_no_resize_s_lem #t hm key value = + let num_entries = length (flatten hm) in + if None? (hash_map_slots_s_find hm key) && num_entries = usize_max then () + else + begin + let len = length hm in + let i = hash_mod_key key len in + let slot = index hm i in + hash_map_insert_in_list_s_lem t len key value slot; + let slot' = hash_map_insert_in_list_s key value slot in + length_flatten_update hm i slot' + end + (**** find after insert *) /// Lemmas about what happens if we call [find] after an insertion @@ -1500,6 +1572,11 @@ val hash_map_move_elements_fwd_back_lem | _ -> False)) (decreases (length slots - i)) +// Rk.: if above we update some definitions to use [hash_map_slots_s_nes] +// instead of writing refinements explicitly, it makes the proof breaks. +// I guess it is because F* does a different type inference, which combines +// poorly with its poor support for subtyping. Problem is: I have no idea +// where this happens and it is extremely hard to debug. #restart-solver #push-options "--z3rlimit 200 --fuel 1" let rec hash_map_move_elements_fwd_back_lem t ntable slots i = @@ -1713,8 +1790,8 @@ let rec flatten_i_same_suffix #a l0 l1 i = /// Refinement lemma: /// [hash_map_move_elements_s] refines [hash_map_move_elements_s_flat] -val hash_map_move_elements_s_flat_lem - (#t : Type0) (hm : hash_map_slots_s t{is_pos_usize (length hm)}) +val hash_map_move_elements_s_lem_refin_flat + (#t : Type0) (hm : hash_map_slots_s_nes t) (slots : slots_s t) (i : nat{i <= length slots /\ length slots <= usize_max}) : Lemma @@ -1728,7 +1805,7 @@ val hash_map_move_elements_s_flat_lem (decreases (length slots - i)) #push-options "--fuel 1" -let rec hash_map_move_elements_s_flat_lem #t hm slots i = +let rec hash_map_move_elements_s_lem_refin_flat #t hm slots i = let len = length slots in if i < len then begin @@ -1742,13 +1819,115 @@ let rec hash_map_move_elements_s_flat_lem #t hm slots i = | Return hm' -> let slots' = list_update slots i [] in flatten_i_same_suffix slots slots' (i+1); - hash_map_move_elements_s_flat_lem hm' slots' (i+1); + hash_map_move_elements_s_lem_refin_flat hm' slots' (i+1); hash_map_move_elements_s_flat_append_lem hm slot (flatten_i slots' (i+1)); () end else () #pop-options +let assoc_list_inv (#t : Type0) (al : assoc_list t) : Type0 = + // All the keys are pairwise distinct + pairwise_rel binding_neq al + +let disjoint_hm_al_on_key + (#t : Type0) (hm : hash_map_slots_s_nes t) (al : assoc_list t) (k : key) : Type0 = + match hash_map_slots_s_find hm k, find (same_key k) al with + | Some _, None + | None, Some _ + | None, None -> True + | Some _, Some _ -> False + +/// Playing a dangerous game here: using forall quantifiers +let disjoint_hm_al (#t : Type0) (hm : hash_map_slots_s_nes t) (al : assoc_list t) : Type0 = + forall (k:key). disjoint_hm_al_on_key hm al k + +let find_in_union_hm_al + (#t : Type0) (hm : hash_map_slots_s_nes t) (al : assoc_list t) (k : key) : + option t = + match hash_map_slots_s_find hm k with + | Some b -> Some b + | None -> assoc_list_find k al + +val hash_map_move_elements_s_flat_lem + (#t : Type0) (hm : hash_map_slots_s_nes t) (al : assoc_list t) : + Lemma + (requires ( + // Invariants + hash_map_slots_s_inv hm /\ + assoc_list_inv al /\ + // The two are disjoint + disjoint_hm_al hm al /\ + // We can add all the elements to the hashmap + hash_map_slots_s_len hm + length al <= usize_max)) + (ensures ( + match hash_map_move_elements_s_flat hm al with + | Fail -> False // We can't fail + | Return hm' -> + // The invariant is preserved + hash_map_slots_s_inv hm' /\ + // The new hash map is the union of the two maps + (forall (k:key). hash_map_slots_s_find hm' k == find_in_union_hm_al hm al k))) + (decreases al) + +// TODO: here +#push-options "--z3rlimit 200 --fuel 1" +let rec hash_map_move_elements_s_flat_lem #t hm al = + match al with + | [] -> () + | (k,v) :: al' -> + hash_map_insert_no_resize_s_lem hm k v; + match hash_map_insert_no_resize_s hm k v with + | Fail -> () + | Return hm' -> + assert(hash_map_slots_s_inv hm'); + assert(assoc_list_inv al'); + assume(disjoint_hm_al hm' al'); + assume(hash_map_slots_s_len hm' + length al' <= usize_max); + hash_map_move_elements_s_flat_lem hm' al'; + admit() +#pop-options + +val hash_map_insert_no_resize_s_lem + (#t : Type0) (hm : hash_map_slots_s_nes t) + (key : usize) (value : t) : + Lemma + (requires ( + hash_map_slots_s_inv hm)) + (ensures ( + match hash_map_insert_no_resize_s hm key value with + | Fail -> + // Can fail only if we need to create a new binding in + // an already saturated map + hash_map_slots_s_len hm = usize_max /\ + None? (hash_map_slots_s_find hm key) + | Return hm' -> + // The invariant is preserved + hash_map_slots_s_inv hm' /\ + // [key] maps to [value] + hash_map_slots_s_find hm' key == Some value /\ + // The other bindings are preserved + (forall k'. k' <> key ==> hash_map_slots_s_find hm' k' == hash_map_slots_s_find hm k') /\ + // The length is incremented, iff we inserted a new key + (match hash_map_slots_s_find hm key with + | None -> hash_map_slots_s_len hm' = hash_map_slots_s_len hm + 1 + | Some _ -> hash_map_slots_s_len hm' = hash_map_slots_s_len hm))) + +(* +let rec hash_map_move_elements_s_flat + (#t : Type0) (ntable : hash_map_slots_s_nes t) + (slots : hash_map_s t) : + Tot (result_hash_map_slots_s_nes t) + (decreases slots) = + match slots with + | [] -> Return ntable + | (k,v) :: slots' -> + match hash_map_insert_no_resize_s ntable k v with + | Fail -> Fail + | Return ntable' -> + hash_map_move_elements_s_flat ntable' slots' + + (* /// [nhm]: the new hash map (in which we insert elements) -- cgit v1.2.3