diff options
Diffstat (limited to 'tests/hashmap')
-rw-r--r-- | tests/hashmap/Hashmap.Funs.fst | 23 | ||||
-rw-r--r-- | tests/hashmap/Hashmap.Properties.fst | 189 | ||||
-rw-r--r-- | tests/hashmap/Makefile | 48 | ||||
-rw-r--r-- | tests/hashmap/Primitives.fst | 3 |
4 files changed, 65 insertions, 198 deletions
diff --git a/tests/hashmap/Hashmap.Funs.fst b/tests/hashmap/Hashmap.Funs.fst index 83c245fb..921ed142 100644 --- a/tests/hashmap/Hashmap.Funs.fst +++ b/tests/hashmap/Hashmap.Funs.fst @@ -188,6 +188,10 @@ let hash_map_insert_no_resize_fwd_back end end +(** [core::num::u32::{8}::MAX] *) +let core_num_u32_max_body : result u32 = Return 4294967295 +let core_num_u32_max_c : u32 = eval_global core_num_u32_max_body + (** [hashmap::HashMap::{0}::move_elements_from_list] *) let rec hash_map_move_elements_from_list_fwd_back (t : Type0) (ntable : hash_map_t t) (ls : list_t t) : @@ -244,23 +248,24 @@ let rec hash_map_move_elements_fwd_back (** [hashmap::HashMap::{0}::try_resize] *) let hash_map_try_resize_fwd_back (t : Type0) (self : hash_map_t t) : result (hash_map_t t) = - begin match scalar_cast U32 Usize 4294967295 with + let i = core_num_u32_max_c in + begin match scalar_cast U32 Usize i with | Fail -> Fail | Return max_usize -> let capacity = vec_len (list_t t) self.hash_map_slots in begin match usize_div max_usize 2 with | Fail -> Fail | Return n1 -> - let (i, i0) = self.hash_map_max_load_factor in - begin match usize_div n1 i with + let (i0, i1) = self.hash_map_max_load_factor in + begin match usize_div n1 i0 with | Fail -> Fail - | Return i1 -> - if capacity <= i1 + | Return i2 -> + if capacity <= i2 then begin match usize_mul capacity 2 with | Fail -> Fail - | Return i2 -> - begin match hash_map_new_with_capacity_fwd t i2 i i0 with + | Return i3 -> + begin match hash_map_new_with_capacity_fwd t i3 i0 i1 with | Fail -> Fail | Return ntable -> begin match @@ -268,13 +273,13 @@ let hash_map_try_resize_fwd_back with | Fail -> Fail | Return (ntable0, _) -> - Return (Mkhash_map_t self.hash_map_num_entries (i, i0) + Return (Mkhash_map_t self.hash_map_num_entries (i0, i1) ntable0.hash_map_max_load ntable0.hash_map_slots) end end end else - Return (Mkhash_map_t self.hash_map_num_entries (i, i0) + Return (Mkhash_map_t self.hash_map_num_entries (i0, i1) self.hash_map_max_load self.hash_map_slots) end end diff --git a/tests/hashmap/Hashmap.Properties.fst b/tests/hashmap/Hashmap.Properties.fst index 12c47d1f..21a46c73 100644 --- a/tests/hashmap/Hashmap.Properties.fst +++ b/tests/hashmap/Hashmap.Properties.fst @@ -11,195 +11,6 @@ open Hashmap.Funs let _align_fsti = () -/// Issues encountered with F*: -/// =========================== -/// -/// The proofs actually caused a lot more trouble than expected, because of the -/// below points. All those are problems I already encountered in the past, but: -/// -/// - the fact that I spent 9 months mostly focusing on Aeneas made me forget them -/// a bit -/// - they seem exacerbated by the fact that they really matter when doing -/// functional correctness proofs, while Aeneas allows me to focus on the -/// functional behaviour of my programs. -/// -/// As a simple example, when I implemented linked lists (with loops) in Low* -/// for Noise*, most of the work consisted in making the Low* proofs work -/// (which was painful). -/// -/// There was a bit of functional reasoning (for which I already encountered the -/// below issues), but it was pretty simple and shadowed by the memory management -/// part. In the current situation, as we got rid of the memory management annoyance, -/// we could move on to the more the complex hash maps where the functional correctness -/// proofs *actually* require some work, making extremely obvious the problems F* has -/// when dealing with this kind of proofs. -/// -/// Here, I would like to emphasize the fact that if hash maps *do* have interesting -/// functional properties to study, I don't believe those properties are *intrinsically* -/// complex. In particular, I am very eager to try to do the same proofs in Coq or -/// HOL4, which I believe are more suited to this kind of proofs, and see how things go. -/// I'm aware that those provers also suffer from drawbacks, but I believe those are -/// less severe than F* in the present case. -/// -/// The problems I encountered (once again, all this is well known): -/// -/// - we are blind when doing the proofs. After a very intensive use of F* I got -/// used to it meaning I *can* do proofs in F*, but it still takes me a tremendous -/// amout of energy to visualize the context in my head and, for instance, -/// properly instantiate the lemmas or insert the necessary assertions in the code. -/// I actually often write assertions that I assume just to *check* that those -/// assertions make the proofs pass and are thus indeed the ones I want to prove, -/// which is something very specific to working with F*. -/// -/// About the fact that we are blind: see [hash_map_try_resize_fwd_back_lem_refin] -/// -/// - the fact that we don't reason with tactics but with the SMT solver with an -/// "intrinsic" style of proofs makes it a bit awkward to reason about pure -/// functions in a modular manner, because every proof requires to basically -/// copy-paste the function we are studying. As a consequence, this file is -/// very verbose (look at the number of lines of code...). -/// -/// - F* is extremely bad at reasoning with quantifiers, which is made worse by -/// the fact we are blind when making proofs. This forced me to be extremely -/// careful about the way I wrote the specs/invariants (by writing "functional" -/// specs and invariants, mostly, so as not to manipulate quantifiers). -/// -/// In particular, I had to cut the proofs into many steps just for this reason, -/// while if I had been able to properly use quantifiers (I tried: in many -/// situations I manage to massage F* to make it work, but in the below proofs -/// it was horrific) I would have proven many results in one go. -/// -/// More specifically: the hash map has an invariant stating that all the keys -/// are pairwise disjoint. This invariant is extremely simple to write with -/// forall quantifiers and looks like the following: -/// `forall i j. i <> j ==> key_at i hm <> key_at j hm` -/// -/// If you can easily manipulate forall quantifiers, you can prove that the -/// invariant is maintained by, say, the insertion functions in one go. -/// -/// However here, because I couldn't make the quantification work (and I really -/// tried hard, because this is a very natural way of doing the proofs), I had -/// to resort to invariants written in terms of [pairwise_rel]. This is -/// extremely annoying, because then the process becomes: -/// - prove that the insertion, etc. functions refine some higher level functions -/// (that I have to introduce) -/// - prove that those higher level functions preserve the invariants -/// -/// All this results in a huge amount of intermediary lemmas and definitions... -/// Of course, I'm totally fine with introducing refinements steps when the -/// proofs are *actually* intrinsically complex, but here we are studying hash -/// maps, so come on!! -/// -/// - the abundance of intermediate definitions and lemmas causes a real problem -/// because we then have to remember them, find naming conventions (otherwise -/// it is a mess) and go look for them. All in all, it takes engineering time, -/// and it can quickly cause scaling issues... -/// -/// - F* doesn't encode closures properly, the result being that it is very -/// awkward to reason about functions like [map] or [find], because we have -/// to introduce auxiliary definitions for the parameters we give to those -/// functions (if we use anonymous lambda functions, we're screwed by the -/// encoding). -/// See all the definitions like [same_key], [binding_neq], etc. which cluter -/// the file and worsen the problem mentionned in the previous point. -/// -/// - we can't prove intermediate results which require a *recursive* proof -/// inside of other proofs, meaning that whenever we need such a result we need -/// to write an intermediate lemma, which is extremely cumbersome. -/// -/// What is extremely frustrating is that in most situations, those intermediate -/// lemmas are extremely simple to prove: they would simply need 2 or 3 tactic -/// calls in Coq or HOL4, and in F* the proof is reduced to a recursive call. -/// Isolating the lemma (i.e., writing its signature), however, takes some -/// non-negligible time, which is made worse by the fact that, once again, -/// we don't have proof contexts to stare at which would help figure out -/// how to write such lemmas. -/// -/// Simple example: see [for_all_binding_neq_find_lem]. This lemma states that: -/// "if a key is not in a map, then looking up this key returns None" (and those -/// properties are stated in two different styles, hence the need for a proof). -/// This lemma is used in *exactly* one place, and simply needs a recursive call. -/// Stating the lemma took a lot more time (and place) than proving it. -/// -/// - more generally, it can be difficult to figure out which intermediate results -/// to prove. In an interactive theorem prover based on tactics, it often happens -/// that we start proving the theorem we target, then get stuck on a proof obligation -/// for which we realize we need to prove an intermediate result. -/// -/// This process is a lot more difficult in F*, and I have to spend a lot of energy -/// figuring out what I *might* need in the future. While this is probably a good -/// habit, there are many situations where it is really a constraint: I'm often -/// reluctant before starting a new proof in F*, because I anticipate on this very -/// annoying loop: try to prove something, get an unknown assertion failed error, -/// insert a lot of assertions or think *really* deeply to figure out what might -/// have happened, etc. All this seems a lot more natural when working with tactics. -/// -/// Simple example: see [slots_t_inv_implies_slots_s_inv]. This lemma is super -/// simple and was probably not required (it is proven with `()`). But I often feel -/// forced to anticipate on problems, otherwise proofs become too painful. -/// -/// - the proofs often fail or succeed for extremely unpredictable reasons, and are -/// extremely hard to debug. -/// -/// 1. See the comments for [hash_map_move_elements_fwd_back_lem_refin], which -/// describe the various breakages I encountered, and the different attempts I -/// made to fix them. As explained in those comments, the frustrating part is that -/// the proof is a very simple refinement step: this is not the kind of place where -/// I expected to spend time. -/// -/// Also, now that I know why it was brittle in the first place, I don't understand -/// why it worked at some point. One big issue is that when trying to figure out -/// why F* (or rather Z3) fails (for instance when playing with Z3's parameters), we -/// are constantly shooting in the dark. -/// -/// 2. See [hash_map_is_assoc_list] and [hash_map_move_elements_fwd_back_lem]. -/// -/// In particular, [hash_map_move_elements_fwd_back_lem] was very painful, with -/// assertions directly given by some postconditions which failed for no reasons, -/// or "unknown assertion failed" which forced us to manually insert -/// the postconditions given by the lemmas we called (resulting in a verbose -/// proof)... -/// -/// 4. As usual, the unstable arithmetic proofs are a lot of fun. We have a few -/// of them because we prove that the table is never over-loaded (it resizes itself -/// in order to respect the max load factor). See [new_max_load_lem] for instance. -/// -/// Finally: remember (again) that we are in a pure setting. Imagine having to -/// deal with Low*/separation logic at the same time. -/// -/// - debugging a proof can be difficult, especially when Z3 simply answers with -/// "Unknown assertion failed". Rolling admits work reasonably well, though time -/// consuming, but they cause trouble when the failing proof obligation is in the -/// postcondition of the function: in this situation we need to copy-paste the -/// postcondition in order to be able to do the rolling admit. As we may need to -/// rename some variables, this implies copying the post, instantiating it (by hand), -/// checking that it is correct (by assuming it and making sure the proofs pass), -/// then doing the rolling admit, assertion by assertion. This is tedious and, -/// combined with F*'s answer time, very time consuming (and boring!). -/// -/// See [hash_map_insert_fwd_back_lem] for instance. -/// -/// As a sub-issue, I often encountered in my experience with F* the problem of -/// failing to prove the equality between two records, in which case F* just -/// tells you that Z3 was not able to prove the *whole* equality, but doesn't -/// give you information about the precise fields. In a prover with tactics -/// you immediately see which fields is problematic, because you get stuck with -/// a goal like: -/// ``` -/// Cons x y z == Cons x' y z -/// ``` -/// -/// In F* you have to manually expand the equality to a conjunction of field -/// equalities. See [hash_map_try_resize_fwd_back_lem_refin] for an illustration. -/// -/// - overall, there are many things we have to do to debug the failing proofs -/// which are very tedious, repetitive, manual and boring (which is kind of -/// ironic for computer scientists), are specific to F* and require *writing -/// a lot*. For instance, taking items from the above points: -/// - inserting assertions -/// - copy-pasting pre/postconditions to apply a rolling admit technique -/// - expanding a record equality to a conjunction of field equalities - /// The proofs: /// =========== /// diff --git a/tests/hashmap/Makefile b/tests/hashmap/Makefile new file mode 100644 index 00000000..ea838d2d --- /dev/null +++ b/tests/hashmap/Makefile @@ -0,0 +1,48 @@ +INCLUDE_DIRS = . + +FSTAR_INCLUDES = $(addprefix --include ,$(INCLUDE_DIRS)) + +FSTAR_HINTS ?= --use_hints --use_hint_hashes --record_hints + +FSTAR_OPTIONS = $(FSTAR_HINTS) \ + --odir obj --cache_checked_modules $(FSTAR_INCLUDES) --cmi \ + --warn_error '+241@247+285-274' \ + --cache_dir obj + +FSTAR_NO_FLAGS = fstar.exe + +FSTAR = $(FSTAR_NO_FLAGS) $(FSTAR_OPTIONS) + +# The F* roots are used to compute the dependency graph, and generate the .depend file +FSTAR_ROOTS ?= $(wildcard *.fst *.fsti) + +# This is the right way to ensure the .depend file always gets re-built. +ifeq (,$(filter %-in,$(MAKECMDGOALS))) +ifndef NODEPEND +ifndef MAKE_RESTARTS +.depend: .FORCE + $(FSTAR_NO_FLAGS) --dep full $(notdir $(FSTAR_ROOTS)) > $@ + +.PHONY: .FORCE +.FORCE: +endif +endif + +include .depend +endif + +# For the interactive mode +%.fst %.fsti: + $(FSTAR) $@ + +# Generete the .checked files in bash mode +%.checked: + $(FSTAR) $(FSTAR_FLAGS) $* && \ + touch -c $* + +# Build all the files +all: $(ALL_CHECKED_FILES) + +.PHONY: clean +clean: + rm -f obj/* diff --git a/tests/hashmap/Primitives.fst b/tests/hashmap/Primitives.fst index fe351f3a..b3da25c2 100644 --- a/tests/hashmap/Primitives.fst +++ b/tests/hashmap/Primitives.fst @@ -34,6 +34,9 @@ let bind (#a #b : Type0) (m : result a) (f : a -> result b) : result b = // Monadic assert(...) let massert (b:bool) : result unit = if b then Return () else Fail +// Unwrap a successful result by normalisation (used for globals). +let eval_global (#a : Type0) (x : result a{Return? (normalize_term x)}) : a = Return?.v x + (*** Misc *) type char = FStar.Char.char type string = string |