Make progress on the hashmap properties

author: Son Ho 2023-07-25 16:26:08 +0200
committer: Son Ho 2023-07-25 16:26:08 +0200
commit: 1854c631a6a7a3f8d45ad18e05547f9d3782c3ee (patch)
tree: 270e16d5c106d00b0e18520bf89d05d1e202cdb6
parent: 876137dff361620d8ade1a4ee94fa9274df0bdc6 (diff)
8 files changed, 141 insertions, 29 deletions
diff --git a/backends/lean/Base/Arith/Base.lean b/backends/lean/Base/Arith/Base.lean
index e008f7b9..9c11ed45 100644
--- a/backends/lean/Base/Arith/Base.lean
+++ b/backends/lean/Base/Arith/Base.lean
@@ -53,4 +53,8 @@ theorem int_pos_ind (p : Int → Prop) :
     rename_i m
     cases m <;> simp_all
 
+-- We sometimes need this to make sure no natural numbers appear in the goals
+-- TODO: there is probably something more general to do
+theorem nat_zero_eq_int_zero : (0 : Nat) = (0 : Int) := by simp
+
 end Arith
diff --git a/backends/lean/Base/Arith/Int.lean b/backends/lean/Base/Arith/Int.lean
index 3415866e..bc0676d8 100644
--- a/backends/lean/Base/Arith/Int.lean
+++ b/backends/lean/Base/Arith/Int.lean
@@ -225,6 +225,8 @@ def intTac (splitGoalConjs : Bool) (extraPreprocess :  Tactic.TacticM Unit) : Ta
   -- Preprocess - wondering if we should do this before or after splitting
   -- the goal. I think before leads to a smaller proof term?
   Tactic.allGoals (intTacPreprocess extraPreprocess)
+  -- More preprocessing
+  Tactic.allGoals (Utils.simpAt [] [``nat_zero_eq_int_zero] [] .wildcard)
   -- Split the conjunctions in the goal
   if splitGoalConjs then Tactic.allGoals (Utils.repeatTac Utils.splitConjTarget)
   -- Call linarith
diff --git a/backends/lean/Base/Arith/Scalar.lean b/backends/lean/Base/Arith/Scalar.lean
index a56ea08b..6f4a8eba 100644
--- a/backends/lean/Base/Arith/Scalar.lean
+++ b/backends/lean/Base/Arith/Scalar.lean
@@ -21,7 +21,8 @@ def scalarTacExtraPreprocess : Tactic.TacticM Unit := do
                  ``I8.min, ``I16.min, ``I32.min, ``I64.min, ``I128.min,
                  ``I8.max, ``I16.max, ``I32.max, ``I64.max, ``I128.max,
                  ``U8.min, ``U16.min, ``U32.min, ``U64.min, ``U128.min,
-                 ``U8.max, ``U16.max, ``U32.max, ``U64.max, ``U128.max
+                 ``U8.max, ``U16.max, ``U32.max, ``U64.max, ``U128.max,
+                 ``Usize.min
                  ] [] [] .wildcard
 
 elab "scalar_tac_preprocess" : tactic =>
diff --git a/backends/lean/Base/Primitives/Scalar.lean b/backends/lean/Base/Primitives/Scalar.lean
index 1e9b51c2..3beb7527 100644
--- a/backends/lean/Base/Primitives/Scalar.lean
+++ b/backends/lean/Base/Primitives/Scalar.lean
@@ -66,27 +66,33 @@ def U128.smin  : Int := 0
 def U128.smax  : Int := HPow.hPow 2 128 - 1
 
 -- The "normalized" bounds, that we use in practice
-def I8.min    := -128
-def I8.max   := 127
-def I16.min  := -32768
-def I16.max  := 32767
-def I32.min  := -2147483648
-def I32.max  := 2147483647
-def I64.min  := -9223372036854775808
-def I64.max  := 9223372036854775807
-def I128.min := -170141183460469231731687303715884105728
-def I128.max := 170141183460469231731687303715884105727
-@[simp] def U8.min   := 0
-def U8.max   := 255
-@[simp] def U16.min  := 0
-def U16.max  := 65535
-@[simp] def U32.min  := 0
-def U32.max  := 4294967295
-@[simp] def U64.min  := 0
-def U64.max  := 18446744073709551615
-@[simp] def U128.min := 0
-def U128.max := 340282366920938463463374607431768211455
-@[simp] def Usize.min := 0
+def I8.min    : Int   := -128
+def I8.max    : Int   := 127
+def I16.min   : Int  := -32768
+def I16.max   : Int  := 32767
+def I32.min   : Int  := -2147483648
+def I32.max   : Int  := 2147483647
+def I64.min   : Int  := -9223372036854775808
+def I64.max   : Int  := 9223372036854775807
+def I128.min  : Int := -170141183460469231731687303715884105728
+def I128.max  : Int := 170141183460469231731687303715884105727
+@[simp]
+def U8.min    : Int   := 0
+def U8.max    : Int   := 255
+@[simp]
+def U16.min   : Int  := 0
+def U16.max   : Int  := 65535
+@[simp]
+def U32.min   : Int  := 0
+def U32.max   : Int  := 4294967295
+@[simp]
+def U64.min   : Int  := 0
+def U64.max   : Int  := 18446744073709551615
+@[simp]
+def U128.min  : Int := 0
+def U128.max  : Int := 340282366920938463463374607431768211455
+@[simp]
+def Usize.min : Int := 0
 
 def Isize.refined_min : { n:Int // n = I32.min ∨ n = I64.min } :=
   ⟨ Isize.smin, by
diff --git a/backends/lean/Base/Primitives/Vec.lean b/backends/lean/Base/Primitives/Vec.lean
index 35092c29..5a709566 100644
--- a/backends/lean/Base/Primitives/Vec.lean
+++ b/backends/lean/Base/Primitives/Vec.lean
@@ -22,20 +22,27 @@ def Vec (α : Type u) := { l : List α // l.length ≤ Usize.max }
 instance Vec.cast (a : Type u): Coe (Vec a) (List a)  where coe := λ v => v.val
 
 instance (a : Type u) : Arith.HasIntProp (Vec a) where
-  prop_ty := λ v => v.val.len ≤ Scalar.max ScalarTy.Usize
+  prop_ty := λ v => 0 ≤ v.val.len ∧ v.val.len ≤ Scalar.max ScalarTy.Usize
   prop := λ ⟨ _, l ⟩ => by simp[Scalar.max, List.len_eq_length, *]
 
 @[simp]
 abbrev Vec.length {α : Type u} (v : Vec α) : Int := v.val.len
 
+@[simp]
+abbrev Vec.v {α : Type u} (v : Vec α) : List α := v.val
+
 example {a: Type u} (v : Vec a) : v.length ≤ Scalar.max ScalarTy.Usize := by
   scalar_tac
 
 def Vec.new (α : Type u): Vec α := ⟨ [], by apply Scalar.cMax_suffices .Usize; simp ⟩
 
+-- TODO: very annoying that the α is an explicit parameter
 def Vec.len (α : Type u) (v : Vec α) : Usize :=
-  let ⟨ v, l ⟩ := v
-  Usize.ofIntCore (List.length v) (by simp [Scalar.min, Usize.min]) l
+  Usize.ofIntCore v.val.len (by scalar_tac) (by scalar_tac)
+
+@[simp]
+theorem Vec.len_val {α : Type u} (v : Vec α) : (Vec.len α v).val = v.length :=
+  by rfl
 
 -- This shouldn't be used
 def Vec.push_fwd (α : Type u) (_ : Vec α) (_ : α) : Unit := ()
diff --git a/backends/lean/Base/Progress/Base.lean b/backends/lean/Base/Progress/Base.lean
index b54bdf7a..6f820a84 100644
--- a/backends/lean/Base/Progress/Base.lean
+++ b/backends/lean/Base/Progress/Base.lean
@@ -81,8 +81,8 @@ section Methods
     let (fExpr, f, args) ← do
       if mf.isConst ∧ mf.constName = ``Bind.bind then do
         -- Dive into the bind
-        let fExpr := margs.get! 4
-        fExpr.consumeMData.withApp fun f args => pure (fExpr, f, args)
+        let fExpr := (margs.get! 4).consumeMData
+        fExpr.withApp fun f args => pure (fExpr, f, args)
       else pure (mExpr, mf, margs)
     trace[Progress] "After stripping the arguments of the function call:\n- f: {f}\n- args: {args}"
     if ¬ f.isConst then throwError "Not a constant: {f}"
diff --git a/backends/lean/Base/Progress/Progress.lean b/backends/lean/Base/Progress/Progress.lean
index a281f1d2..a2c7764f 100644
--- a/backends/lean/Base/Progress/Progress.lean
+++ b/backends/lean/Base/Progress/Progress.lean
@@ -58,9 +58,9 @@ def progressWith (fExpr : Expr) (th : TheoremOrLocal)
     let (thBody, _) ← destEq thBody
     trace[Progress] "After splitting equality: {thBody}"
     -- There shouldn't be any existential variables in thBody
-    pure thBody
+    pure thBody.consumeMData
   -- Match the body with the target
-  trace[Progress] "Matching `{thBody}` with `{fExpr}`"
+  trace[Progress] "Matching:\n- body:\n{thBody}\n- target:\n{fExpr}"
   let ok ← isDefEq thBody fExpr
   if ¬ ok then throwError "Could not unify the theorem with the target:\n- theorem: {thBody}\n- target: {fExpr}"
   let mgoal ← Tactic.getMainGoal
diff --git a/tests/lean/Hashmap/Properties.lean b/tests/lean/Hashmap/Properties.lean
index de6bf636..b2d5570a 100644
--- a/tests/lean/Hashmap/Properties.lean
+++ b/tests/lean/Hashmap/Properties.lean
@@ -136,6 +136,40 @@ def slot_s_inv (l i : Int) (ls : Core.List (Usize × α)) : Prop :=
 
 def slot_t_inv (l i : Int) (s : List α) : Prop := slot_s_inv l i s.v
 
+-- Interpret the hashmap as a list of lists
+def v (hm : HashMap α) : Core.List (Core.List (Usize × α)) :=
+  hm.slots.val.map List.v
+
+-- Interpret the hashmap as an associative list
+def al_v (hm : HashMap α) : Core.List (Usize × α) :=
+  hm.v.flatten
+
+-- TODO: automatic derivation
+instance : Inhabited (List α) where
+  default := .Nil
+
+@[simp]
+def slots_s_inv (s : Core.List (List α)) : Prop :=
+  ∀ (i : Int), 0 ≤ i → i < s.len → slot_t_inv s.len i (s.index i)
+
+def slots_t_inv (s : Vec (List α)) : Prop :=
+  slots_s_inv s.v
+
+@[simp]
+def base_inv (hm : HashMap α) : Prop :=
+  -- [num_entries] correctly tracks the number of entries
+  hm.num_entries.val = hm.al_v.len ∧
+  -- Slots invariant
+  slots_t_inv hm.slots ∧
+  -- The capacity must be > 0 (otherwise we can't resize)
+  0 < hm.slots.length
+  -- TODO: load computation
+
+def inv (hm : HashMap α) : Prop :=
+  -- Base invariant
+  base_inv hm
+  -- TODO: either the hashmap is not overloaded, or we can't resize it
+
 theorem insert_in_list_back_spec_aux {α : Type} (l : Int) (key: Usize) (value: α) (l0: List α)
   (hinv : slot_s_inv_hash l (hash_mod_key key l) l0.v)
   (hdk : distinct_keys l0.v) :
@@ -191,6 +225,64 @@ theorem insert_in_list_back_spec_aux {α : Type} (l : Int) (key: Usize) (value:
        -- TODO: canonize addition by default?
        simp_all [Int.add_assoc, Int.add_comm, Int.add_left_comm]
 
+@[pspec]
+theorem insert_in_list_back_spec {α : Type} (l : Int) (key: Usize) (value: α) (l0: List α)
+  (hinv : slot_s_inv_hash l (hash_mod_key key l) l0.v)
+  (hdk : distinct_keys l0.v) :
+  ∃ l1,
+    insert_in_list_back α key value l0 = ret l1 ∧
+    -- We update the binding
+    l1.lookup key = value ∧
+    (∀ k, k ≠ key → l1.lookup k = l0.lookup k) ∧
+    -- We preserve part of the key invariant
+    slot_s_inv_hash l (hash_mod_key key l) l1.v ∧
+    -- Reasoning about the length
+    (match l0.lookup key with
+     | none => l1.len = l0.len + 1
+     | some _ => l1.len = l0.len) ∧
+    -- The keys are distinct
+    distinct_keys l1.v
+  := by
+  progress with insert_in_list_back_spec_aux as ⟨ l1 .. ⟩
+  exists l1
+
+def slots_t_lookup (s : Core.List (List α)) (k : Usize) : Option α :=
+  let i := hash_mod_key k s.len
+  let slot := s.index i
+  slot.lookup k
+
+def lookup (hm : HashMap α) (k : Usize) : Option α :=
+  slots_t_lookup hm.slots.val k
+
+@[simp]
+abbrev len_s (hm : HashMap α) : Int := hm.al_v.len
+
+set_option trace.Progress true
+/-set_option pp.explicit true
+set_option pp.universes true
+set_option pp.notation false-/
+
+theorem insert_no_resize_spec {α : Type} (hm : HashMap α) (key : Usize) (value : α)
+  (hinv : hm.inv) (hnsat : hm.lookup key = none → hm.len_s < Usize.max) :
+  ∃ nhm, hm.insert_no_resize α key value = ret nhm  ∧
+  -- We preserve the invariant
+  nhm.inv ∧
+  -- We updated the binding for key
+  nhm.lookup key = some value ∧
+  -- We left the other bindings unchanged
+  (∀ k, k ≠ key → nhm.lookup k = hm.lookup k) ∧
+  -- Reasoning about the length
+  (match hm.lookup key with
+   | none => nhm.len_s = hm.len_s + 1
+   | some _ => nhm.len_s = hm.len_s) := by
+  rw [insert_no_resize]
+  simp [hash_key]
+  have : (Vec.len (List α) hm.slots).val ≠ 0 := by
+   intro
+   simp_all [inv]
+  progress as ⟨ hash_mod ⟩
+  progress
+
 end HashMap
 
 end hashmap
author	Son Ho	2023-07-25 16:26:08 +0200
committer	Son Ho	2023-07-25 16:26:08 +0200
commit	1854c631a6a7a3f8d45ad18e05547f9d3782c3ee (patch)
tree	270e16d5c106d00b0e18520bf89d05d1e202cdb6
parent	876137dff361620d8ade1a4ee94fa9274df0bdc6 (diff)