diff options
Diffstat (limited to '')
-rw-r--r-- | tests/src/betree/Cargo.lock | 299 | ||||
-rw-r--r-- | tests/src/betree/Cargo.toml | 13 | ||||
-rw-r--r-- | tests/src/betree/Makefile | 11 | ||||
-rw-r--r-- | tests/src/betree/README.md | 1 | ||||
-rw-r--r-- | tests/src/betree/rust-toolchain | 3 | ||||
-rw-r--r-- | tests/src/betree/src/betree.rs | 1084 | ||||
-rw-r--r-- | tests/src/betree/src/betree_utils.rs | 155 | ||||
-rw-r--r-- | tests/src/betree/src/main.rs | 4 |
8 files changed, 1570 insertions, 0 deletions
diff --git a/tests/src/betree/Cargo.lock b/tests/src/betree/Cargo.lock new file mode 100644 index 00000000..1bd274ad --- /dev/null +++ b/tests/src/betree/Cargo.lock @@ -0,0 +1,299 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 3 + +[[package]] +name = "aho-corasick" +version = "1.1.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8e60d3430d3a69478ad0993f19238d2df97c507009a52b3c10addcd7f6bcb916" +dependencies = [ + "memchr", +] + +[[package]] +name = "atty" +version = "0.2.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +dependencies = [ + "hermit-abi", + "libc", + "winapi", +] + +[[package]] +name = "betree" +version = "0.1.0" +dependencies = [ + "env_logger", + "log", + "serde", + "serde_json", +] + +[[package]] +name = "env_logger" +version = "0.8.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a19187fea3ac7e84da7dacf48de0c45d63c6a76f9490dae389aead16c243fce3" +dependencies = [ + "atty", + "humantime", + "log", + "regex", + "termcolor", +] + +[[package]] +name = "hermit-abi" +version = "0.1.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33" +dependencies = [ + "libc", +] + +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + +[[package]] +name = "itoa" +version = "1.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49f1f14873335454500d59611f1cf4a4b0f786f9ac11f4312a78e4cf2566695b" + +[[package]] +name = "libc" +version = "0.2.155" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" + +[[package]] +name = "log" +version = "0.4.21" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90ed8c1e510134f979dbc4f070f87d4313098b704861a105fe34231c70a3901c" + +[[package]] +name = "memchr" +version = "2.7.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c8640c5d730cb13ebd907d8d04b52f55ac9a2eec55b440c8892f40d56c76c1d" + +[[package]] +name = "proc-macro2" +version = "1.0.83" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b33eb56c327dec362a9e55b3ad14f9d2f0904fb5a5b03b513ab5465399e9f43" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.36" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "regex" +version = "1.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c117dbdfde9c8308975b6a18d71f3f385c89461f7b3fb054288ecf2a2058ba4c" +dependencies = [ + "aho-corasick", + "memchr", + "regex-automata", + "regex-syntax", +] + +[[package]] +name = "regex-automata" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "86b83b8b9847f9bf95ef68afb0b8e6cdb80f498442f5179a29fad448fcc1eaea" +dependencies = [ + "aho-corasick", + "memchr", + "regex-syntax", +] + +[[package]] +name = "regex-syntax" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "adad44e29e4c806119491a7f06f03de4d1af22c3a680dd47f1e6e179439d1f56" + +[[package]] +name = "ryu" +version = "1.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3cb5ba0dc43242ce17de99c180e96db90b235b8a9fdc9543c96d2209116bd9f" + +[[package]] +name = "serde" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "226b61a0d411b2ba5ff6d7f73a476ac4f8bb900373459cd00fab8512828ba395" +dependencies = [ + "serde_derive", +] + +[[package]] +name = "serde_derive" +version = "1.0.202" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6048858004bcff69094cd972ed40a32500f153bd3be9f716b2eed2e8217c4838" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "serde_json" +version = "1.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "455182ea6142b14f93f4bc5320a2b31c1f266b66a4a5c858b013302a5d8cbfc3" +dependencies = [ + "itoa", + "ryu", + "serde", +] + +[[package]] +name = "syn" +version = "2.0.65" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d2863d96a84c6439701d7a38f9de935ec562c8832cc55d1dde0f513b52fad106" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "termcolor" +version = "1.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-util" +version = "0.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4d4cc384e1e73b93bafa6fb4f1df8c41695c8a91cf9c4c64358067d15a7b6c6b" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-sys" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" +dependencies = [ + "windows-targets", +] + +[[package]] +name = "windows-targets" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f0713a46559409d202e70e28227288446bf7841d3211583a4b53e3f6d96e7eb" +dependencies = [ + "windows_aarch64_gnullvm", + "windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_gnullvm", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7088eed71e8b8dda258ecc8bac5fb1153c5cffaf2578fc8ff5d61e23578d3263" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9985fd1504e250c615ca5f281c3f7a6da76213ebd5ccc9561496568a2752afb6" + +[[package]] +name = "windows_i686_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "88ba073cf16d5372720ec942a8ccbf61626074c6d4dd2e745299726ce8b89670" + +[[package]] +name = "windows_i686_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "87f4261229030a858f36b459e748ae97545d6f1ec60e5e0d6a3d32e0dc232ee9" + +[[package]] +name = "windows_i686_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db3c2bf3d13d5b658be73463284eaf12830ac9a26a90c717b7f771dfe97487bf" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4e4246f76bdeff09eb48875a0fd3e2af6aada79d409d33011886d3e1581517d9" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "852298e482cd67c356ddd9570386e2862b5673c85bd5f88df9ab6802b334c596" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.52.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bec47e5bfd1bff0eeaf6d8b485cc1074891a197ab4225d504cb7a1ab88b02bf0" diff --git a/tests/src/betree/Cargo.toml b/tests/src/betree/Cargo.toml new file mode 100644 index 00000000..c05c7d93 --- /dev/null +++ b/tests/src/betree/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "betree" +version = "0.1.0" +authors = ["Son Ho <hosonmarc@gmail.com>"] +edition = "2018" + +[dependencies] +serde_json = "1.0.91" +serde = { version = "1.0.152", features = ["derive"] } +log = "0.4.17" +env_logger = "0.8.4" + +# TODO: If we turn this package into a library, building the tests fails. diff --git a/tests/src/betree/Makefile b/tests/src/betree/Makefile new file mode 100644 index 00000000..7b41e56d --- /dev/null +++ b/tests/src/betree/Makefile @@ -0,0 +1,11 @@ +.PHONY: all +all: tests + +.PHONY: test +test: + cargo rustc -- --test -Zpolonius + cd target/debug/ && ./betree + +.PHONY: clean +clean: + cargo clean diff --git a/tests/src/betree/README.md b/tests/src/betree/README.md new file mode 100644 index 00000000..a71fe884 --- /dev/null +++ b/tests/src/betree/README.md @@ -0,0 +1 @@ +This project contains tests which require the Polonius borrow checker. diff --git a/tests/src/betree/rust-toolchain b/tests/src/betree/rust-toolchain new file mode 100644 index 00000000..9460b1a8 --- /dev/null +++ b/tests/src/betree/rust-toolchain @@ -0,0 +1,3 @@ +[toolchain] +channel = "nightly-2023-06-02" +components = [ "rustc-dev", "llvm-tools-preview" ] diff --git a/tests/src/betree/src/betree.rs b/tests/src/betree/src/betree.rs new file mode 100644 index 00000000..12f2847d --- /dev/null +++ b/tests/src/betree/src/betree.rs @@ -0,0 +1,1084 @@ +//! The following module implements a minimal betree. +//! We don't have loops for now, so we will need to update the code to remove +//! the recursive functions at some point. +//! We drew a lot of inspiration from the C++ [Be-Tree](https://github.com/oscarlab/Be-Tree). +//! implementation. +#![allow(dead_code)] + +use crate::betree_utils as utils; + +pub type NodeId = u64; +pub type Key = u64; +pub type Value = u64; + +type Map<K, V> = List<(K, V)>; + +/// We use linked lists for the maps from keys to messages/bindings +pub(crate) enum List<T> { + Cons(T, Box<List<T>>), + Nil, +} + +/// Every node has a unique identifier (the betree has a counter). +/// Whenever we need to read/update the content of a node, we read/update +/// the whole content from disk at once. +/// +/// In order to make things simple, the content of each node is saved in +/// a single file, identified by the node index. Also, we use json. +/// +/// We don't reason about the content of the load/store node functions +/// (which are assumed), while the purpose of this example is to illustrate the +/// proof experience we have with Aeneas: we are not looking for performance here. +/// +/// Rk.: in the future, we will directly use the functions from betree_utils +/// and setup the translation to consider this module as assumed (i.e., no +/// wrappers) +fn load_internal_node(id: NodeId) -> InternalContent { + utils::load_internal_node(id) +} + +/// See [load_internal_node]. +fn store_internal_node(id: NodeId, content: InternalContent) { + utils::store_internal_node(id, content) +} + +/// See [load_internal_node]. +fn load_leaf_node(id: NodeId) -> LeafContent { + utils::load_leaf_node(id) +} + +/// See [load_internal_node]. +fn store_leaf_node(id: NodeId, content: LeafContent) { + utils::store_leaf_node(id, content) +} + +fn fresh_node_id(counter: &mut NodeId) -> NodeId { + let id = *counter; + *counter += 1; + id +} + +/// We use this type to encode closures. +/// See [Message::Upsert] and [upsert_update] +pub enum UpsertFunState { + Add(u64), + Sub(u64), +} + +/// A message - note that all those messages have to be linked to a key +pub(crate) enum Message { + /// Insert a binding from value to key + Insert(Value), + /// Delete a binding from value to key + Delete, + /// [Upsert] is "query then update" (query a value, then update the binding + /// by using the result of the query). This is pretty expensive if we + /// actually *do* query, *then* update: queries are expensive, because + /// we potentially have to explore the tree in depth (and every time we + /// lookup a node, we have an expensive I/O operation). + /// Instead, we insert this [Upsert] message in the tree, which progressively + /// gets propagated to the children untils it gets applied (when we find an + /// [Insert], or when we reach a leaf). + /// + /// In practice, [Upsert] should store a closure. For now we don't have + /// support for function pointers and closures, so [Upsert] doesn't store + /// a closure and always applies the same update function. Note that the + /// [Value] stored in [Upsert] acts as a closure's state. + /// + /// The interest of this example is to split the proof in two: + /// 1. a very simple refinement proof (which is made simple by the fact that + /// Aeneas takes care of the memory management proof obligations through + /// the translation) + /// 2. a more complex functional proof. + /// We write a very general model of the b-epsilon tree, prove that it is + /// refined by the translated code in 1., then prove the general functional + /// correctness case once and for all in 2. + /// The idea is that once we add support for closures, we should be able to + /// update the Rust code, and all we would need to do on the proof side is + /// to update the refinement proof in 1., which should hopefully be + /// straightforward. + /// + /// Also note that if we don't have [Upsert], there is no point in using + /// b-epsilon trees, which have the particularity of storing messages: + /// b-trees and their variants work very well (and don't use messages). + /// + /// Note there is something interesting about the proofs we do for [Upsert]. + /// When we use [Insert] or [Delete], we remove the upserts which are pending + /// for the key, because there is no point in applying them (there would be + /// if we wanted to leverage the fact that the update functions we apply may + /// be stateful). + /// The consequence is that we don't observe potentially failing executions of + /// the update functions. At the opposite, the specification of [Upsert] is + /// "greedy": we see [Upsert] as query then update. This means that the + /// implementation refines the specification only if we make sure that the + /// update function used for the upsert doesn't fail on the input we give it + /// (all this can be seen in the specification we prove about the be-tree). + Upsert(UpsertFunState), +} + +/// Internal node content. +/// +/// An internal node contains a map from keys to pending messages +/// +/// Invariants: +/// - the pairs (key, message) are sorted so that the keys are in increasing order +/// - for a given key, there can be: +/// - no message +/// - one insert or delete message +/// - a list of upsert messages. In that case, the upsert message are sorted +/// from the *first* to the *last* added in the betree. +pub(crate) type InternalContent = Map<Key, Message>; + +/// Leaf node content. +/// +/// A leaf node contains a map from keys to values. +/// We store the bindings in order of increasing key. +pub(crate) type LeafContent = Map<Key, Value>; + +/// Internal node. See [Node]. +/// +/// An internal node contains a stack of messages (stored on disk and thus +/// absent from the node itself), and two children. +/// +/// When transmitting messages to the children: the messages/bindings for the +/// keys < pivot are given to the left child, and those for the keys >= pivot +/// are given to the right child. +/// +/// Note that in Be-Tree the internal nodes have lists of children, which +/// allows to do even smarter things: if an internal node has too many +/// messages, then: +/// - either it can transmit big batches of those messages to some of its +/// children, in which case it does +/// - or it can't, in which case it splits, because otherwise we have too +/// many unefficient updates to perform (the aim really is to amortize +/// the cost of I/O, which is achieved by minimizing the number of +/// accesses to node contents) +struct Internal { + id: NodeId, + pivot: Key, + left: Box<Node>, + right: Box<Node>, +} + +/// Leaf node. See [Node] +/// +/// A leaf node contains bindings (stored on disk, and thus absent from the +/// node itself). +struct Leaf { + id: NodeId, + /// The number of bindings in the node + size: u64, +} + +/// A node in the BeTree. +/// +/// The node's content is stored on disk (and hence absent from the node itself). +/// +/// Note that we don't have clean/dirty nodes: all node contents are immediately +/// written on disk upon being updated. +enum Node { + /// An internal node (with children). + Internal(Internal), + /// A leaf node. + Leaf(Leaf), +} + +/// The parameters of a BeTree, which control when to flush or split. +struct Params { + /// The minimum number of messages we flush to the children. + /// We wait to reach 2 * min_flush_size before flushing to children. + /// If one of the children doesn't receive a number of + /// messages >= min_flush_size, we keep those messages in the parent + /// node. Of course, at least one of the two children will receive + /// flushed messages: this gives us that an internal node has at most + /// 2 * min_flush_size pending messages - 1. + min_flush_size: u64, + /// The maximum number of bindings we can store in a leaf node (if we + /// reach this number, we split). + split_size: u64, +} + +struct NodeIdCounter { + next_node_id: NodeId, +} + +impl NodeIdCounter { + fn new() -> Self { + NodeIdCounter { next_node_id: 0 } + } + + fn fresh_id(&mut self) -> NodeId { + let id = self.next_node_id; + self.next_node_id += 1; + id + } +} + +/// The BeTree itself +pub struct BeTree { + /// The parameters of the BeTree + params: Params, + /// Every node has a unique id: we keep a counter to generate fresh ids + node_id_cnt: NodeIdCounter, + /// The root of the tree + root: Node, +} + +/// The update function used for [Upsert]. +/// Will be removed once we have closures (or at least function pointers). +/// This function just computes a saturated sum. +/// Also note that it takes an option as input, for the previous value: +/// we draw inspiration from the C++ Be-Tree implemenation, where +/// in case the binding is not present, the closure stored in upsert is +/// given a default value. +pub fn upsert_update(prev: Option<Value>, st: UpsertFunState) -> Value { + // We just compute the sum, until it saturates + match prev { + Option::None => { + match st { + UpsertFunState::Add(v) => { + // We consider the default value is 0, so we return 0 + v + // (or we could fail - it doesn't really matter) + v + } + UpsertFunState::Sub(_) => { + // Same logic as for [sub], but this time we saturate at 0 + 0 + } + } + } + Option::Some(prev) => { + match st { + UpsertFunState::Add(v) => { + // Note that Aeneas is a bit conservative about the max_usize + let margin = u64::MAX - prev; + // Check if we saturate + if margin >= v { + prev + v + } else { + u64::MAX + } + } + UpsertFunState::Sub(v) => { + // Check if we saturate + if prev >= v { + prev - v + } else { + 0 + } + } + } + } + } +} + +impl<T> List<T> { + fn len(&self) -> u64 { + match self { + List::Nil => 0, + List::Cons(_, tl) => 1 + tl.len(), + } + } + + /// Split a list at a given length + fn split_at(self, n: u64) -> (List<T>, List<T>) { + if n == 0 { + (List::Nil, self) + } else { + match self { + List::Nil => unreachable!(), + List::Cons(hd, tl) => { + let (ls0, ls1) = tl.split_at(n - 1); + (List::Cons(hd, Box::new(ls0)), ls1) + } + } + } + } + + /// Push an element at the front of the list. + fn push_front(&mut self, x: T) { + // Move under borrows: annoying... + let tl = std::mem::replace(self, List::Nil); + *self = List::Cons(x, Box::new(tl)); + } + + /// Pop the element at the front of the list + fn pop_front(&mut self) -> T { + // Move under borrows: annoying... + let ls = std::mem::replace(self, List::Nil); + match ls { + List::Nil => panic!(), + List::Cons(x, tl) => { + *self = *tl; + x + } + } + } + + fn hd(&self) -> &T { + match self { + List::Nil => panic!(), + List::Cons(hd, _) => hd, + } + } +} + +impl<T> Map<Key, T> { + fn head_has_key(&self, key: Key) -> bool { + match self { + List::Nil => false, + List::Cons(hd, _) => hd.0 == key, + } + } + + /// Partition the map between two maps: + /// - a first map where the keys < pivot + /// - a second map where the keys >= pivot + /// Note that the bindings in the map are supposed to be sorted in + /// order of increasing keys. + fn partition_at_pivot(self, pivot: Key) -> (Map<Key, T>, Map<Key, T>) { + match self { + List::Nil => (List::Nil, List::Nil), + List::Cons(hd, tl) => { + if hd.0 >= pivot { + (List::Nil, List::Cons(hd, tl)) + } else { + let (ls0, ls1) = tl.partition_at_pivot(pivot); + (List::Cons(hd, Box::new(ls0)), ls1) + } + } + } + } +} + +impl Leaf { + /// Split a leaf into an internal node with two children. + /// + /// The leaf should have exactly 2 * split_size elements. + /// Also, we use the fact that the keys are sorted in increasing order. + fn split( + &self, + content: Map<Key, Value>, + params: &Params, + node_id_cnt: &mut NodeIdCounter, + ) -> Internal { + // Split the content + let (content0, content1) = content.split_at(params.split_size); + // Get the pivot + let pivot = content1.hd().0; + // Create the two nodes + let id0 = node_id_cnt.fresh_id(); + let id1 = node_id_cnt.fresh_id(); + let left = Leaf { + id: id0, + size: params.split_size, + }; + let right = Leaf { + id: id1, + size: params.split_size, + }; + // Store the content + store_leaf_node(id0, content0); + store_leaf_node(id1, content1); + // Return + Internal { + id: self.id, + pivot, + left: Box::new(Node::Leaf(left)), + right: Box::new(Node::Leaf(right)), + } + } +} + +impl Internal { + /// Small utility: lookup a value in the children nodes. + fn lookup_in_children(&mut self, key: Key) -> Option<Value> { + if key < self.pivot { + self.left.lookup(key) + } else { + self.right.lookup(key) + } + } + + /// Flush the messages in an internal node to its children. + /// Note that when flushing, we send messages to a child only if there + /// are more than min_flush_size messages to send. Also, we flush only + /// if the number of messages in the current node is >= 2* num_flush_size. + /// + /// The function returns the messages we couldn't flush to the children + /// nodes. + fn flush<'a>( + &'a mut self, + params: &Params, + node_id_cnt: &'a mut NodeIdCounter, + content: Map<Key, Message>, + ) -> Map<Key, Message> { + // Partition the messages + let (msgs_left, msgs_right) = content.partition_at_pivot(self.pivot); + // Check if we need to flush to the left child + let len_left = msgs_left.len(); + if len_left >= params.min_flush_size { + // Flush to the left + self.left.apply_messages(params, node_id_cnt, msgs_left); + // Check if we need to flush to the right + let len_right = msgs_right.len(); + if len_right >= params.min_flush_size { + self.right.apply_messages(params, node_id_cnt, msgs_right); + // No messages remain in the current node + List::Nil + } else { + // We keep the messages which belong to the right node + msgs_right + } + } else { + // Don't flush to the left: we necessarily flush to the right + self.right.apply_messages(params, node_id_cnt, msgs_right); + // We keep the messages which belong to the left node + msgs_left + } + } +} + +impl Node { + /// Apply a list of message to ourselves: leaf node case + fn apply_messages_to_leaf<'a>( + bindings: &'a mut Map<Key, Value>, + new_msgs: List<(Key, Message)>, + ) { + match new_msgs { + List::Nil => (), + List::Cons(new_msg, new_msgs_tl) => { + Node::apply_to_leaf(bindings, new_msg.0, new_msg.1); + Node::apply_messages_to_leaf(bindings, *new_msgs_tl); + } + } + } + + /// Apply a message to ourselves: leaf node case + /// + /// This simply updates the bindings. + fn apply_to_leaf<'a>(bindings: &'a mut Map<Key, Value>, key: Key, new_msg: Message) { + // Retrieve a mutable borrow to the position of the binding, if there is + // one, or to the end of the list + let bindings = Node::lookup_mut_in_bindings(key, bindings); + // Check if we point to a binding which has the key we are looking for + if bindings.head_has_key(key) { + // We need to pop the binding, and may need to reuse the + // previous value (for an upsert) + let hd = bindings.pop_front(); + // Match over the message + match new_msg { + Message::Insert(v) => { + bindings.push_front((key, v)); + } + Message::Delete => { + // Nothing to do: we popped the binding + () + } + Message::Upsert(s) => { + let v = upsert_update(Option::Some(hd.1), s); + bindings.push_front((key, v)); + } + } + } else { + // Key not found: simply insert + match new_msg { + Message::Insert(v) => { + bindings.push_front((key, v)); + } + Message::Delete => { + // Nothing to do + () + } + Message::Upsert(s) => { + let v = upsert_update(Option::None, s); + bindings.push_front((key, v)); + } + } + } + } + + /// Apply a list of message to ourselves: internal node case + fn apply_messages_to_internal<'a>( + msgs: &'a mut Map<Key, Message>, + new_msgs: List<(Key, Message)>, + ) { + match new_msgs { + List::Nil => (), + List::Cons(new_msg, new_msgs_tl) => { + Node::apply_to_internal(msgs, new_msg.0, new_msg.1); + Node::apply_messages_to_internal(msgs, *new_msgs_tl); + } + } + } + + /// Apply a message to ourselves: internal node case + /// + /// This basically inserts a message in a messages stack. However, + /// we may need to filter previous messages: for insert, if we insert an + /// [Insert] in a stack which contains an [Insert] or a [Delete] for the + /// same key, we replace this old message with the new one. + fn apply_to_internal<'a>(msgs: &'a mut Map<Key, Message>, key: Key, new_msg: Message) { + // Lookup the first message for [key] (if there is no message for [key], + // we get a mutable borrow to the position in the list where we need + // to insert the new message). + let msgs = Node::lookup_first_message_for_key(key, msgs); + // What we do is not the same, depending on whether there is already + // a message or not. + if msgs.head_has_key(key) { + // We need to check the current message + match new_msg { + Message::Insert(_) | Message::Delete => { + // If [Insert] or [Delete]: filter the current + // messages, and insert the new one + Node::filter_messages_for_key(key, msgs); + msgs.push_front((key, new_msg)); + } + Message::Upsert(s) => { + // If [Update]: we need to take into account the + // previous messages. + match msgs.hd().1 { + Message::Insert(prev) => { + // There should be exactly one [Insert]: + // pop it, compute the result of the [Upsert] + // and insert this result + let v = upsert_update(Option::Some(prev), s); + let _ = msgs.pop_front(); + msgs.push_front((key, Message::Insert(v))); + } + Message::Delete => { + // There should be exactly one [Delete] + // message : pop it, compute the result of + // the [Upsert], and insert this result + let _ = msgs.pop_front(); + let v = upsert_update(Option::None, s); + msgs.push_front((key, Message::Insert(v))); + } + Message::Upsert(_) => { + // There may be several msgs upserts: + // we need to insert the new message at + // the end of the list of upserts (so + // that later we can apply them all in + // proper order). + let msgs = Node::lookup_first_message_after_key(key, msgs); + msgs.push_front((key, Message::Upsert(s))); + } + } + } + } + } else { + // No pending message for [key]: simply add the new message + msgs.push_front((key, new_msg)) + } + } + + /// Apply a message to ourselves + fn apply<'a>( + &'a mut self, + params: &Params, + node_id_cnt: &'a mut NodeIdCounter, + key: Key, + new_msg: Message, + ) { + let msgs = List::Cons((key, new_msg), Box::new(List::Nil)); + self.apply_messages(params, node_id_cnt, msgs); + } + + /// Apply a list of messages to ourselves + fn apply_messages<'a>( + &'a mut self, + params: &Params, + node_id_cnt: &'a mut NodeIdCounter, + msgs: List<(Key, Message)>, + ) { + match self { + Node::Leaf(node) => { + // Load the content from disk + let mut content = load_leaf_node(node.id); + // Insert + Node::apply_messages_to_leaf(&mut content, msgs); + // Check if we need to split - in the future, we might want to + // do something smarter to compute the number of messages + let len = content.len(); + if len >= 2 * params.split_size { + // Split + let new_node = node.split(content, params, node_id_cnt); + // Store the content to disk + store_leaf_node(node.id, List::Nil); + // Update the node + *self = Node::Internal(new_node); + } else { + // Update the size if necessary + node.size = len; + // Store the content to disk + store_leaf_node(node.id, content); + } + } + Node::Internal(node) => { + // Load the content from disk + let mut content = load_internal_node(node.id); + // Insert + Node::apply_messages_to_internal(&mut content, msgs); + // Check if we need to flush - in the future, we might want to + // do something smarter to compute the number of messages + let num_msgs = content.len(); + if num_msgs >= params.min_flush_size { + content = node.flush(params, node_id_cnt, content); + } + // Store the content to disk + store_internal_node(node.id, content) + } + } + } + + /// Lookup a value in a list of bindings. + /// Note that the values should be stored in order of increasing key. + fn lookup_in_bindings(key: Key, bindings: &Map<Key, Value>) -> Option<Value> { + match bindings { + List::Nil => Option::None, + List::Cons(hd, tl) => { + if hd.0 == key { + Option::Some(hd.1) + } else if hd.0 > key { + Option::None + } else { + Node::lookup_in_bindings(key, tl) + } + } + } + } + + /// Lookup a value in a list of bindings, and return a borrow to the position + /// where the value is (or should be inserted, if the key is not in the bindings). + fn lookup_mut_in_bindings<'a>( + key: Key, + bindings: &'a mut Map<Key, Value>, + ) -> &'a mut Map<Key, Value> { + match bindings { + List::Nil => bindings, + List::Cons(hd, tl) => { + // This requires Polonius + if hd.0 >= key { + bindings + } else { + Node::lookup_mut_in_bindings(key, tl) + } + } + } + } + + /// Filter all the messages which concern [key]. + /// + /// Note that the stack of messages must start with a message for [key]: + /// we stop filtering at the first message which is not about [key]. + fn filter_messages_for_key<'a>(key: Key, msgs: &'a mut Map<Key, Message>) { + match msgs { + List::Nil => (), + List::Cons((k, _), _) => { + if *k == key { + msgs.pop_front(); + Node::filter_messages_for_key(key, msgs); + } else { + // Stop + () + } + } + } + } + + fn lookup_first_message_after_key<'a>( + key: Key, + msgs: &'a mut Map<Key, Message>, + ) -> &'a mut Map<Key, Message> { + match msgs { + List::Nil => msgs, + List::Cons((k, _), next_msgs) => { + if *k == key { + Node::lookup_first_message_after_key(key, next_msgs) + } else { + msgs + } + } + } + } + + /// Returns the value bound to a key. + /// Note that while looking for the binding, we might reorganize the + /// internals of the betree to apply or flush messages: hence the mutable + /// borrow. + fn lookup<'a>(&'a mut self, key: Key) -> Option<Value> { + match self { + Node::Leaf(node) => { + // Load the node content + let bindings = load_leaf_node(node.id); + // Just lookup the binding in the map + Node::lookup_in_bindings(key, &bindings) + } + Node::Internal(node) => { + // Load the node content + let mut msgs = load_internal_node(node.id); + // Look for the first message pending for the key. + // Note that we maintain the following invariants: + // - if there are > 1 messages, they must be upsert messages only + // - the upsert messages are sorted from the *first* added to the + // *last* added to the betree. + // Also note that if there are upsert messages, we have to apply + // them immediately. + // + // Rk.: [lookup_first_message_for_key] below returns a borrow to + // the portion of the list we will update (if we have upserts, + // we will apply the messages, filter them while doing so, + // insert an [Insert] message, etc.). Should be interesting + // to see how the proof experience with the backward functions + // is at this for this piece of code. Note that this was inpired + // by Be-Tree. + // Also, can't wait to see how all this will work with loops. + let pending = Node::lookup_first_message_for_key(key, &mut msgs); + match pending { + List::Nil => { + // Nothing: dive into the children + node.lookup_in_children(key) + } + List::Cons((k, msg), _) => { + // Check if the borrow which points inside the messages + // stack points to a message for [key] + if *k != key { + // Note the same key: dive into the children + node.lookup_in_children(key) + } else { + // Same key: match over the message for this key + match msg { + Message::Insert(v) => Some(*v), + Message::Delete => None, + Message::Upsert(_) => { + // There are pending upserts: we have no choice but to + // apply them. + // + // Rk.: rather than calling [lookup], we could actually + // go down the tree accumulating upserts. On the other + // hand, the key is now "hotter", so it is not a bad + // idea to keep it as close to the root as possible. + // Note that what we do is what Be-Tree does. + // + // First, lookup the value from the children. + let v = node.lookup_in_children(key); + // Apply the pending updates, and replace them with + // an [Insert] containing the updated value. + // + // Rk.: Be-Tree doesn't seem to store the newly computed + // value, which I don't understand. + let v = Node::apply_upserts(pending, v, key); + // Update the node content + store_internal_node(node.id, msgs); + // Return the value + Option::Some(v) + } + } + } + } + } + } + } + } + + /// Return a mutable borrow to the first message whose key is <= than [key]. + /// If the key is [key], then it is the first message about [key]. + /// Otherwise, it gives us a mutable borrow to the place where we need + /// to insert new messages (note that the borrow may point to the end + /// of the list). + fn lookup_first_message_for_key<'a>( + key: Key, + msgs: &'a mut Map<Key, Message>, + ) -> &'a mut Map<Key, Message> { + match msgs { + List::Nil => msgs, + List::Cons(x, next_msgs) => { + // Rk.: we need Polonius here + // We wouldn't need Polonius if directly called the proper + // function to make the modifications here (because we wouldn't + // need to return anything). However, it would not be very + // idiomatic, especially with regards to the fact that we will + // rewrite everything with loops at some point. + if x.0 >= key { + msgs + } else { + Node::lookup_first_message_for_key(key, next_msgs) + } + } + } + } + + /// Apply the upserts from the current messages stack to a looked up value. + /// + /// The input mutable borrow must point to the first upsert message in the + /// messages stack of the current node. We remove the messages from the stack + /// while applying them. + /// Note that if there are no more upserts to apply, then the value must be + /// `Some`. Also note that we use the invariant that in the message stack, + /// upsert messages are sorted from the first to the last to apply. + fn apply_upserts(msgs: &mut Map<Key, Message>, prev: Option<Value>, key: Key) -> Value { + if msgs.head_has_key(key) { + // Pop the front message. + // Note that it *must* be an upsert. + let msg = msgs.pop_front(); + match msg.1 { + Message::Upsert(s) => { + // Apply the update + let v = upsert_update(prev, s); + let prev = Option::Some(v); + // Continue + Node::apply_upserts(msgs, prev, key) + } + _ => { + // Unreachable: we can only have [Upsert] messages + // for the key + unreachable!(); + } + } + } else { + // We applied all the upsert messages: simply put an [Insert] + // message and return the value. + let v = prev.unwrap(); + msgs.push_front((key, Message::Insert(v))); + return v; + } + } +} + +impl BeTree { + pub fn new(min_flush_size: u64, split_size: u64) -> Self { + let mut node_id_cnt = NodeIdCounter::new(); + let id = node_id_cnt.fresh_id(); + let root = Node::Leaf(Leaf { id, size: 0 }); + store_leaf_node(id, List::Nil); + let params = Params { + min_flush_size, + split_size, + }; + BeTree { + params, + node_id_cnt, + root, + } + } + + /// Apply a message to the tree. + /// + /// This is an auxiliary function. + fn apply(&mut self, key: Key, msg: Message) { + self.root + .apply(&self.params, &mut self.node_id_cnt, key, msg) + } + + /// Insert a binding from [key] to [value] + pub fn insert(&mut self, key: Key, value: Value) { + let msg = Message::Insert(value); + self.apply(key, msg); + } + + /// Delete the bindings for [key] + pub fn delete(&mut self, key: Key) { + let msg = Message::Delete; + self.apply(key, msg); + } + + /// Apply a query-update + pub fn upsert(&mut self, key: Key, upd: UpsertFunState) { + let msg = Message::Upsert(upd); + self.apply(key, msg); + } + + /// Returns the value bound to a key. + /// Note that while looking for the binding, we might reorganize the + /// internals of the betree to apply or flush messages: hence the mutable + /// borrow. + pub fn lookup<'a>(&'a mut self, key: Key) -> Option<Value> { + self.root.lookup(key) + } +} + +#[cfg(test)] +mod tests { + use crate::betree::*; + use std::collections::HashMap; + use std::fmt::{Display, Error, Formatter}; + use std::vec::Vec; + + struct Maps { + betree: BeTree, + refmap: HashMap<Key, Value>, + } + + impl Maps { + fn insert(&mut self, k: Key, v: Value) { + log::trace!("insert: {} -> {}", k, v); + self.betree.insert(k, v); + self.refmap.insert(k, v); + } + + fn delete(&mut self, k: Key) { + log::trace!("delete: {}", k); + self.betree.delete(k); + self.refmap.remove(&k); + } + + /// This function doesn't return a value: it simply checks that the + /// b-epsilon tree and the reference map have the same bindings. + fn lookup(&mut self, k: Key) { + let v0 = self.betree.lookup(k); + let v1 = self.refmap.get(&k).map(|x| *x); + log::trace!("lookup {k}: betree: {:?}, ref: {:?}", v0, v1); + assert!(v0 == v1); + } + + /// Only testing the addition (the choice of the update function doesn't + /// make much difference) + fn upsert(&mut self, k: Key, v: Value) { + log::trace!("upsert: {} -> add({})", k, v); + self.betree.upsert(k, UpsertFunState::Add(v)); + let prev = self.refmap.get(&k).map(|x| *x); + let nv = upsert_update(prev, UpsertFunState::Add(v)); + self.refmap.insert(k, nv); + } + + /// Check that all the bindings in the betree give the same result as the + /// reference. + /// + /// Rk.: looking up actually updates the b-epsilon tree. + fn check_equal(&mut self) { + let keys: Vec<Key> = self.refmap.keys().map(|k| *k).collect(); + for k in keys { + self.lookup(k); + } + } + } + + impl Display for Map<Key, Value> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + List::Nil => write!(f, ""), + List::Cons(hd, tl) => { + write!(f, "{} -> {}, ", hd.0, hd.1).unwrap(); + tl.fmt(f) + } + } + } + } + + impl Display for UpsertFunState { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + UpsertFunState::Add(v) => write!(f, "add({})", v), + UpsertFunState::Sub(v) => write!(f, "sub({})", v), + } + } + } + + impl Display for Message { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + Message::Insert(v) => write!(f, "insert({})", v), + Message::Delete => write!(f, "delete"), + Message::Upsert(v) => write!(f, "upsert({})", v), + } + } + } + + impl Display for Map<Key, Message> { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + List::Nil => write!(f, ""), + List::Cons(hd, tl) => { + write!(f, "{} -> {}, ", hd.0, hd.1).unwrap(); + tl.fmt(f) + } + } + } + } + + impl Node { + fn fmt(&self, indent: &String, f: &mut Formatter<'_>) -> Result<(), Error> { + match self { + Node::Leaf(node) => { + let content = load_leaf_node(node.id); + write!(f, "{}{}: [{}]", indent, node.id, content) + } + Node::Internal(node) => { + let content = load_internal_node(node.id); + let indent1 = format!("{} ", indent).to_string(); + write!( + f, + "{}{{\n{}{},\n{}[{}],", + indent, indent, node.id, indent, &content + ) + .unwrap(); + write!(f, "\n{}", indent1).unwrap(); + node.left.fmt(&indent1, f).unwrap(); + write!(f, "\n{}", indent1).unwrap(); + node.right.fmt(&indent1, f).unwrap(); + write!(f, "\n{}}}", indent) + } + } + } + } + + impl Display for BeTree { + fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error> { + self.root.fmt(&"".to_string(), f) + } + } + + #[test] + fn test1() { + // Initialize the logger + env_logger::init(); + + let mut m = Maps { + betree: BeTree::new(5, 5), + refmap: HashMap::new(), + }; + let num_keys = 100; + + // Insert bindings + for k in 0..num_keys { + let v = 2 * k + 1; + m.insert(k, v); + log::trace!("\n{}", &m.betree); + } + + // Make various queries + for kb in 0..(10 * num_keys) { + let k = kb % num_keys; + match k % 4 { + 0 => { + let v = 3 * k + 2; + m.insert(k, v); + } + 1 => { + m.delete(k); + } + 2 => { + let v = kb % 7; + m.upsert(k, v); + } + 3 => { + m.lookup(k); + } + _ => { + unreachable!(); + } + } + log::trace!("\n{}", &m.betree); + } + + // Check that the b-epsilon tree didn't diverge (we check twice, + // because looking up performs updates that we also want to test) + m.check_equal(); + m.check_equal(); + } + fn range_insert(tree: &mut BeTree, start: Key, end: Key) { + for k in start..end { + tree.insert(k, 2 * k + 1); + } + } +} diff --git a/tests/src/betree/src/betree_utils.rs b/tests/src/betree/src/betree_utils.rs new file mode 100644 index 00000000..fd269f4d --- /dev/null +++ b/tests/src/betree/src/betree_utils.rs @@ -0,0 +1,155 @@ +//! The following module implements utilities for [betree.rs]. +//! Those utilities are only used for serialization/deserialization (we don't +//! reason about them). +//! +//! The issue is that we can't derive serialization/deserialization +//! implementations directly in betree.rs, otherwise we can't translate. +//! We could have hacked in Aeneas to skip those implementations, but I'd +//! rather put a little bit of engineering time into this file, while thinking +//! about a cleaner way of doing this in general. The following file is not +//! difficult to write and maintain anyway. +#![allow(dead_code)] + +use crate::betree::{ + InternalContent, Key, LeafContent, List, Message, NodeId, UpsertFunState, Value, +}; +use serde::{Deserialize, Serialize}; +use std::fs::File; +use std::vec::Vec; + +/// Note that I tried using Serde's facilities to define serialization/ +/// deserialization functions for external types, but it proved cumbersome +/// for the betree case. +#[derive(Serialize, Deserialize)] +pub enum UpsertFunStateSerde { + Add(u64), + Sub(u64), +} + +impl UpsertFunStateSerde { + fn to_state(self) -> UpsertFunState { + match self { + UpsertFunStateSerde::Add(v) => UpsertFunState::Add(v), + UpsertFunStateSerde::Sub(v) => UpsertFunState::Sub(v), + } + } + + fn from_state(msg: UpsertFunState) -> Self { + match msg { + UpsertFunState::Add(v) => UpsertFunStateSerde::Add(v), + UpsertFunState::Sub(v) => UpsertFunStateSerde::Sub(v), + } + } +} + +/// Same remark as for [UpsertFunStateSerde] +#[derive(Serialize, Deserialize)] +enum MessageSerde { + Insert(Value), + Delete, + Upsert(UpsertFunStateSerde), +} + +impl MessageSerde { + fn to_msg(self) -> Message { + match self { + MessageSerde::Insert(v) => Message::Insert(v), + MessageSerde::Delete => Message::Delete, + MessageSerde::Upsert(v) => Message::Upsert(v.to_state()), + } + } + + fn from_msg(msg: Message) -> Self { + match msg { + Message::Insert(v) => MessageSerde::Insert(v), + Message::Delete => MessageSerde::Delete, + Message::Upsert(v) => MessageSerde::Upsert(UpsertFunStateSerde::from_state(v)), + } + } +} + +// For some reason, I don't manage to make that in an impl... +pub(crate) fn list_from_vec<T>(mut v: Vec<T>) -> List<T> { + // We need to reverse + v.reverse(); + let mut l = List::Nil; + for x in v.into_iter() { + l = List::Cons(x, Box::new(l)); + } + l +} + +// For some reason, I don't manage to make that in an impl... +pub(crate) fn list_to_vec<T>(mut l: List<T>) -> Vec<T> { + let mut v = Vec::new(); + loop { + match l { + List::Nil => break, + List::Cons(hd, tl) => { + v.push(hd); + l = *tl; + } + } + } + v +} + +/// See the equivalent function in betree.rs +pub(crate) fn load_internal_node(id: NodeId) -> InternalContent { + // Open the file + std::fs::create_dir_all("tmp").unwrap(); + let filename = format!("tmp/node{}", id).to_string(); + // Read + let f = File::open(filename).unwrap(); + // Serde makes things easy + let c: Vec<(Key, MessageSerde)> = serde_json::from_reader(&f).unwrap(); + let c: Vec<(Key, Message)> = c + .into_iter() + .map(|(key, msg)| (key, msg.to_msg())) + .collect(); + // Convert + list_from_vec(c) +} + +/// See the equivalent function in betree.rs +pub(crate) fn store_internal_node(id: NodeId, content: InternalContent) { + // Open the file + std::fs::create_dir_all("tmp").unwrap(); + let filename = format!("tmp/node{}", id).to_string(); + // Write + let f = File::create(filename).unwrap(); + // Convert + let v: Vec<(Key, Message)> = list_to_vec(content); + let v: Vec<(Key, MessageSerde)> = v + .into_iter() + .map(|(k, msg)| (k, MessageSerde::from_msg(msg))) + .collect(); + // Serde makes things easy + serde_json::to_writer(&f, &v).unwrap(); +} + +/// See the equivalent function in betree.rs +pub(crate) fn load_leaf_node(id: NodeId) -> LeafContent { + // Open the file + std::fs::create_dir_all("tmp").unwrap(); + let filename = format!("tmp/node{}", id).to_string(); + // Read + let f = File::open(filename).unwrap(); + // Serde makes things easy + let c: Vec<(Key, Value)> = serde_json::from_reader(&f).unwrap(); + // Convert + list_from_vec(c) +} + +/// See the equivalent function in betree.rs +pub(crate) fn store_leaf_node(id: NodeId, content: LeafContent) { + // Open the file + std::fs::create_dir_all("tmp").unwrap(); + let filename = format!("tmp/node{}", id).to_string(); + // Write + let f = File::create(filename).unwrap(); + // Convert + let v: Vec<(Key, Value)> = list_to_vec(content); + // Serde makes things easy + serde_json::to_writer(&f, &v).unwrap(); +} diff --git a/tests/src/betree/src/main.rs b/tests/src/betree/src/main.rs new file mode 100644 index 00000000..64e9f7db --- /dev/null +++ b/tests/src/betree/src/main.rs @@ -0,0 +1,4 @@ +mod betree; +mod betree_utils; + +fn main() {} |