(.require [library [lux (.except has revised) [abstract [hash (.only Hash)] [equivalence (.only Equivalence)] [monoid (.only Monoid)] [functor (.only Functor)]] [control ["[0]" maybe] ["[0]" try (.only Try)] ["[0]" exception (.only exception)]] [data ["[0]" product] [collection ["[0]" list (.use "[1]#[0]" mix functor monoid)] ["[0]" array ["[1]" \\unsafe (.only Array)]]]] [macro ["^" pattern]] [math ["[0]" number (.only) ["n" nat] ["[0]" i64]]]]]) ... This implementation of Hash Array Mapped Trie (HAMT) is based on ... Clojure's PersistentHashMap implementation. ... That one is further based on Phil Bagwell's Hash Array Mapped Trie. ... Bitmaps are used to figure out which branches on a #Base node are ... populated. The number of bits that are 1s in a bitmap signal the ... size of the #Base node. (type Bit_Map Nat) ... Represents the position of a node in a Bit_Map. ... It's meant to be a single bit set on a 32-bit word. ... The position of the bit reflects whether an entry in an analogous ... position exists within a #Base, as reflected in its Bit_Map. (type Bit_Position Nat) ... An index into an array. (type Index Nat) ... A hash-code derived from a key during tree-traversal. (type Hash_Code Nat) ... Represents the nesting level of a leaf or node, when looking-it-up ... while exploring the tree. ... Changes in levels are done by right-shifting the hashes of keys by ... the appropriate multiple of the branching-exponent. ... A shift of 0 means root level. ... A shift of (* branching_exponent 1) means level 2. ... A shift of (* branching_exponent N) means level N+1. (type Level Nat) ... Nodes for the tree data-structure that organizes the data inside ... Dictionaries. (type (Node k v) (Variant {#Hierarchy Nat (Array (Node k v))} {#Base Bit_Map (Array (Either (Node k v) [k v]))} {#Collisions Hash_Code (Array [k v])})) ... #Hierarchy nodes are meant to point down only to lower-level nodes. (type (Hierarchy k v) [Nat (Array (Node k v))]) ... #Base nodes may point down to other nodes, but also to leaves, ... which are KV-pairs. (type (Base k v) (Array (Either (Node k v) [k v]))) ... #Collisions are collections of KV-pairs for which the key is ... different on each case, but their hashes are all the same (thus ... causing a collision). (type (Collisions k v) (Array [k v])) ... That bitmap for an empty #Base is 0. ... Which is the same as 0000 0000 0000 0000 0000 0000 0000 0000. ... Or 0x00000000. ... Which is 32 zeroes, since the branching factor is 32. (def clean_bitmap Bit_Map 0) ... Bitmap position (while looking inside #Base nodes) is determined by ... getting 5 bits from a hash of the key being looked up and using ... them as an index into the array inside #Base. ... Since the data-structure can have multiple levels (and the hash has ... more than 5 bits), the binary-representation of the hash is shifted ... by 5 positions on each step (2^5 = 32, which is the branching ... factor). ... The initial shifting level, though, is 0 (which corresponds to the ... shift in the shallowest node on the tree, which is the root node). (def root_level Level 0) ... The exponent to which 2 must be elevated, to reach the branching ... factor of the data-structure. (def branching_exponent Nat 5) ... The threshold on which #Hierarchy nodes are demoted to #Base nodes, ... which is 1/4 of the branching factor (or a left-shift 2). (def demotion_threshold Nat (i64.left_shifted (n.- 2 branching_exponent) 1)) ... The threshold on which #Base nodes are promoted to #Hierarchy nodes, ... which is 1/2 of the branching factor (or a left-shift 1). (def promotion_threshold Nat (i64.left_shifted (n.- 1 branching_exponent) 1)) ... The size of hierarchy-nodes, which is 2^(branching-exponent). (def hierarchy_nodes_size Nat (i64.left_shifted branching_exponent 1)) ... The cannonical empty node, which is just an empty #Base node. (def empty_node Node {#Base clean_bitmap (array.empty 0)}) ... Expands a copy of the array, to have 1 extra slot, which is used ... for storing the value. (def (array#has idx value old_array) (All (_ a) (-> Index a (Array a) (Array a))) (let [old_size (array.size old_array)] (|> (array.empty (++ old_size)) (array.copy! idx 0 old_array 0) (array.has! idx value) (array.copy! (n.- idx old_size) idx old_array (++ idx))))) ... Creates a copy of an array with an index set to a particular value. (def (array#revised idx value array) (All (_ a) (-> Index a (Array a) (Array a))) (|> array array.clone (array.has! idx value))) ... Creates a clone of the array, with an empty position at index. (def (array#clear idx array) (All (_ a) (-> Index (Array a) (Array a))) (|> array array.clone (array.lacks! idx))) ... Shrinks a copy of the array by removing the space at index. (def (array#lacks idx array) (All (_ a) (-> Index (Array a) (Array a))) (let [new_size (-- (array.size array))] (|> (array.empty new_size) (array.copy! idx 0 array 0) (array.copy! (n.- idx new_size) (++ idx) array idx)))) ... Increases the level-shift by the branching-exponent, to explore ... levels further down the tree. (def level_up (-> Level Level) (n.+ branching_exponent)) (def hierarchy_mask Bit_Map (-- hierarchy_nodes_size)) ... Gets the branching-factor sized section of the hash corresponding ... to a particular level, and uses that as an index into the array. (def (level_index level hash) (-> Level Hash_Code Index) (i64.and ..hierarchy_mask (i64.right_shifted level hash))) ... A mechanism to go from indices to bit-positions. (def (to_bit_position index) (-> Index Bit_Position) (i64.left_shifted index 1)) ... The bit-position within a base that a given hash-code would have. (def (level_bit_position level hash) (-> Level Hash_Code Bit_Position) (to_bit_position (level_index level hash))) (def (with_bit_position? bit bitmap) (-> Bit_Position Bit_Map Bit) (|> bitmap (i64.and bit) (n.= clean_bitmap) not)) ... Figures out whether a bitmap only contains a single bit-position. (def only_bit_position? (-> Bit_Position Bit_Map Bit) n.=) (def (with_bit_position bit bitmap) (-> Bit_Position Bit_Map Bit_Map) (i64.or bit bitmap)) (def without_bit_position (-> Bit_Position Bit_Map Bit_Map) i64.xor) ... Figures out the size of a bitmap-indexed array by counting all the ... 1s within the bitmap. (def bitmap_size (-> Bit_Map Nat) i64.ones) ... A mask that, for a given bit position, only allows all the 1s prior ... to it, which would indicate the bitmap-size (and, thus, index) ... associated with it. (def bit_position_mask (-> Bit_Position Bit_Map) --) ... The index on the base array, based on its bit-position. (def (base_index bit_position bitmap) (-> Bit_Position Bit_Map Index) (bitmap_size (i64.and (bit_position_mask bit_position) bitmap))) ... Produces the index of a KV-pair within a #Collisions node. (def (collision_index key_hash key colls) (All (_ k v) (-> (Hash k) k (Collisions k v) (Maybe Index))) (at maybe.monad each product.left (array.example' (function (_ idx [key' val']) (at key_hash = key key')) colls))) ... When #Hierarchy nodes grow too small, they're demoted to #Base ... nodes to save space. (def (demotion except_idx [h_size h_array]) (All (_ k v) (-> Index (Hierarchy k v) [Bit_Map (Base k v)])) (product.right (list#mix (function (_ idx [insertion_idx node]) (let [[bitmap base] node] (if (array.lacks? idx h_array) [insertion_idx node] (if (n.= except_idx idx) [insertion_idx node] [(++ insertion_idx) [(with_bit_position (to_bit_position idx) bitmap) (array.has! insertion_idx {.#Left (array.item idx h_array)} base)]])))) [0 [clean_bitmap (array.empty (-- h_size))]] (list.indices (array.size h_array))))) ... When #Base nodes grow too large, they're promoted to #Hierarchy to ... add some depth to the tree and help keep its balance. (def hierarchy_indices (List Index) (list.indices hierarchy_nodes_size)) (def (promotion node#has key_hash level bitmap base) (All (_ k v) (-> (-> Level Hash_Code k v (Hash k) (Node k v) (Node k v)) (Hash k) Level Bit_Map (Base k v) (Array (Node k v)))) (product.right (list#mix (function (_ hierarchy_idx (^.let default [base_idx h_array])) (if (with_bit_position? (to_bit_position hierarchy_idx) bitmap) [(++ base_idx) (case (array.item base_idx base) {.#Left sub_node} (array.has! hierarchy_idx sub_node h_array) {.#Right [key' val']} (array.has! hierarchy_idx (node#has (level_up level) (at key_hash hash key') key' val' key_hash ..empty_node) h_array))] default)) [0 (array.empty hierarchy_nodes_size)] hierarchy_indices))) ... All empty nodes look the same (a #Base node with clean bitmap is ... used). ... So, this test is introduced to detect them. (def (node#empty? node) (All (_ k v) (-> (Node k v) Bit)) (`` (case node {#Base (~~ (static ..clean_bitmap)) _} #1 _ #0))) (def (node#has level hash key val key_hash node) (All (_ k v) (-> Level Hash_Code k v (Hash k) (Node k v) (Node k v))) (case node ... For #Hierarchy nodes, check whether one can add the element to ... a sub-node. If impossible, introduce a new singleton sub-node. {#Hierarchy _size hierarchy} (let [idx (level_index level hash) [_size' sub_node] (if (not (array.lacks? idx hierarchy)) [_size (array.item idx hierarchy)] [(++ _size) ..empty_node])] {#Hierarchy _size' (array#revised idx (node#has (level_up level) hash key val key_hash sub_node) hierarchy)}) ... For #Base nodes, check if the corresponding Bit_Position has ... already been used. {#Base bitmap base} (let [bit (level_bit_position level hash)] (if (with_bit_position? bit bitmap) ... If so... (let [idx (base_index bit bitmap)] {#Base bitmap (case (array.item idx base) ... If it's being used by a node, add the KV to it. {.#Left sub_node} (let [sub_node' (node#has (level_up level) hash key val key_hash sub_node)] (array#revised idx {.#Left sub_node'} base)) ... Otherwise, if it's being used by a KV, compare the keys. {.#Right key' val'} (array#revised idx (if (at key_hash = key key') ... If the same key is found, replace the value. {.#Right key val} ... Otherwise, compare the hashes of the keys. {.#Left (let [hash' (at key_hash hash key')] (if (n.= hash hash') ... If the hashes are ... the same, a new ... #Collisions node ... is added. {#Collisions hash (|> (array.empty 2) (array.has! 0 [key' val']) (array.has! 1 [key val]))} ... Otherwise, one can ... just keep using ... #Base nodes, so ... add both KV-pairs ... to the empty one. (let [next_level (level_up level)] (|> ..empty_node (node#has next_level hash' key' val' key_hash) (node#has next_level hash key val key_hash)))))}) base))}) ... However, if the Bit_Position has not been used yet, check ... whether this #Base node is ready for a promotion. (let [base_count (bitmap_size bitmap)] (if (n.< ..promotion_threshold base_count) ... If so, resize the #Base node to accommodate the ... new KV-pair. {#Base (with_bit_position bit bitmap) (array#has (base_index bit bitmap) {.#Right [key val]} base)} ... Otherwise, promote it to a #Hierarchy node, and add the new ... KV-pair as a singleton node to it. {#Hierarchy (++ base_count) (let [... TODO: These bindings were established to get around a compilation error. Fix and inline! index (level_index level hash) item (node#has (level_up level) hash key val key_hash ..empty_node) array (promotion node#has key_hash level bitmap base)] (array.has! index item array))})))) ... For #Collisions nodes, compare the hashes. {#Collisions _hash _colls} (if (n.= hash _hash) ... If they're equal, that means the new KV contributes to the ... collisions. (case (collision_index key_hash key _colls) ... If the key was already present in the collisions-list, its ... value gets updated. {.#Some coll_idx} {#Collisions _hash (array#revised coll_idx [key val] _colls)} ... Otherwise, the KV-pair is added to the collisions-list. {.#None} {#Collisions _hash (array#has (array.size _colls) [key val] _colls)}) ... If the hashes are not equal, create a new #Base node that ... contains the old #Collisions node, plus the new KV-pair. (|> {#Base (level_bit_position level _hash) (|> (array.empty 1) (array.has! 0 {.#Left node}))} (node#has level hash key val key_hash))) )) (def (node#lacks level hash key key_hash node) (All (_ k v) (-> Level Hash_Code k (Hash k) (Node k v) (Node k v))) (case node ... For #Hierarchy nodes, find out if there's a valid sub-node for ... the Hash-Code. {#Hierarchy h_size h_array} (let [idx (level_index level hash)] (if (array.lacks? idx h_array) ... If not, there's nothing to remove. node ... But if there is, try to remove the key from the sub-node. (let [sub_node (array.item idx h_array) sub_node' (node#lacks (level_up level) hash key key_hash sub_node)] ... Then check if a removal was actually done. (if (same? sub_node sub_node') ... If not, then there's nothing to change here either. node ... But if the sub_removal yielded an empty sub_node... (if (node#empty? sub_node') ... Check if it's due time for a demotion. (if (n.> demotion_threshold h_size) ... If so, just clear the space. {#Hierarchy (-- h_size) (array#clear idx h_array)} ... Otherwise, perform it. {#Base (demotion idx [h_size h_array])}) ... But if the sub_removal yielded a non_empty node, then ... just update the hiearchy branch. {#Hierarchy h_size (array#revised idx sub_node' h_array)}))))) ... For #Base nodes, check whether the Bit_Position is set. {#Base bitmap base} (let [bit (level_bit_position level hash)] (if (with_bit_position? bit bitmap) (let [idx (base_index bit bitmap)] (case (array.item idx base) ... If set, check if it's a sub_node, and remove the KV ... from it. {.#Left sub_node} (let [sub_node' (node#lacks (level_up level) hash key key_hash sub_node)] ... Verify that it was removed. (if (same? sub_node sub_node') ... If not, there's also nothing to change here. node ... But if it came out empty... (if (node#empty? sub_node') ...at ... figure out whether that's the only position left. (if (only_bit_position? bit bitmap) ... If so, removing it leaves this node empty too. ..empty_node ... But if not, then just unset the position and ... remove the node. {#Base (without_bit_position bit bitmap) (array#lacks idx base)}) ... But, if it did not come out empty, then the ... position is kept, and the node gets updated. {#Base bitmap (array#revised idx {.#Left sub_node'} base)}))) ... If, however, there was a KV-pair instead of a sub-node. {.#Right [key' val']} ... Check if the keys match. (if (at key_hash = key key') ... If so, remove the KV-pair and unset the Bit_Position. {#Base (without_bit_position bit bitmap) (array#lacks idx base)} ... Otherwise, there's nothing to remove. node))) ... If the Bit_Position is not set, there's nothing to remove. node)) ... For #Collisions nodes, It need to find out if the key already existst. {#Collisions _hash _colls} (case (collision_index key_hash key _colls) ... If not, then there's nothing to remove. {.#None} node ... But if so, then check the size of the collisions list. {.#Some idx} (if (n.= 1 (array.size _colls)) ... If there's only one left, then removing it leaves us with ... an empty node. ..empty_node ... Otherwise, just shrink the array by removing the KV-pair. {#Collisions _hash (array#lacks idx _colls)})) )) (def (node#value level hash key key_hash node) (All (_ k v) (-> Level Hash_Code k (Hash k) (Node k v) (Maybe v))) (case node ... For #Hierarchy nodes, just look-up the key on its children. {#Hierarchy _size hierarchy} (let [index (level_index level hash)] (if (array.lacks? index hierarchy) {.#None} (node#value (level_up level) hash key key_hash (array.item index hierarchy)))) ... For #Base nodes, check the leaves, and recursively check the branches. {#Base bitmap base} (let [bit (level_bit_position level hash)] (if (with_bit_position? bit bitmap) (case (array.item (base_index bit bitmap) base) {.#Left sub_node} (node#value (level_up level) hash key key_hash sub_node) {.#Right [key' val']} (if (at key_hash = key key') {.#Some val'} {.#None})) {.#None})) ... For #Collisions nodes, do a linear scan of all the known KV-pairs. {#Collisions _hash _colls} (at maybe.monad each product.right (array.example (|>> product.left (at key_hash = key)) _colls)) )) (def (node#size node) (All (_ k v) (-> (Node k v) Nat)) (case node {#Hierarchy _size hierarchy} (array.mix (function (_ _ item total) (n.+ item total)) 0 (array.each node#size hierarchy)) {#Base _ base} (array.mix (function (_ _ item total) (n.+ item total)) 0 (array.each (function (_ sub_node') (case sub_node' {.#Left sub_node} (node#size sub_node) {.#Right _} 1)) base)) {#Collisions hash colls} (array.size colls) )) (def (node#mix f init node) (All (_ k v a) (-> (-> [k v] a a) a (Node k v) a)) (case node {#Hierarchy _size hierarchy} (array.mix (function (_ _ sub_node current) (node#mix f current sub_node)) init hierarchy) {#Base bitmap base} (array.mix (function (_ _ branch current) (case branch {.#Left sub_node} (node#mix f current sub_node) {.#Right kv} (f kv current))) init base) {#Collisions hash colls} (array.mix (function (_ _ item total) (f item total)) init colls))) (def node#entries (All (_ k v) (-> (Node k v) (List [k v]))) (node#mix (function (_ head tail) {.#Item head tail}) {.#End})) (type .public (Dictionary k v) (Record [#hash (Hash k) #root (Node k v)])) (def .public key_hash (All (_ k v) (-> (Dictionary k v) (Hash k))) (the ..#hash)) (def .public (empty key_hash) (All (_ k v) (-> (Hash k) (Dictionary k v))) [#hash key_hash #root ..empty_node]) (def .public (has key val dict) (All (_ k v) (-> k v (Dictionary k v) (Dictionary k v))) (let [[key_hash node] dict] [key_hash (node#has root_level (at key_hash hash key) key val key_hash node)])) (def .public (lacks key dict) (All (_ k v) (-> k (Dictionary k v) (Dictionary k v))) (let [[key_hash node] dict] [key_hash (node#lacks root_level (at key_hash hash key) key key_hash node)])) (def .public (value key dict) (All (_ k v) (-> k (Dictionary k v) (Maybe v))) (let [[key_hash node] dict] (node#value root_level (at key_hash hash key) key key_hash node))) (def .public (key? dict key) (All (_ k v) (-> (Dictionary k v) k Bit)) (case (value key dict) {.#None} #0 {.#Some _} #1)) (exception .public key_already_exists) (def .public (has' key val dict) (All (_ k v) (-> k v (Dictionary k v) (Try (Dictionary k v)))) (case (value key dict) {.#None} {try.#Success (has key val dict)} {.#Some _} (exception.except ..key_already_exists []))) (def .public (revised key f dict) (All (_ k v) (-> k (-> v v) (Dictionary k v) (Dictionary k v))) (case (value key dict) {.#None} dict {.#Some val} (has key (f val) dict))) (def .public (revised' key default f dict) (All (_ k v) (-> k v (-> v v) (Dictionary k v) (Dictionary k v))) (..has key (f (maybe.else default (..value key dict))) dict)) (def .public size (All (_ k v) (-> (Dictionary k v) Nat)) (|>> (the #root) ..node#size)) (def .public empty? (All (_ k v) (-> (Dictionary k v) Bit)) (|>> size (n.= 0))) (def .public entries (All (_ k v) (-> (Dictionary k v) (List [k v]))) (|>> (the #root) ..node#entries)) (def .public (of_list key_hash kvs) (All (_ k v) (-> (Hash k) (List [k v]) (Dictionary k v))) (list#mix (function (_ [k v] dict) (..has k v dict)) (empty key_hash) kvs)) (with_template [ ] [(def .public (All (_ k v) (-> (Dictionary k v) (List ))) (|>> (the #root) (node#mix (function (_ [k v] bundle) {.#Item bundle}) {.#End})))] [k keys] [v values] ) (def .public (composite dict2 dict1) (All (_ k v) (-> (Dictionary k v) (Dictionary k v) (Dictionary k v))) (node#mix (function (_ [key val] dict) (has key val dict)) dict1 (the #root dict2))) (def .public (composite_with f dict2 dict1) (All (_ k v) (-> (-> v v v) (Dictionary k v) (Dictionary k v) (Dictionary k v))) (node#mix (function (_ [key val2] dict) (case (value key dict) {.#None} (has key val2 dict) {.#Some val1} (has key (f val2 val1) dict))) dict1 (the #root dict2))) (def .public (re_bound from_key to_key dict) (All (_ k v) (-> k k (Dictionary k v) (Dictionary k v))) (case (value from_key dict) {.#None} dict {.#Some val} (|> dict (lacks from_key) (has to_key val)))) (def .public (sub keys dict) (All (_ k v) (-> (List k) (Dictionary k v) (Dictionary k v))) (let [[key_hash _] dict] (list#mix (function (_ key new_dict) (case (value key dict) {.#None} new_dict {.#Some val} (has key val new_dict))) (empty key_hash) keys))) (def .public (equivalence (open ",#[0]")) (All (_ k v) (-> (Equivalence v) (Equivalence (Dictionary k v)))) (implementation (def (= reference subject) (and (n.= (..size reference) (..size subject)) (list.every? (function (_ [k rv]) (case (..value k subject) {.#Some sv} (,#= rv sv) _ #0)) (..entries reference)))))) (def node_functor (All (_ k) (Functor (Node k))) (implementation (def (each f fa) (case fa {#Hierarchy size hierarchy} {#Hierarchy size (array.each (each f) hierarchy)} {#Base bitmap base} {#Base bitmap (array.each (function (_ either) (case either {.#Left fa'} {.#Left (each f fa')} {.#Right [k v]} {.#Right [k (f v)]})) base)} {#Collisions hash collisions} {#Collisions hash (array.each (function (_ [k v]) [k (f v)]) collisions)})))) (def .public functor (All (_ k) (Functor (Dictionary k))) (implementation (def (each f fa) (.revised #root (at ..node_functor each f) fa)))) (def .public (monoid hash) (All (_ k v) (-> (Hash k) (Monoid (Dictionary k v)))) (implementation (def identity (..empty hash)) (def composite ..composite)))