From a8e696f62f14296b94964adb1946d7e2b5ef5ebd Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 10 May 2019 19:49:38 +0200 Subject: Write a custom map type that allows duplicates --- Cargo.lock | 8 ++ dhall/src/phase/binary.rs | 67 ++++++++------- dhall_proc_macros/src/quote.rs | 13 ++- dhall_syntax/Cargo.toml | 2 + dhall_syntax/src/core/expr.rs | 9 +- dhall_syntax/src/core/map.rs | 178 +++++++++++++++++++++++++++++++++++++++ dhall_syntax/src/core/mod.rs | 1 + dhall_syntax/src/core/visitor.rs | 29 ++++--- dhall_syntax/src/parser.rs | 37 +++----- 9 files changed, 269 insertions(+), 75 deletions(-) create mode 100644 dhall_syntax/src/core/map.rs diff --git a/Cargo.lock b/Cargo.lock index 53e019b..e837a27 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -106,10 +106,12 @@ name = "dhall_syntax" version = "0.1.0" dependencies = [ "dhall_generated_parser 0.1.0", + "either 1.5.2 (registry+https://github.com/rust-lang/crates.io-index)", "improved_slice_patterns 2.0.0", "itertools 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", "percent-encoding 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "pest 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -328,6 +330,11 @@ dependencies = [ "unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "take_mut" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" + [[package]] name = "term" version = "0.4.6" @@ -453,6 +460,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum serde_derive 1.0.90 (registry+https://github.com/rust-lang/crates.io-index)" = "58fc82bec244f168b23d1963b45c8bf5726e9a15a9d146a067f9081aeed2de79" "checksum sha-1 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "51b9d1f3b5de8a167ab06834a7c883bd197f2191e1dda1a22d9ccfeedbf9aded" "checksum syn 0.15.31 (registry+https://github.com/rust-lang/crates.io-index)" = "d2b4cfac95805274c6afdb12d8f770fa2d27c045953e7b630a81801953699a9a" +"checksum take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "f764005d11ee5f36500a149ace24e00e3da98b0158b3e2d53a7495660d3f4d60" "checksum term 0.4.6 (registry+https://github.com/rust-lang/crates.io-index)" = "fa63644f74ce96fbeb9b794f66aff2a52d601cbd5e80f4b97123e3899f4570f1" "checksum term-painter 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)" = "dcaa948f0e3e38470cd8dc8dcfe561a75c9e43f28075bb183845be2b9b3c08cf" "checksum typed-arena 1.4.1 (registry+https://github.com/rust-lang/crates.io-index)" = "c6c06a92aef38bb4dc5b0df00d68496fc31307c5344c867bb61678c6e1671ec5" diff --git a/dhall/src/phase/binary.rs b/dhall/src/phase/binary.rs index 5110241..7f72e80 100644 --- a/dhall/src/phase/binary.rs +++ b/dhall/src/phase/binary.rs @@ -1,5 +1,6 @@ use itertools::Itertools; use serde_cbor::value::value as cbor; +use std::iter::FromIterator; use dhall_syntax::{ rc, ExprF, FilePrefix, Hash, Import, ImportHashed, ImportLocation, @@ -131,11 +132,11 @@ fn cbor_value_to_dhall(data: &cbor::Value) -> Result { Merge(x, y, Some(z)) } [U64(7), Object(map)] => { - let map = cbor_map_to_dhall_map(map.iter())?; + let map = cbor_map_to_dhall_map(map)?; RecordType(map) } [U64(8), Object(map)] => { - let map = cbor_map_to_dhall_map(map.iter())?; + let map = cbor_map_to_dhall_map(map)?; RecordLit(map) } [U64(9), x, String(l)] => { @@ -144,11 +145,11 @@ fn cbor_value_to_dhall(data: &cbor::Value) -> Result { Field(x, l) } [U64(11), Object(map)] => { - let map = cbor_map_to_dhall_opt_map(map.iter())?; + let map = cbor_map_to_dhall_opt_map(map)?; UnionType(map) } [U64(12), String(l), x, Object(map)] => { - let map = cbor_map_to_dhall_opt_map(map.iter())?; + let map = cbor_map_to_dhall_opt_map(map)?; let x = cbor_value_to_dhall(&x)?; let l = Label::from(l.as_str()); UnionLit(l, x, map) @@ -331,31 +332,39 @@ fn cbor_value_to_dhall(data: &cbor::Value) -> Result { })) } -fn cbor_map_to_dhall_map<'a>( - map: impl Iterator, -) -> Result, DecodeError> { - map.map(|(k, v)| -> Result<(_, _), _> { - let k = k.as_string().ok_or_else(|| { - DecodeError::WrongFormatError("map/key".to_owned()) - })?; - let v = cbor_value_to_dhall(v)?; - Ok((Label::from(k.as_ref()), v)) - }) - .collect::>() +fn cbor_map_to_dhall_map<'a, T>( + map: impl IntoIterator, +) -> Result +where + T: FromIterator<(Label, ParsedExpr)>, +{ + map.into_iter() + .map(|(k, v)| -> Result<(_, _), _> { + let k = k.as_string().ok_or_else(|| { + DecodeError::WrongFormatError("map/key".to_owned()) + })?; + let v = cbor_value_to_dhall(v)?; + Ok((Label::from(k.as_ref()), v)) + }) + .collect::>() } -fn cbor_map_to_dhall_opt_map<'a>( - map: impl Iterator, -) -> Result)>, DecodeError> { - map.map(|(k, v)| -> Result<(_, _), _> { - let k = k.as_string().ok_or_else(|| { - DecodeError::WrongFormatError("map/key".to_owned()) - })?; - let v = match v { - cbor::Value::Null => None, - _ => Some(cbor_value_to_dhall(v)?), - }; - Ok((Label::from(k.as_ref()), v)) - }) - .collect::>() +fn cbor_map_to_dhall_opt_map<'a, T>( + map: impl IntoIterator, +) -> Result +where + T: FromIterator<(Label, Option)>, +{ + map.into_iter() + .map(|(k, v)| -> Result<(_, _), _> { + let k = k.as_string().ok_or_else(|| { + DecodeError::WrongFormatError("map/key".to_owned()) + })?; + let v = match v { + cbor::Value::Null => None, + _ => Some(cbor_value_to_dhall(v)?), + }; + Ok((Label::from(k.as_ref()), v)) + }) + .collect::>() } diff --git a/dhall_proc_macros/src/quote.rs b/dhall_proc_macros/src/quote.rs index 00bcd45..3dbfba9 100644 --- a/dhall_proc_macros/src/quote.rs +++ b/dhall_proc_macros/src/quote.rs @@ -161,15 +161,15 @@ fn quote_expr(expr: &Expr, ctx: &Context) -> TokenStream { } fn quote_builtin(b: Builtin) -> TokenStream { - format!("dhall_syntax::Builtin::{:?}", b).parse().unwrap() + format!("::dhall_syntax::Builtin::{:?}", b).parse().unwrap() } fn quote_const(c: Const) -> TokenStream { - format!("dhall_syntax::Const::{:?}", c).parse().unwrap() + format!("::dhall_syntax::Const::{:?}", c).parse().unwrap() } fn quote_binop(b: BinOp) -> TokenStream { - format!("dhall_syntax::BinOp::{:?}", b).parse().unwrap() + format!("::dhall_syntax::BinOp::{:?}", b).parse().unwrap() } fn quote_label(l: &Label) -> TokenStream { @@ -178,7 +178,7 @@ fn quote_label(l: &Label) -> TokenStream { } fn rc(x: TokenStream) -> TokenStream { - quote! { dhall_syntax::rc(#x) } + quote! { ::dhall_syntax::rc(#x) } } fn quote_opt(x: Option) -> TokenStream @@ -204,11 +204,10 @@ where { let entries = m.into_iter().map(|(k, v)| { let k = quote_label(&k); - quote!(m.push((#k, #v));) + quote!(m.insert(#k, #v);) }); quote! { { - use std::vec::Vec; - let mut m = Vec::new(); + let mut m = ::dhall_syntax::map::DupTreeMap::new(); #( #entries )* m } } diff --git a/dhall_syntax/Cargo.toml b/dhall_syntax/Cargo.toml index 3e32930..b25a0fe 100644 --- a/dhall_syntax/Cargo.toml +++ b/dhall_syntax/Cargo.toml @@ -12,5 +12,7 @@ doctest = false itertools = "0.8.0" percent-encoding = "1.0.1" pest = "2.1" +either = "1.5.2" +take_mut = "0.2.2" dhall_generated_parser = { path = "../dhall_generated_parser" } improved_slice_patterns = { version = "2.0.0", path = "../improved_slice_patterns" } diff --git a/dhall_syntax/src/core/expr.rs b/dhall_syntax/src/core/expr.rs index c509bae..f8b0215 100644 --- a/dhall_syntax/src/core/expr.rs +++ b/dhall_syntax/src/core/expr.rs @@ -1,6 +1,7 @@ #![allow(non_snake_case)] use std::rc::Rc; +use crate::map::DupTreeMap; use crate::visitor; use crate::*; @@ -201,13 +202,13 @@ pub enum ExprF { /// `Some e` SomeLit(SubExpr), /// `{ k1 : t1, k2 : t1 }` - RecordType(Vec<(Label, SubExpr)>), + RecordType(DupTreeMap), /// `{ k1 = v1, k2 = v2 }` - RecordLit(Vec<(Label, SubExpr)>), + RecordLit(DupTreeMap), /// `< k1 : t1, k2 >` - UnionType(Vec<(Label, Option)>), + UnionType(DupTreeMap>), /// `< k1 = t1, k2 : t2, k3 >` - UnionLit(Label, SubExpr, Vec<(Label, Option)>), + UnionLit(Label, SubExpr, DupTreeMap>), /// `merge x y : t` Merge(SubExpr, SubExpr, Option), /// `e.x` diff --git a/dhall_syntax/src/core/map.rs b/dhall_syntax/src/core/map.rs new file mode 100644 index 0000000..c76ac34 --- /dev/null +++ b/dhall_syntax/src/core/map.rs @@ -0,0 +1,178 @@ +/// A sorted map that allows multiple values for each key. +pub use dup_tree_map::DupTreeMap; + +mod one_or_more { + use either::Either; + use std::{iter, slice, vec}; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum OneOrMore { + One(T), + More(Vec), + } + + pub type Iter<'a, T> = Either, iter::Once<&'a T>>; + pub type IntoIter = Either, iter::Once>; + + impl OneOrMore { + pub fn new(x: T) -> Self { + OneOrMore::One(x) + } + + pub fn push(&mut self, x: T) { + take_mut::take(self, |sef| match sef { + OneOrMore::More(mut vec) => { + vec.push(x); + OneOrMore::More(vec) + } + OneOrMore::One(one) => OneOrMore::More(vec![one, x]), + }) + } + + pub fn iter(&self) -> Iter<'_, T> { + match self { + OneOrMore::More(vec) => Either::Left(vec.iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } + + impl IntoIterator for OneOrMore { + type Item = T; + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + match self { + OneOrMore::More(vec) => Either::Left(vec.into_iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } +} + +mod dup_tree_map { + use super::one_or_more; + use super::one_or_more::OneOrMore; + use std::collections::{btree_map, BTreeMap}; + use std::iter; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct DupTreeMap { + map: BTreeMap>, + size: usize, + } + + pub type IterInternal<'a, K, V> = + iter::Zip, one_or_more::Iter<'a, V>>; + pub type Iter<'a, K, V> = iter::FlatMap< + btree_map::Iter<'a, K, OneOrMore>, + IterInternal<'a, K, V>, + for<'b> fn((&'b K, &'b OneOrMore)) -> IterInternal<'b, K, V>, + >; + pub type IntoIterInternal = + iter::Zip, one_or_more::IntoIter>; + pub type IntoIter = iter::FlatMap< + btree_map::IntoIter>, + IntoIterInternal, + fn((K, OneOrMore)) -> IntoIterInternal, + >; + + impl DupTreeMap { + pub fn new() -> Self + where + K: Ord, + { + DupTreeMap { + map: BTreeMap::new(), + size: 0, + } + } + + pub fn insert(&mut self, key: K, value: V) + where + K: Ord, + { + use std::collections::btree_map::Entry; + match self.map.entry(key) { + Entry::Vacant(e) => { + e.insert(OneOrMore::new(value)); + } + Entry::Occupied(mut e) => e.get_mut().push(value), + } + } + + pub fn len(&self) -> usize { + self.size + } + pub fn is_empty(&self) -> bool { + self.size == 0 + } + + pub fn iter(&self) -> Iter<'_, K, V> + where + K: Ord, + { + fn foo<'a, K, V>( + (k, oom): (&'a K, &'a OneOrMore), + ) -> IterInternal<'a, K, V> { + iter::repeat(k).zip(oom.iter()) + } + self.map.iter().flat_map(foo) + } + } + + impl Default for DupTreeMap + where + K: Ord, + { + fn default() -> Self { + Self::new() + } + } + + impl IntoIterator for DupTreeMap + where + K: Ord + Clone, + { + type Item = (K, V); + type IntoIter = IntoIter; + + fn into_iter(self) -> Self::IntoIter { + fn foo((k, oom): (K, OneOrMore)) -> IntoIterInternal + where + K: Clone, + { + iter::repeat(k).zip(oom.into_iter()) + } + self.map.into_iter().flat_map(foo) + } + } + + impl<'a, K, V> IntoIterator for &'a DupTreeMap + where + K: Ord, + { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + impl iter::FromIterator<(K, V)> for DupTreeMap + where + K: Ord, + { + fn from_iter(iter: T) -> Self + where + T: IntoIterator, + { + let mut map = DupTreeMap::new(); + for (k, v) in iter { + map.insert(k, v); + } + map + } + } +} diff --git a/dhall_syntax/src/core/mod.rs b/dhall_syntax/src/core/mod.rs index 7762479..fe2c0be 100644 --- a/dhall_syntax/src/core/mod.rs +++ b/dhall_syntax/src/core/mod.rs @@ -7,4 +7,5 @@ pub use label::*; mod text; pub use text::*; pub mod context; +pub mod map; pub mod visitor; diff --git a/dhall_syntax/src/core/visitor.rs b/dhall_syntax/src/core/visitor.rs index dd539e3..99a9c11 100644 --- a/dhall_syntax/src/core/visitor.rs +++ b/dhall_syntax/src/core/visitor.rs @@ -1,4 +1,5 @@ use crate::*; +use std::iter::FromIterator; /// A way too generic Visitor trait. pub trait GenericVisitor: Sized { @@ -62,25 +63,29 @@ where None => None, }) } - fn vecmap<'a, V, Ret, SE, E>( - x: &'a Vec<(Label, SE)>, + fn dupmap<'a, V, Ret, SE, E, T>( + x: impl IntoIterator, mut v: V, - ) -> Result, V::Error> + ) -> Result where + SE: 'a, + T: FromIterator<(Label, V::SE2)>, V: ExprFVeryGenericVisitor<'a, Ret, SE, E>, { - x.iter() + x.into_iter() .map(|(k, x)| Ok((k.clone(), v.visit_subexpr(x)?))) .collect() } - fn vecoptmap<'a, V, Ret, SE, E>( - x: &'a Vec<(Label, Option)>, + fn optdupmap<'a, V, Ret, SE, E, T>( + x: impl IntoIterator)>, mut v: V, - ) -> Result)>, V::Error> + ) -> Result where + SE: 'a, + T: FromIterator<(Label, Option)>, V: ExprFVeryGenericVisitor<'a, Ret, SE, E>, { - x.iter() + x.into_iter() .map(|(k, x)| { Ok(( k.clone(), @@ -137,11 +142,11 @@ where v.visit_subexpr(t)?, ), SomeLit(e) => SomeLit(v.visit_subexpr(e)?), - RecordType(kts) => RecordType(vecmap(kts, v)?), - RecordLit(kvs) => RecordLit(vecmap(kvs, v)?), - UnionType(kts) => UnionType(vecoptmap(kts, v)?), + RecordType(kts) => RecordType(dupmap(kts, v)?), + RecordLit(kvs) => RecordLit(dupmap(kvs, v)?), + UnionType(kts) => UnionType(optdupmap(kts, v)?), UnionLit(k, x, kts) => { - UnionLit(k.clone(), v.visit_subexpr(x)?, vecoptmap(kts, v)?) + UnionLit(k.clone(), v.visit_subexpr(x)?, optdupmap(kts, v)?) } Merge(x, y, t) => Merge( v.visit_subexpr(x)?, diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 276510e..c847b29 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -7,6 +7,7 @@ use std::rc::Rc; use dhall_generated_parser::{DhallParser, Rule}; +use crate::map::DupTreeMap; use crate::ExprF::*; use crate::*; @@ -886,33 +887,29 @@ make_parser! { )); rule!(empty_record_literal as expression; span; - captured_str!(_) => spanned(span, RecordLit(Vec::new())) + captured_str!(_) => spanned(span, RecordLit(Default::default())) ); rule!(empty_record_type as expression; span; - captured_str!(_) => spanned(span, RecordType(Vec::new())) + captured_str!(_) => spanned(span, RecordType(Default::default())) ); rule!(non_empty_record_type_or_literal as expression; span; children!( [label(first_label), non_empty_record_type(rest)] => { let (first_expr, mut map) = rest; - map.push((first_label, first_expr)); - // Sort until we stop using binary decode for parser tests - map.sort_by(|(l1, _), (l2, _)| l1.cmp(l2)); + map.insert(first_label, first_expr); spanned(span, RecordType(map)) }, [label(first_label), non_empty_record_literal(rest)] => { let (first_expr, mut map) = rest; - map.push((first_label, first_expr)); - // Sort until we stop using binary decode for parser tests - map.sort_by(|(l1, _), (l2, _)| l1.cmp(l2)); + map.insert(first_label, first_expr); spanned(span, RecordLit(map)) }, )); rule!(non_empty_record_type - <(ParsedSubExpr, Vec<(Label, ParsedSubExpr)>)>; children!( + <(ParsedSubExpr, DupTreeMap)>; children!( [expression(expr), record_type_entry(entries)..] => { (expr, entries.collect()) } @@ -923,7 +920,7 @@ make_parser! { )); rule!(non_empty_record_literal - <(ParsedSubExpr, Vec<(Label, ParsedSubExpr)>)>; children!( + <(ParsedSubExpr, DupTreeMap)>; children!( [expression(expr), record_literal_entry(entries)..] => { (expr, entries.collect()) } @@ -935,18 +932,12 @@ make_parser! { rule!(union_type_or_literal as expression; span; children!( [empty_union_type(_)] => { - spanned(span, UnionType(Vec::new())) + spanned(span, UnionType(Default::default())) }, [non_empty_union_type_or_literal((Some((l, e)), entries))] => { - let mut entries = entries; - // Sort until we stop using binary decode for parser tests - entries.sort_by(|(l1, _), (l2, _)| l1.cmp(l2)); spanned(span, UnionLit(l, e, entries)) }, [non_empty_union_type_or_literal((None, entries))] => { - let mut entries = entries; - // Sort until we stop using binary decode for parser tests - entries.sort_by(|(l1, _), (l2, _)| l1.cmp(l2)); spanned(span, UnionType(entries)) }, )); @@ -955,20 +946,20 @@ make_parser! { rule!(non_empty_union_type_or_literal <(Option<(Label, ParsedSubExpr)>, - Vec<(Label, Option)>)>; + DupTreeMap>)>; children!( [label(l), union_literal_variant_value((e, entries))] => { (Some((l, e)), entries) }, [label(l), union_type_or_literal_variant_type((e, rest))] => { let (x, mut entries) = rest; - entries.push((l, e)); + entries.insert(l, e); (x, entries) }, )); rule!(union_literal_variant_value - <(ParsedSubExpr, Vec<(Label, Option)>)>; + <(ParsedSubExpr, DupTreeMap>)>; children!( [expression(e), union_type_entry(entries)..] => { (e, entries.collect()) @@ -984,19 +975,19 @@ make_parser! { rule!(union_type_or_literal_variant_type <(Option, (Option<(Label, ParsedSubExpr)>, - Vec<(Label, Option)>))>; + DupTreeMap>))>; children!( [expression(e), non_empty_union_type_or_literal(rest)] => { (Some(e), rest) }, [expression(e)] => { - (Some(e), (None, Vec::new())) + (Some(e), (None, Default::default())) }, [non_empty_union_type_or_literal(rest)] => { (None, rest) }, [] => { - (None, (None, Vec::new())) + (None, (None, Default::default())) }, )); -- cgit v1.2.3