diff options
author | Nadrieril | 2019-12-15 20:10:54 +0000 |
---|---|---|
committer | Nadrieril | 2019-12-15 20:10:54 +0000 |
commit | 78e9e32e1357d50313287dd2a3c437132c83aeb6 (patch) | |
tree | 0e7f6172490241f4e413102a6bf7677f2a6d27c1 /dhall/src/syntax | |
parent | b11a2cd6ca50d5a4dfa71ae8cd0642fb1c75e1cf (diff) |
Move contents of dhall_syntax to dhall
Diffstat (limited to 'dhall/src/syntax')
-rw-r--r-- | dhall/src/syntax/core/context.rs | 80 | ||||
-rw-r--r-- | dhall/src/syntax/core/expr.rs | 377 | ||||
-rw-r--r-- | dhall/src/syntax/core/import.rs | 130 | ||||
-rw-r--r-- | dhall/src/syntax/core/label.rs | 34 | ||||
-rw-r--r-- | dhall/src/syntax/core/map.rs | 394 | ||||
-rw-r--r-- | dhall/src/syntax/core/mod.rs | 13 | ||||
-rw-r--r-- | dhall/src/syntax/core/span.rs | 81 | ||||
-rw-r--r-- | dhall/src/syntax/core/text.rs | 181 | ||||
-rw-r--r-- | dhall/src/syntax/core/visitor.rs | 360 | ||||
-rw-r--r-- | dhall/src/syntax/mod.rs | 15 | ||||
-rw-r--r-- | dhall/src/syntax/parser.rs | 942 | ||||
-rw-r--r-- | dhall/src/syntax/printer.rs | 500 |
12 files changed, 3107 insertions, 0 deletions
diff --git a/dhall/src/syntax/core/context.rs b/dhall/src/syntax/core/context.rs new file mode 100644 index 0000000..6844baa --- /dev/null +++ b/dhall/src/syntax/core/context.rs @@ -0,0 +1,80 @@ +use std::cmp::Eq; +use std::collections::HashMap; +use std::hash::Hash; + +/// A `(Context a)` associates `Text` labels with values of type `a` +/// +/// The `Context` is used for type-checking when `(a = Expr)` +/// +/// * You create a `Context` using `empty` and `insert` +/// * You transform a `Context` using `fmap` +/// * You consume a `Context` using `lookup` and `toList` +/// +/// The difference between a `Context` and a `Map` is that a `Context` lets you +/// have multiple ordered occurrences of the same key and you can query for the +/// `n`th occurrence of a given key. +/// +#[derive(Debug, Clone)] +pub struct Context<K: Eq + Hash, T>(HashMap<K, Vec<T>>); + +impl<K: Hash + Eq + Clone, T> Context<K, T> { + /// An empty context with no key-value pairs + pub fn new() -> Self { + Context(HashMap::new()) + } + + /// Look up a key by name and index + /// + /// ```c + /// lookup _ _ empty = Nothing + /// lookup k 0 (insert k v c) = Just v + /// lookup k n (insert k v c) = lookup k (n - 1) c -- 1 <= n + /// lookup k n (insert j v c) = lookup k n c -- k /= j + /// ``` + pub fn lookup<'a>(&'a self, k: &K, n: usize) -> Option<&'a T> { + self.0.get(k).and_then(|v| { + if n < v.len() { + v.get(v.len() - 1 - n) + } else { + None + } + }) + } + + pub fn map<U, F: Fn(&K, &T) -> U>(&self, f: F) -> Context<K, U> { + Context( + self.0 + .iter() + .map(|(k, vs)| { + ((*k).clone(), vs.iter().map(|v| f(k, v)).collect()) + }) + .collect(), + ) + } + + pub fn lookup_all<'a>(&'a self, k: &K) -> impl Iterator<Item = &T> { + self.0.get(k).into_iter().flat_map(|v| v.iter()) + } + + pub fn iter(&self) -> impl Iterator<Item = (&K, &T)> { + self.0 + .iter() + .flat_map(|(k, vs)| vs.iter().map(move |v| (k, v))) + } + + pub fn iter_keys(&self) -> impl Iterator<Item = (&K, &Vec<T>)> { + self.0.iter() + } +} + +impl<K: Hash + Eq + Clone, T: Clone> Context<K, T> { + /// Add a key-value pair to the `Context` + pub fn insert(&self, k: K, v: T) -> Self { + let mut ctx = (*self).clone(); + { + let m = ctx.0.entry(k).or_insert_with(Vec::new); + m.push(v); + } + ctx + } +} diff --git a/dhall/src/syntax/core/expr.rs b/dhall/src/syntax/core/expr.rs new file mode 100644 index 0000000..7972283 --- /dev/null +++ b/dhall/src/syntax/core/expr.rs @@ -0,0 +1,377 @@ +use crate::syntax::map::{DupTreeMap, DupTreeSet}; +use crate::syntax::visitor::{self, ExprFMutVisitor, ExprFVisitor}; +use crate::syntax::*; + +pub type Integer = isize; +pub type Natural = usize; +pub type Double = NaiveDouble; + +pub fn trivial_result<T>(x: Result<T, !>) -> T { + match x { + Ok(x) => x, + Err(e) => e, + } +} + +/// Double with bitwise equality +#[derive(Debug, Copy, Clone)] +pub struct NaiveDouble(f64); + +impl PartialEq for NaiveDouble { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} + +impl Eq for NaiveDouble {} + +impl std::hash::Hash for NaiveDouble { + fn hash<H>(&self, state: &mut H) + where + H: std::hash::Hasher, + { + self.0.to_bits().hash(state) + } +} + +impl From<f64> for NaiveDouble { + fn from(x: f64) -> Self { + NaiveDouble(x) + } +} + +impl From<NaiveDouble> for f64 { + fn from(x: NaiveDouble) -> f64 { + x.0 + } +} + +/// Constants for a pure type system +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Const { + Type, + Kind, + Sort, +} + +/// Bound variable +/// +/// The `Label` field is the variable's name (i.e. \"`x`\"). +/// The `Int` field is a DeBruijn index. +/// See dhall-lang/standard/semantics.md for details +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct V<Label>(pub Label, pub usize); + +// This is only for the specific `Label` type, not generic +impl From<Label> for V<Label> { + fn from(x: Label) -> V<Label> { + V(x, 0) + } +} +impl<'a> From<&'a Label> for V<Label> { + fn from(x: &'a Label) -> V<Label> { + V(x.clone(), 0) + } +} + +// Definition order must match precedence order for +// pretty-printing to work correctly +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinOp { + /// `x ? y` + ImportAlt, + /// `x || y` + BoolOr, + /// `x + y` + NaturalPlus, + /// `x ++ y` + TextAppend, + /// `x # y` + ListAppend, + /// `x && y` + BoolAnd, + /// `x ∧ y` + RecursiveRecordMerge, + /// `x ⫽ y` + RightBiasedRecordMerge, + /// `x ⩓ y` + RecursiveRecordTypeMerge, + /// `x * y` + NaturalTimes, + /// `x == y` + BoolEQ, + /// `x != y` + BoolNE, + /// x === y + Equivalence, +} + +/// Built-ins +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Builtin { + Bool, + Natural, + Integer, + Double, + Text, + List, + Optional, + OptionalNone, + NaturalBuild, + NaturalFold, + NaturalIsZero, + NaturalEven, + NaturalOdd, + NaturalToInteger, + NaturalShow, + NaturalSubtract, + IntegerToDouble, + IntegerShow, + DoubleShow, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + OptionalFold, + OptionalBuild, + TextShow, +} + +// Each node carries an annotation. +#[derive(Debug, Clone)] +pub struct Expr<Embed>(Box<(RawExpr<Embed>, Span)>); + +pub type RawExpr<Embed> = ExprF<Expr<Embed>, Embed>; + +impl<Embed: PartialEq> std::cmp::PartialEq for Expr<Embed> { + fn eq(&self, other: &Self) -> bool { + self.0.as_ref().0 == other.0.as_ref().0 + } +} + +impl<Embed: Eq> std::cmp::Eq for Expr<Embed> {} + +impl<Embed: std::hash::Hash> std::hash::Hash for Expr<Embed> { + fn hash<H>(&self, state: &mut H) + where + H: std::hash::Hasher, + { + (self.0).0.hash(state) + } +} + +/// Syntax tree for expressions +// Having the recursion out of the enum definition enables writing +// much more generic code and improves pattern-matching behind +// smart pointers. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ExprF<SubExpr, Embed> { + Const(Const), + /// `x` + /// `x@n` + Var(V<Label>), + /// `λ(x : A) -> b` + Lam(Label, SubExpr, SubExpr), + /// `A -> B` + /// `∀(x : A) -> B` + Pi(Label, SubExpr, SubExpr), + /// `f a` + App(SubExpr, SubExpr), + /// `let x = r in e` + /// `let x : t = r in e` + Let(Label, Option<SubExpr>, SubExpr, SubExpr), + /// `x : t` + Annot(SubExpr, SubExpr), + /// `assert : t` + Assert(SubExpr), + /// Built-in values + Builtin(Builtin), + // Binary operations + BinOp(BinOp, SubExpr, SubExpr), + /// `True` + BoolLit(bool), + /// `if x then y else z` + BoolIf(SubExpr, SubExpr, SubExpr), + /// `1` + NaturalLit(Natural), + /// `+2` + IntegerLit(Integer), + /// `3.24` + DoubleLit(Double), + /// `"Some ${interpolated} text"` + TextLit(InterpolatedText<SubExpr>), + /// `[] : t` + EmptyListLit(SubExpr), + /// `[x, y, z]` + NEListLit(Vec<SubExpr>), + /// `Some e` + SomeLit(SubExpr), + /// `{ k1 : t1, k2 : t1 }` + RecordType(DupTreeMap<Label, SubExpr>), + /// `{ k1 = v1, k2 = v2 }` + RecordLit(DupTreeMap<Label, SubExpr>), + /// `< k1 : t1, k2 >` + UnionType(DupTreeMap<Label, Option<SubExpr>>), + /// `merge x y : t` + Merge(SubExpr, SubExpr, Option<SubExpr>), + /// `toMap x : t` + ToMap(SubExpr, Option<SubExpr>), + /// `e.x` + Field(SubExpr, Label), + /// `e.{ x, y, z }` + Projection(SubExpr, DupTreeSet<Label>), + /// `e.(t)` + ProjectionByExpr(SubExpr, SubExpr), + /// `./some/path` + Import(Import<SubExpr>), + /// Embeds the result of resolving an import + Embed(Embed), +} + +impl<SE, E> ExprF<SE, E> { + pub fn traverse_ref_with_special_handling_of_binders<'a, SE2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + visit_under_binder: impl FnOnce(&'a Label, &'a SE) -> Result<SE2, Err>, + ) -> Result<ExprF<SE2, E>, Err> + where + E: Clone, + { + visitor::TraverseRefWithBindersVisitor { + visit_subexpr, + visit_under_binder, + } + .visit(self) + } + + fn traverse_ref<'a, SE2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + ) -> Result<ExprF<SE2, E>, Err> + where + E: Clone, + { + visitor::TraverseRefVisitor { visit_subexpr }.visit(self) + } + + fn traverse_mut<'a, Err>( + &'a mut self, + visit_subexpr: impl FnMut(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + visitor::TraverseMutVisitor { visit_subexpr }.visit(self) + } + + pub fn map_ref_with_special_handling_of_binders<'a, SE2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + mut map_under_binder: impl FnMut(&'a Label, &'a SE) -> SE2, + ) -> ExprF<SE2, E> + where + E: Clone, + { + trivial_result(self.traverse_ref_with_special_handling_of_binders( + |x| Ok(map_subexpr(x)), + |l, x| Ok(map_under_binder(l, x)), + )) + } + + pub fn map_ref<'a, SE2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + ) -> ExprF<SE2, E> + where + E: Clone, + { + trivial_result(self.traverse_ref(|x| Ok(map_subexpr(x)))) + } + + pub fn map_mut<'a>(&'a mut self, mut map_subexpr: impl FnMut(&'a mut SE)) { + trivial_result(self.traverse_mut(|x| Ok(map_subexpr(x)))) + } +} + +impl<E> Expr<E> { + pub fn as_ref(&self) -> &RawExpr<E> { + &self.0.as_ref().0 + } + pub fn as_mut(&mut self) -> &mut RawExpr<E> { + &mut self.0.as_mut().0 + } + pub fn span(&self) -> Span { + self.0.as_ref().1.clone() + } + + pub fn new(x: RawExpr<E>, n: Span) -> Self { + Expr(Box::new((x, n))) + } + + pub fn rewrap<E2>(&self, x: RawExpr<E2>) -> Expr<E2> { + Expr(Box::new((x, (self.0).1.clone()))) + } + + pub fn traverse_resolve_mut<Err, F1>( + &mut self, + f: &mut F1, + ) -> Result<(), Err> + where + E: Clone, + F1: FnMut(Import<Expr<E>>) -> Result<E, Err>, + { + match self.as_mut() { + ExprF::BinOp(BinOp::ImportAlt, l, r) => { + let garbage_expr = ExprF::BoolLit(false); + let new_self = if l.traverse_resolve_mut(f).is_ok() { + l + } else { + r.traverse_resolve_mut(f)?; + r + }; + *self.as_mut() = + std::mem::replace(new_self.as_mut(), garbage_expr); + } + _ => { + self.as_mut().traverse_mut(|e| e.traverse_resolve_mut(f))?; + if let ExprF::Import(import) = self.as_mut() { + let garbage_import = Import { + mode: ImportMode::Code, + location: ImportLocation::Missing, + hash: None, + }; + // Move out of &mut import + let import = std::mem::replace(import, garbage_import); + *self.as_mut() = ExprF::Embed(f(import)?); + } + } + } + Ok(()) + } +} + +/// Add an isize to an usize +/// Panics on over/underflow +fn add_ui(u: usize, i: isize) -> Option<usize> { + Some(if i < 0 { + u.checked_sub(i.checked_neg()? as usize)? + } else { + u.checked_add(i as usize)? + }) +} + +impl<Label: PartialEq + Clone> V<Label> { + pub fn shift(&self, delta: isize, var: &V<Label>) -> Option<Self> { + let V(x, n) = var; + let V(y, m) = self; + Some(if x == y && n <= m { + V(y.clone(), add_ui(*m, delta)?) + } else { + V(y.clone(), *m) + }) + } + + pub fn over_binder(&self, x: &Label) -> Option<Self> { + self.shift(-1, &V(x.clone(), 0)) + } +} diff --git a/dhall/src/syntax/core/import.rs b/dhall/src/syntax/core/import.rs new file mode 100644 index 0000000..da3e99b --- /dev/null +++ b/dhall/src/syntax/core/import.rs @@ -0,0 +1,130 @@ +/// The beginning of a file path which anchors subsequent path components +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum FilePrefix { + /// Absolute path + Absolute, + /// Path relative to . + Here, + /// Path relative to .. + Parent, + /// Path relative to ~ + Home, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FilePath { + pub file_path: Vec<String>, +} + +/// The location of import (i.e. local vs. remote vs. environment) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ImportLocation<SubExpr> { + Local(FilePrefix, FilePath), + Remote(URL<SubExpr>), + Env(String), + Missing, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct URL<SubExpr> { + pub scheme: Scheme, + pub authority: String, + pub path: FilePath, + pub query: Option<String>, + pub headers: Option<SubExpr>, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Scheme { + HTTP, + HTTPS, +} + +/// How to interpret the import's contents (i.e. as Dhall code or raw text) +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum ImportMode { + Code, + RawText, + Location, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Hash { + SHA256(Vec<u8>), +} + +/// Reference to an external resource +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Import<SubExpr> { + pub mode: ImportMode, + pub location: ImportLocation<SubExpr>, + pub hash: Option<Hash>, +} + +impl<SE> URL<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<URL<SE2>, Err> { + let headers = self.headers.as_ref().map(f).transpose()?; + Ok(URL { + scheme: self.scheme, + authority: self.authority.clone(), + path: self.path.clone(), + query: self.query.clone(), + headers, + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + if let Some(header) = &mut self.headers { + f(header)?; + } + Ok(()) + } +} + +impl<SE> ImportLocation<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<ImportLocation<SE2>, Err> { + use ImportLocation::*; + Ok(match self { + Local(prefix, path) => Local(*prefix, path.clone()), + Remote(url) => Remote(url.traverse_ref(f)?), + Env(env) => Env(env.clone()), + Missing => Missing, + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + if let ImportLocation::Remote(url) = self { + url.traverse_mut(f)?; + } + Ok(()) + } +} + +impl<SE> Import<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<Import<SE2>, Err> { + Ok(Import { + mode: self.mode, + location: self.location.traverse_ref(f)?, + hash: self.hash.clone(), + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + self.location.traverse_mut(f) + } +} diff --git a/dhall/src/syntax/core/label.rs b/dhall/src/syntax/core/label.rs new file mode 100644 index 0000000..43c3f53 --- /dev/null +++ b/dhall/src/syntax/core/label.rs @@ -0,0 +1,34 @@ +use std::rc::Rc; + +// The type for labels throughout the AST +// It owns the data because otherwise lifetimes would make recursive imports impossible +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Label(Rc<str>); + +impl From<String> for Label { + fn from(s: String) -> Self { + let s: &str = &s; + Label(s.into()) + } +} + +impl<'a> From<&'a str> for Label { + fn from(s: &'a str) -> Self { + Label(Rc::from(s)) + } +} + +impl From<&Label> for String { + fn from(x: &Label) -> String { + x.0.as_ref().to_owned() + } +} + +impl Label { + pub fn from_str(s: &str) -> Label { + Label(s.into()) + } + pub fn as_ref(&self) -> &str { + self.0.as_ref() + } +} diff --git a/dhall/src/syntax/core/map.rs b/dhall/src/syntax/core/map.rs new file mode 100644 index 0000000..c4c6126 --- /dev/null +++ b/dhall/src/syntax/core/map.rs @@ -0,0 +1,394 @@ +/// A sorted map that allows multiple values for each key. +pub use dup_tree_map::DupTreeMap; +pub use dup_tree_set::DupTreeSet; + +mod one_or_more { + use either::Either; + use std::{iter, slice, vec}; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum OneOrMore<T> { + One(T), + More(Vec<T>), + } + + pub type Iter<'a, T> = Either<slice::Iter<'a, T>, iter::Once<&'a T>>; + pub type IterMut<'a, T> = + Either<slice::IterMut<'a, T>, iter::Once<&'a mut T>>; + pub type IntoIter<T> = Either<vec::IntoIter<T>, iter::Once<T>>; + + impl<T> OneOrMore<T> { + pub fn new(x: T) -> Self { + OneOrMore::One(x) + } + + pub fn push(&mut self, x: T) { + take_mut::take(self, |sef| match sef { + OneOrMore::More(mut vec) => { + vec.push(x); + OneOrMore::More(vec) + } + OneOrMore::One(one) => OneOrMore::More(vec![one, x]), + }) + } + + pub fn iter(&self) -> Iter<'_, T> { + match self { + OneOrMore::More(vec) => Either::Left(vec.iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + + pub fn iter_mut(&mut self) -> IterMut<'_, T> { + match self { + OneOrMore::More(vec) => Either::Left(vec.iter_mut()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } + + impl<T> IntoIterator for OneOrMore<T> { + type Item = T; + type IntoIter = IntoIter<T>; + + fn into_iter(self) -> Self::IntoIter { + match self { + OneOrMore::More(vec) => Either::Left(vec.into_iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } +} + +mod dup_tree_map { + use super::one_or_more; + use super::one_or_more::OneOrMore; + use std::collections::{btree_map, BTreeMap}; + use std::iter; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct DupTreeMap<K, V> { + map: BTreeMap<K, OneOrMore<V>>, + size: usize, + } + + pub type IterInternalIntermediate<'a, K, V> = + iter::Zip<iter::Repeat<&'a K>, one_or_more::Iter<'a, V>>; + pub type IterInternal<'a, K, V> = iter::FlatMap< + btree_map::Iter<'a, K, OneOrMore<V>>, + IterInternalIntermediate<'a, K, V>, + for<'b> fn( + (&'b K, &'b OneOrMore<V>), + ) -> IterInternalIntermediate<'b, K, V>, + >; + pub struct Iter<'a, K, V> { + iter: IterInternal<'a, K, V>, + size: usize, + } + pub type IterMutInternalIntermediate<'a, K, V> = + iter::Zip<iter::Repeat<&'a K>, one_or_more::IterMut<'a, V>>; + pub type IterMutInternal<'a, K, V> = iter::FlatMap< + btree_map::IterMut<'a, K, OneOrMore<V>>, + IterMutInternalIntermediate<'a, K, V>, + for<'b> fn( + (&'b K, &'b mut OneOrMore<V>), + ) -> IterMutInternalIntermediate<'b, K, V>, + >; + pub struct IterMut<'a, K, V> { + iter: IterMutInternal<'a, K, V>, + size: usize, + } + pub type IntoIterInternalIntermediate<K, V> = + iter::Zip<iter::Repeat<K>, one_or_more::IntoIter<V>>; + pub type IntoIterInternal<K, V> = iter::FlatMap< + btree_map::IntoIter<K, OneOrMore<V>>, + IntoIterInternalIntermediate<K, V>, + fn((K, OneOrMore<V>)) -> IntoIterInternalIntermediate<K, V>, + >; + pub struct IntoIter<K: Clone, V> { + iter: IntoIterInternal<K, V>, + size: usize, + } + + impl<K, V> DupTreeMap<K, V> { + pub fn new() -> Self + where + K: Ord, + { + DupTreeMap { + map: BTreeMap::new(), + size: 0, + } + } + + pub fn insert(&mut self, key: K, value: V) + where + K: Ord, + { + use std::collections::btree_map::Entry; + match self.map.entry(key) { + Entry::Vacant(e) => { + e.insert(OneOrMore::new(value)); + } + Entry::Occupied(mut e) => e.get_mut().push(value), + } + self.size += 1; + } + + pub fn len(&self) -> usize { + self.size + } + pub fn is_empty(&self) -> bool { + self.size == 0 + } + + pub fn iter(&self) -> Iter<'_, K, V> + where + K: Ord, + { + fn foo<'a, K, V>( + (k, oom): (&'a K, &'a OneOrMore<V>), + ) -> IterInternalIntermediate<'a, K, V> { + iter::repeat(k).zip(oom.iter()) + } + Iter { + iter: self.map.iter().flat_map(foo), + size: self.size, + } + } + + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> + where + K: Ord, + { + fn foo<'a, K, V>( + (k, oom): (&'a K, &'a mut OneOrMore<V>), + ) -> IterMutInternalIntermediate<'a, K, V> { + iter::repeat(k).zip(oom.iter_mut()) + } + IterMut { + iter: self.map.iter_mut().flat_map(foo), + size: self.size, + } + } + } + + impl<K, V> Default for DupTreeMap<K, V> + where + K: Ord, + { + fn default() -> Self { + Self::new() + } + } + + impl<K, V> IntoIterator for DupTreeMap<K, V> + where + K: Ord + Clone, + { + type Item = (K, V); + type IntoIter = IntoIter<K, V>; + + fn into_iter(self) -> Self::IntoIter { + fn foo<K, V>( + (k, oom): (K, OneOrMore<V>), + ) -> IntoIterInternalIntermediate<K, V> + where + K: Clone, + { + iter::repeat(k).zip(oom.into_iter()) + } + IntoIter { + iter: self.map.into_iter().flat_map(foo), + size: self.size, + } + } + } + + impl<'a, K, V> IntoIterator for &'a DupTreeMap<K, V> + where + K: Ord, + { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + impl<'a, K, V> IntoIterator for &'a mut DupTreeMap<K, V> + where + K: Ord, + { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } + } + + impl<K, V> iter::FromIterator<(K, V)> for DupTreeMap<K, V> + where + K: Ord, + { + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = (K, V)>, + { + let mut map = DupTreeMap::new(); + for (k, v) in iter { + map.insert(k, v); + } + map + } + } + + impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + impl<K, V> Iterator for IntoIter<K, V> + where + K: Clone, + { + type Item = (K, V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + // unsafe impl<K, V> iter::TrustedLen for IntoIter<K, V> {} +} + +mod dup_tree_set { + use super::DupTreeMap; + use std::iter; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct DupTreeSet<K> { + map: DupTreeMap<K, ()>, + } + + pub type Iter<'a, K> = iter::Map< + super::dup_tree_map::Iter<'a, K, ()>, + for<'b> fn((&'b K, &'b ())) -> &'b K, + >; + pub type IntoIter<K> = + iter::Map<super::dup_tree_map::IntoIter<K, ()>, fn((K, ())) -> K>; + + impl<K> DupTreeSet<K> { + pub fn new() -> Self + where + K: Ord, + { + DupTreeSet { + map: DupTreeMap::new(), + } + } + + pub fn len(&self) -> usize { + self.map.len() + } + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + pub fn iter(&self) -> Iter<'_, K> + where + K: Ord, + { + fn foo<'a, K>((k, ()): (&'a K, &'a ())) -> &'a K { + k + } + self.map.iter().map(foo) + } + } + + impl<K> Default for DupTreeSet<K> + where + K: Ord, + { + fn default() -> Self { + Self::new() + } + } + + impl<K> IntoIterator for DupTreeSet<K> + where + K: Ord + Clone, + { + type Item = K; + type IntoIter = IntoIter<K>; + + fn into_iter(self) -> Self::IntoIter { + fn foo<K>((k, ()): (K, ())) -> K { + k + } + self.map.into_iter().map(foo) + } + } + + impl<'a, K> IntoIterator for &'a DupTreeSet<K> + where + K: Ord, + { + type Item = &'a K; + type IntoIter = Iter<'a, K>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + impl<K> iter::FromIterator<K> for DupTreeSet<K> + where + K: Ord, + { + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = K>, + { + let map = iter.into_iter().map(|k| (k, ())).collect(); + DupTreeSet { map } + } + } +} diff --git a/dhall/src/syntax/core/mod.rs b/dhall/src/syntax/core/mod.rs new file mode 100644 index 0000000..66bf229 --- /dev/null +++ b/dhall/src/syntax/core/mod.rs @@ -0,0 +1,13 @@ +mod expr; +pub use expr::*; +mod import; +pub use import::*; +mod label; +pub use label::*; +mod span; +pub use span::*; +mod text; +pub use text::*; +pub mod context; +pub mod map; +pub mod visitor; diff --git a/dhall/src/syntax/core/span.rs b/dhall/src/syntax/core/span.rs new file mode 100644 index 0000000..f9c7008 --- /dev/null +++ b/dhall/src/syntax/core/span.rs @@ -0,0 +1,81 @@ +use std::rc::Rc; + +/// A location in the source text +#[derive(Debug, Clone)] +pub struct ParsedSpan { + input: Rc<str>, + /// # Safety + /// + /// Must be a valid character boundary index into `input`. + start: usize, + /// # Safety + /// + /// Must be a valid character boundary index into `input`. + end: usize, +} + +#[derive(Debug, Clone)] +pub enum Span { + /// A location in the source text + Parsed(ParsedSpan), + /// For expressions obtained from decoding binary + Decoded, + /// For expressions constructed during normalization/typecheck + Artificial, +} + +impl Span { + pub(crate) fn make(input: Rc<str>, sp: pest::Span) -> Self { + Span::Parsed(ParsedSpan { + input, + start: sp.start(), + end: sp.end(), + }) + } + + /// Takes the union of the two spans, i.e. the range of input covered by the two spans plus any + /// input between them. Assumes that the spans come from the same input. Fails if one of the + /// spans does not point to an input location. + pub fn union(&self, other: &Span) -> Self { + use std::cmp::{max, min}; + use Span::*; + match (self, other) { + (Parsed(x), Parsed(y)) if Rc::ptr_eq(&x.input, &y.input) => { + Parsed(ParsedSpan { + input: x.input.clone(), + start: min(x.start, y.start), + end: max(x.end, y.end), + }) + } + _ => panic!( + "Tried to union incompatible spans: {:?} and {:?}", + self, other + ), + } + } + + /// Merges two spans assumed to point to a similar thing. If only one of them points to an + /// input location, use that one. + pub fn merge(&self, other: &Span) -> Self { + use Span::*; + match (self, other) { + (Parsed(x), _) | (_, Parsed(x)) => Parsed(x.clone()), + (Artificial, _) | (_, Artificial) => Artificial, + (Decoded, Decoded) => Decoded, + } + } + + pub fn error(&self, message: impl Into<String>) -> String { + use pest::error::{Error, ErrorVariant}; + use pest::Span; + let message: String = message.into(); + let span = match self { + self::Span::Parsed(span) => span, + _ => return format!("[unknown location] {}", message), + }; + let span = Span::new(&*span.input, span.start, span.end).unwrap(); + let err: ErrorVariant<!> = ErrorVariant::CustomError { message }; + let err = Error::new_from_span(err, span); + format!("{}", err) + } +} diff --git a/dhall/src/syntax/core/text.rs b/dhall/src/syntax/core/text.rs new file mode 100644 index 0000000..fb390ee --- /dev/null +++ b/dhall/src/syntax/core/text.rs @@ -0,0 +1,181 @@ +use std::iter::FromIterator; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct InterpolatedText<SubExpr> { + head: String, + tail: Vec<(SubExpr, String)>, +} + +impl<SubExpr> From<(String, Vec<(SubExpr, String)>)> + for InterpolatedText<SubExpr> +{ + fn from(x: (String, Vec<(SubExpr, String)>)) -> Self { + InterpolatedText { + head: x.0, + tail: x.1, + } + } +} + +impl<SubExpr> From<String> for InterpolatedText<SubExpr> { + fn from(s: String) -> Self { + InterpolatedText { + head: s, + tail: vec![], + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum InterpolatedTextContents<SubExpr> { + Text(String), + Expr(SubExpr), +} + +impl<SubExpr> InterpolatedTextContents<SubExpr> { + pub fn is_empty(&self) -> bool { + use InterpolatedTextContents::{Expr, Text}; + match self { + Expr(_) => false, + Text(s) => s.is_empty(), + } + } + + pub fn traverse_ref<'a, SubExpr2, E, F>( + &'a self, + mut f: F, + ) -> Result<InterpolatedTextContents<SubExpr2>, E> + where + F: FnMut(&'a SubExpr) -> Result<SubExpr2, E>, + { + use InterpolatedTextContents::{Expr, Text}; + Ok(match self { + Expr(e) => Expr(f(e)?), + Text(s) => Text(s.clone()), + }) + } + pub fn traverse_mut<'a, E, F>(&'a mut self, mut f: F) -> Result<(), E> + where + F: FnMut(&'a mut SubExpr) -> Result<(), E>, + { + use InterpolatedTextContents::Expr; + if let Expr(e) = self { + f(e)?; + } + Ok(()) + } + pub fn map_ref<'a, SubExpr2, F>( + &'a self, + mut f: F, + ) -> InterpolatedTextContents<SubExpr2> + where + F: FnMut(&'a SubExpr) -> SubExpr2, + { + use InterpolatedTextContents::{Expr, Text}; + match self { + Expr(e) => Expr(f(e)), + Text(s) => Text(s.clone()), + } + } + pub fn map_mut<'a, F>(&'a mut self, mut f: F) + where + F: FnMut(&'a mut SubExpr), + { + use InterpolatedTextContents::Expr; + if let Expr(e) = self { + f(e); + } + } +} + +impl<SubExpr> InterpolatedText<SubExpr> { + pub fn len(&self) -> usize { + 1 + 2 * self.tail.len() + } + + pub fn head(&self) -> &str { + &self.head + } + + pub fn head_mut(&mut self) -> &mut String { + &mut self.head + } + + pub fn is_empty(&self) -> bool { + self.head.is_empty() && self.tail.is_empty() + } + + pub fn traverse_ref<'a, SubExpr2, E, F>( + &'a self, + mut f: F, + ) -> Result<InterpolatedText<SubExpr2>, E> + where + F: FnMut(&'a SubExpr) -> Result<SubExpr2, E>, + { + Ok(InterpolatedText { + head: self.head.clone(), + tail: self + .tail + .iter() + .map(|(e, s)| Ok((f(e)?, s.clone()))) + .collect::<Result<_, _>>()?, + }) + } + + pub fn traverse_mut<'a, E, F>(&'a mut self, mut f: F) -> Result<(), E> + where + F: FnMut(&'a mut SubExpr) -> Result<(), E>, + { + for (e, _) in &mut self.tail { + f(e)? + } + Ok(()) + } + + pub fn iter<'a>( + &'a self, + ) -> impl Iterator<Item = InterpolatedTextContents<&'a SubExpr>> + 'a { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + let exprs = self.tail.iter().map(|(e, _)| Expr(e)); + let texts = self.tail.iter().map(|(_, s)| Text(s.clone())); + once(Text(self.head.clone())).chain(itertools::interleave(exprs, texts)) + } + + pub fn into_iter( + self, + ) -> impl Iterator<Item = InterpolatedTextContents<SubExpr>> { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + once(Text(self.head)).chain( + self.tail + .into_iter() + .flat_map(|(e, s)| once(Expr(e)).chain(once(Text(s)))), + ) + } +} + +impl<SubExpr> FromIterator<InterpolatedTextContents<SubExpr>> + for InterpolatedText<SubExpr> +{ + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = InterpolatedTextContents<SubExpr>>, + { + let mut res = InterpolatedText { + head: String::new(), + tail: Vec::new(), + }; + let mut crnt_str = &mut res.head; + for x in iter.into_iter() { + match x { + InterpolatedTextContents::Text(s) => crnt_str.push_str(&s), + InterpolatedTextContents::Expr(e) => { + res.tail.push((e, String::new())); + crnt_str = &mut res.tail.last_mut().unwrap().1; + } + } + } + res + } +} diff --git a/dhall/src/syntax/core/visitor.rs b/dhall/src/syntax/core/visitor.rs new file mode 100644 index 0000000..b76d037 --- /dev/null +++ b/dhall/src/syntax/core/visitor.rs @@ -0,0 +1,360 @@ +use crate::syntax::*; +use std::iter::FromIterator; + +/// A visitor trait that can be used to traverse `ExprF`s. We need this pattern so that Rust lets +/// us have as much mutability as we can. +/// For example, `traverse_ref_with_special_handling_of_binders` cannot be made using only +/// `traverse_ref`, because `traverse_ref` takes a `FnMut` so we would need to pass multiple +/// mutable reverences to this argument to `traverse_ref`. But Rust's ownership system is all about +/// preventing exactly this ! So we have to be more clever. The visitor pattern allows us to have +/// only one mutable thing the whole time: the visitor itself. The visitor can then carry around +/// multiple closures or just one, and Rust is ok with either. See for example TraverseRefVisitor. +pub trait ExprFVisitor<'a, SE1, SE2, E1, E2>: Sized { + type Error; + + fn visit_subexpr(&mut self, subexpr: &'a SE1) -> Result<SE2, Self::Error>; + fn visit_embed(self, embed: &'a E1) -> Result<E2, Self::Error>; + + fn visit_subexpr_under_binder( + mut self, + _label: &'a Label, + subexpr: &'a SE1, + ) -> Result<SE2, Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit( + self, + input: &'a ExprF<SE1, E1>, + ) -> Result<ExprF<SE2, E2>, Self::Error> { + visit_ref(self, input) + } +} + +/// Like `ExprFVisitor`, but by mutable reference +pub trait ExprFMutVisitor<'a, SE, E>: Sized { + type Error; + + fn visit_subexpr(&mut self, subexpr: &'a mut SE) + -> Result<(), Self::Error>; + fn visit_embed(self, _embed: &'a mut E) -> Result<(), Self::Error> { + Ok(()) + } + + fn visit_subexpr_under_binder( + mut self, + _label: &'a mut Label, + subexpr: &'a mut SE, + ) -> Result<(), Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit(self, input: &'a mut ExprF<SE, E>) -> Result<(), Self::Error> { + visit_mut(self, input) + } +} + +fn visit_ref<'a, V, SE1, SE2, E1, E2>( + mut v: V, + input: &'a ExprF<SE1, E1>, +) -> Result<ExprF<SE2, E2>, V::Error> +where + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, +{ + fn vec<'a, T, U, Err, F: FnMut(&'a T) -> Result<U, Err>>( + x: &'a [T], + f: F, + ) -> Result<Vec<U>, Err> { + x.iter().map(f).collect() + } + fn opt<'a, T, U, Err, F: FnOnce(&'a T) -> Result<U, Err>>( + x: &'a Option<T>, + f: F, + ) -> Result<Option<U>, Err> { + Ok(match x { + Some(x) => Some(f(x)?), + None => None, + }) + } + fn dupmap<'a, V, SE1, SE2, E1, E2, T>( + x: impl IntoIterator<Item = (&'a Label, &'a SE1)>, + mut v: V, + ) -> Result<T, V::Error> + where + SE1: 'a, + T: FromIterator<(Label, SE2)>, + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, + { + x.into_iter() + .map(|(k, x)| Ok((k.clone(), v.visit_subexpr(x)?))) + .collect() + } + fn optdupmap<'a, V, SE1, SE2, E1, E2, T>( + x: impl IntoIterator<Item = (&'a Label, &'a Option<SE1>)>, + mut v: V, + ) -> Result<T, V::Error> + where + SE1: 'a, + T: FromIterator<(Label, Option<SE2>)>, + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, + { + x.into_iter() + .map(|(k, x)| { + Ok(( + k.clone(), + match x { + Some(x) => Some(v.visit_subexpr(x)?), + None => None, + }, + )) + }) + .collect() + } + + use crate::syntax::ExprF::*; + Ok(match input { + Var(v) => Var(v.clone()), + Lam(l, t, e) => { + let t = v.visit_subexpr(t)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Lam(l.clone(), t, e) + } + Pi(l, t, e) => { + let t = v.visit_subexpr(t)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Pi(l.clone(), t, e) + } + Let(l, t, a, e) => { + let t = opt(t, &mut |e| v.visit_subexpr(e))?; + let a = v.visit_subexpr(a)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Let(l.clone(), t, a, e) + } + App(f, a) => App(v.visit_subexpr(f)?, v.visit_subexpr(a)?), + Annot(x, t) => Annot(v.visit_subexpr(x)?, v.visit_subexpr(t)?), + Const(k) => Const(*k), + Builtin(v) => Builtin(*v), + BoolLit(b) => BoolLit(*b), + NaturalLit(n) => NaturalLit(*n), + IntegerLit(n) => IntegerLit(*n), + DoubleLit(n) => DoubleLit(*n), + TextLit(t) => TextLit(t.traverse_ref(|e| v.visit_subexpr(e))?), + BinOp(o, x, y) => BinOp(*o, v.visit_subexpr(x)?, v.visit_subexpr(y)?), + BoolIf(b, t, f) => BoolIf( + v.visit_subexpr(b)?, + v.visit_subexpr(t)?, + v.visit_subexpr(f)?, + ), + EmptyListLit(t) => EmptyListLit(v.visit_subexpr(t)?), + NEListLit(es) => NEListLit(vec(es, |e| v.visit_subexpr(e))?), + SomeLit(e) => SomeLit(v.visit_subexpr(e)?), + RecordType(kts) => RecordType(dupmap(kts, v)?), + RecordLit(kvs) => RecordLit(dupmap(kvs, v)?), + UnionType(kts) => UnionType(optdupmap(kts, v)?), + Merge(x, y, t) => Merge( + v.visit_subexpr(x)?, + v.visit_subexpr(y)?, + opt(t, |e| v.visit_subexpr(e))?, + ), + ToMap(x, t) => { + ToMap(v.visit_subexpr(x)?, opt(t, |e| v.visit_subexpr(e))?) + } + Field(e, l) => Field(v.visit_subexpr(e)?, l.clone()), + Projection(e, ls) => Projection(v.visit_subexpr(e)?, ls.clone()), + ProjectionByExpr(e, x) => { + ProjectionByExpr(v.visit_subexpr(e)?, v.visit_subexpr(x)?) + } + Assert(e) => Assert(v.visit_subexpr(e)?), + Import(i) => Import(i.traverse_ref(|e| v.visit_subexpr(e))?), + Embed(a) => Embed(v.visit_embed(a)?), + }) +} + +fn visit_mut<'a, V, SE, E>( + mut v: V, + input: &'a mut ExprF<SE, E>, +) -> Result<(), V::Error> +where + V: ExprFMutVisitor<'a, SE, E>, +{ + fn vec<'a, V, SE, E>(v: &mut V, x: &'a mut Vec<SE>) -> Result<(), V::Error> + where + V: ExprFMutVisitor<'a, SE, E>, + { + for x in x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn opt<'a, V, SE, E>( + v: &mut V, + x: &'a mut Option<SE>, + ) -> Result<(), V::Error> + where + V: ExprFMutVisitor<'a, SE, E>, + { + if let Some(x) = x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn dupmap<'a, V, SE, E>( + mut v: V, + x: impl IntoIterator<Item = (&'a Label, &'a mut SE)>, + ) -> Result<(), V::Error> + where + SE: 'a, + V: ExprFMutVisitor<'a, SE, E>, + { + for (_, x) in x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn optdupmap<'a, V, SE, E>( + mut v: V, + x: impl IntoIterator<Item = (&'a Label, &'a mut Option<SE>)>, + ) -> Result<(), V::Error> + where + SE: 'a, + V: ExprFMutVisitor<'a, SE, E>, + { + for (_, x) in x { + opt(&mut v, x)?; + } + Ok(()) + } + + use crate::syntax::ExprF::*; + match input { + Var(_) | Const(_) | Builtin(_) | BoolLit(_) | NaturalLit(_) + | IntegerLit(_) | DoubleLit(_) => {} + Lam(l, t, e) => { + v.visit_subexpr(t)?; + v.visit_subexpr_under_binder(l, e)?; + } + Pi(l, t, e) => { + v.visit_subexpr(t)?; + v.visit_subexpr_under_binder(l, e)?; + } + Let(l, t, a, e) => { + opt(&mut v, t)?; + v.visit_subexpr(a)?; + v.visit_subexpr_under_binder(l, e)?; + } + App(f, a) => { + v.visit_subexpr(f)?; + v.visit_subexpr(a)?; + } + Annot(x, t) => { + v.visit_subexpr(x)?; + v.visit_subexpr(t)?; + } + TextLit(t) => t.traverse_mut(|e| v.visit_subexpr(e))?, + BinOp(_, x, y) => { + v.visit_subexpr(x)?; + v.visit_subexpr(y)?; + } + BoolIf(b, t, f) => { + v.visit_subexpr(b)?; + v.visit_subexpr(t)?; + v.visit_subexpr(f)?; + } + EmptyListLit(t) => v.visit_subexpr(t)?, + NEListLit(es) => vec(&mut v, es)?, + SomeLit(e) => v.visit_subexpr(e)?, + RecordType(kts) => dupmap(v, kts)?, + RecordLit(kvs) => dupmap(v, kvs)?, + UnionType(kts) => optdupmap(v, kts)?, + Merge(x, y, t) => { + v.visit_subexpr(x)?; + v.visit_subexpr(y)?; + opt(&mut v, t)?; + } + ToMap(x, t) => { + v.visit_subexpr(x)?; + opt(&mut v, t)?; + } + Field(e, _) => v.visit_subexpr(e)?, + Projection(e, _) => v.visit_subexpr(e)?, + ProjectionByExpr(e, x) => { + v.visit_subexpr(e)?; + v.visit_subexpr(x)?; + } + Assert(e) => v.visit_subexpr(e)?, + Import(i) => i.traverse_mut(|e| v.visit_subexpr(e))?, + Embed(a) => v.visit_embed(a)?, + } + Ok(()) +} + +pub struct TraverseRefWithBindersVisitor<F1, F2> { + pub visit_subexpr: F1, + pub visit_under_binder: F2, +} + +impl<'a, SE, E, SE2, Err, F1, F2> ExprFVisitor<'a, SE, SE2, E, E> + for TraverseRefWithBindersVisitor<F1, F2> +where + SE: 'a, + E: 'a + Clone, + F1: FnMut(&'a SE) -> Result<SE2, Err>, + F2: FnOnce(&'a Label, &'a SE) -> Result<SE2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_subexpr_under_binder( + self, + label: &'a Label, + subexpr: &'a SE, + ) -> Result<SE2, Self::Error> { + (self.visit_under_binder)(label, subexpr) + } + fn visit_embed(self, embed: &'a E) -> Result<E, Self::Error> { + Ok(embed.clone()) + } +} + +pub struct TraverseRefVisitor<F1> { + pub visit_subexpr: F1, +} + +impl<'a, SE, E, SE2, Err, F1> ExprFVisitor<'a, SE, SE2, E, E> + for TraverseRefVisitor<F1> +where + SE: 'a, + E: 'a + Clone, + F1: FnMut(&'a SE) -> Result<SE2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_embed(self, embed: &'a E) -> Result<E, Self::Error> { + Ok(embed.clone()) + } +} + +pub struct TraverseMutVisitor<F1> { + pub visit_subexpr: F1, +} + +impl<'a, SE, E, Err, F1> ExprFMutVisitor<'a, SE, E> for TraverseMutVisitor<F1> +where + SE: 'a, + E: 'a, + F1: FnMut(&'a mut SE) -> Result<(), Err>, +{ + type Error = Err; + + fn visit_subexpr( + &mut self, + subexpr: &'a mut SE, + ) -> Result<(), Self::Error> { + (self.visit_subexpr)(subexpr) + } +} diff --git a/dhall/src/syntax/mod.rs b/dhall/src/syntax/mod.rs new file mode 100644 index 0000000..177c4f1 --- /dev/null +++ b/dhall/src/syntax/mod.rs @@ -0,0 +1,15 @@ +#![allow( + clippy::many_single_char_names, + clippy::should_implement_trait, + clippy::new_without_default, + clippy::type_complexity +)] + +mod core; +pub use crate::syntax::core::context; +pub use crate::syntax::core::visitor; +pub use crate::syntax::core::*; +mod printer; +pub use crate::syntax::printer::*; +mod parser; +pub use crate::syntax::parser::*; diff --git a/dhall/src/syntax/parser.rs b/dhall/src/syntax/parser.rs new file mode 100644 index 0000000..2f589fe --- /dev/null +++ b/dhall/src/syntax/parser.rs @@ -0,0 +1,942 @@ +use itertools::Itertools; +use pest::prec_climber as pcl; +use pest::prec_climber::PrecClimber; +use std::rc::Rc; + +use pest_consume::{match_nodes, Parser}; + +use crate::syntax::map::{DupTreeMap, DupTreeSet}; +use crate::syntax::ExprF::*; +use crate::syntax::*; + +// This file consumes the parse tree generated by pest and turns it into +// our own AST. All those custom macros should eventually moved into +// their own crate because they are quite general and useful. For now they +// are here and hopefully you can figure out how they work. + +type ParsedText<E> = InterpolatedText<Expr<E>>; +type ParsedTextContents<E> = InterpolatedTextContents<Expr<E>>; +type ParseInput<'input> = pest_consume::Node<'input, Rule, Rc<str>>; + +pub type ParseError = pest::error::Error<Rule>; +pub type ParseResult<T> = Result<T, ParseError>; + +#[derive(Debug)] +enum Selector<E> { + Field(Label), + Projection(DupTreeSet<Label>), + ProjectionByExpr(Expr<E>), +} + +impl crate::syntax::Builtin { + pub fn parse(s: &str) -> Option<Self> { + use crate::syntax::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "None" => Some(OptionalNone), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Natural/subtract" => Some(NaturalSubtract), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +fn input_to_span(input: ParseInput) -> Span { + Span::make(input.user_data().clone(), input.as_pair().as_span()) +} +fn spanned<E>(input: ParseInput, x: RawExpr<E>) -> Expr<E> { + Expr::new(x, input_to_span(input)) +} +fn spanned_union<E>(span1: Span, span2: Span, x: RawExpr<E>) -> Expr<E> { + Expr::new(x, span1.union(&span2)) +} + +// Trim the shared indent off of a vec of lines, as defined by the Dhall semantics of multiline +// literals. +fn trim_indent<E: Clone>(lines: &mut Vec<ParsedText<E>>) { + let is_indent = |c: char| c == ' ' || c == '\t'; + + // There is at least one line so this is safe + let last_line_head = lines.last().unwrap().head(); + let indent_chars = last_line_head + .char_indices() + .take_while(|(_, c)| is_indent(*c)); + let mut min_indent_idx = match indent_chars.last() { + Some((i, _)) => i, + // If there is no indent char, then no indent needs to be stripped + None => return, + }; + + for line in lines.iter() { + // Ignore empty lines + if line.is_empty() { + continue; + } + // Take chars from line while they match the current minimum indent. + let indent_chars = last_line_head[0..=min_indent_idx] + .char_indices() + .zip(line.head().chars()) + .take_while(|((_, c1), c2)| c1 == c2); + match indent_chars.last() { + Some(((i, _), _)) => min_indent_idx = i, + // If there is no indent char, then no indent needs to be stripped + None => return, + }; + } + + // Remove the shared indent from non-empty lines + for line in lines.iter_mut() { + if !line.is_empty() { + line.head_mut().replace_range(0..=min_indent_idx, ""); + } + } +} + +lazy_static::lazy_static! { + static ref PRECCLIMBER: PrecClimber<Rule> = { + use Rule::*; + // In order of precedence + let operators = vec![ + import_alt, + bool_or, + natural_plus, + text_append, + list_append, + bool_and, + combine, + prefer, + combine_types, + natural_times, + bool_eq, + bool_ne, + equivalent, + ]; + PrecClimber::new( + operators + .into_iter() + .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) + .collect(), + ) + }; +} + +#[derive(Parser)] +#[grammar = "../../dhall_syntax/src/dhall.pest"] +struct DhallParser; + +#[pest_consume::parser(parser = DhallParser, rule = Rule)] +impl DhallParser { + fn EOI(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(label)] + fn simple_label(input: ParseInput) -> ParseResult<Label> { + Ok(Label::from(input.as_str())) + } + #[alias(label)] + fn quoted_label(input: ParseInput) -> ParseResult<Label> { + Ok(Label::from(input.as_str())) + } + + fn double_quote_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<ParsedText<E>> { + Ok(match_nodes!(input.into_children(); + [double_quote_chunk(chunks)..] => { + chunks.collect() + } + )) + } + + fn double_quote_chunk<E: Clone>( + input: ParseInput, + ) -> ParseResult<ParsedTextContents<E>> { + Ok(match_nodes!(input.into_children(); + [expression(e)] => { + InterpolatedTextContents::Expr(e) + }, + [double_quote_char(s)] => { + InterpolatedTextContents::Text(s) + }, + )) + } + #[alias(double_quote_char)] + fn double_quote_escaped(input: ParseInput) -> ParseResult<String> { + Ok(match input.as_str() { + "\"" => "\"".to_owned(), + "$" => "$".to_owned(), + "\\" => "\\".to_owned(), + "/" => "/".to_owned(), + "b" => "\u{0008}".to_owned(), + "f" => "\u{000C}".to_owned(), + "n" => "\n".to_owned(), + "r" => "\r".to_owned(), + "t" => "\t".to_owned(), + // "uXXXX" or "u{XXXXX}" + s => { + use std::convert::{TryFrom, TryInto}; + + let s = &s[1..]; + let s = if &s[0..1] == "{" { + &s[1..s.len() - 1] + } else { + &s[0..s.len()] + }; + + if s.len() > 8 { + Err(input.error(format!( + "Escape sequences can't have more than 8 chars: \"{}\"", + s + )))? + } + + // pad with zeroes + let s: String = std::iter::repeat('0') + .take(8 - s.len()) + .chain(s.chars()) + .collect(); + + // `s` has length 8, so `bytes` has length 4 + let bytes: &[u8] = &hex::decode(s).unwrap(); + let i = u32::from_be_bytes(bytes.try_into().unwrap()); + let c = char::try_from(i).unwrap(); + match i { + 0xD800..=0xDFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain surrogate pairs: \"{}\"", c_ecapsed)))? + } + 0x0FFFE..=0x0FFFF + | 0x1FFFE..=0x1FFFF + | 0x2FFFE..=0x2FFFF + | 0x3FFFE..=0x3FFFF + | 0x4FFFE..=0x4FFFF + | 0x5FFFE..=0x5FFFF + | 0x6FFFE..=0x6FFFF + | 0x7FFFE..=0x7FFFF + | 0x8FFFE..=0x8FFFF + | 0x9FFFE..=0x9FFFF + | 0xAFFFE..=0xAFFFF + | 0xBFFFE..=0xBFFFF + | 0xCFFFE..=0xCFFFF + | 0xDFFFE..=0xDFFFF + | 0xEFFFE..=0xEFFFF + | 0xFFFFE..=0xFFFFF + | 0x10_FFFE..=0x10_FFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain non-characters: \"{}\"", c_ecapsed)))? + } + _ => {} + } + std::iter::once(c).collect() + } + }) + } + fn double_quote_char(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn single_quote_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<ParsedText<E>> { + Ok(match_nodes!(input.into_children(); + [single_quote_continue(lines)] => { + let newline: ParsedText<E> = "\n".to_string().into(); + + // Reverse lines and chars in each line + let mut lines: Vec<ParsedText<E>> = lines + .into_iter() + .rev() + .map(|l| l.into_iter().rev().collect::<ParsedText<E>>()) + .collect(); + + trim_indent(&mut lines); + + lines + .into_iter() + .intersperse(newline) + .flat_map(InterpolatedText::into_iter) + .collect::<ParsedText<E>>() + } + )) + } + fn single_quote_char(input: ParseInput) -> ParseResult<&str> { + Ok(input.as_str()) + } + #[alias(single_quote_char)] + fn escaped_quote_pair(_input: ParseInput) -> ParseResult<&str> { + Ok("''") + } + #[alias(single_quote_char)] + fn escaped_interpolation(_input: ParseInput) -> ParseResult<&str> { + Ok("${") + } + + // Returns a vec of lines in reversed order, where each line is also in reversed order. + fn single_quote_continue<E: Clone>( + input: ParseInput, + ) -> ParseResult<Vec<Vec<ParsedTextContents<E>>>> { + Ok(match_nodes!(input.into_children(); + [expression(e), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Expr(e); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [single_quote_char(c), single_quote_continue(lines)] => { + let mut lines = lines; + if c == "\n" || c == "\r\n" { + lines.push(vec![]); + } else { + // TODO: don't allocate for every char + let c = InterpolatedTextContents::Text(c.to_owned()); + lines.last_mut().unwrap().push(c); + } + lines + }, + [] => { + vec![vec![]] + }, + )) + } + + #[alias(expression)] + fn builtin<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + let s = input.as_str(); + let e = match crate::syntax::Builtin::parse(s) { + Some(b) => Builtin(b), + None => match s { + "True" => BoolLit(true), + "False" => BoolLit(false), + "Type" => Const(crate::syntax::Const::Type), + "Kind" => Const(crate::syntax::Const::Kind), + "Sort" => Const(crate::syntax::Const::Sort), + _ => { + Err(input.error(format!("Unrecognized builtin: '{}'", s)))? + } + }, + }; + Ok(spanned(input, e)) + } + + #[alias(double_literal)] + fn NaN(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::NAN.into()) + } + #[alias(double_literal)] + fn minus_infinity_literal(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::NEG_INFINITY.into()) + } + #[alias(double_literal)] + fn plus_infinity_literal(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::INFINITY.into()) + } + + #[alias(double_literal)] + fn numeric_double_literal(input: ParseInput) -> ParseResult<core::Double> { + let s = input.as_str().trim(); + match s.parse::<f64>() { + Ok(x) if x.is_infinite() => Err(input.error(format!( + "Overflow while parsing double literal '{}'", + s + ))), + Ok(x) => Ok(NaiveDouble::from(x)), + Err(e) => Err(input.error(format!("{}", e))), + } + } + + fn natural_literal(input: ParseInput) -> ParseResult<core::Natural> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + fn integer_literal(input: ParseInput) -> ParseResult<core::Integer> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + #[alias(expression, shortcut = true)] + fn identifier<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [variable(v)] => spanned(input, Var(v)), + [expression(e)] => e, + )) + } + + fn variable(input: ParseInput) -> ParseResult<V<Label>> { + Ok(match_nodes!(input.into_children(); + [label(l), natural_literal(idx)] => V(l, idx), + [label(l)] => V(l, 0), + )) + } + + #[alias(path_component)] + fn unquoted_path_component(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_string()) + } + #[alias(path_component)] + fn quoted_path_component(input: ParseInput) -> ParseResult<String> { + #[rustfmt::skip] + const RESERVED: &percent_encoding::AsciiSet = + &percent_encoding::CONTROLS + .add(b'=').add(b':').add(b'/').add(b'?') + .add(b'#').add(b'[').add(b']').add(b'@') + .add(b'!').add(b'$').add(b'&').add(b'\'') + .add(b'(').add(b')').add(b'*').add(b'+') + .add(b',').add(b';'); + Ok(input + .as_str() + .chars() + .map(|c| { + // Percent-encode ascii chars + if c.is_ascii() { + percent_encoding::utf8_percent_encode( + &c.to_string(), + RESERVED, + ) + .to_string() + } else { + c.to_string() + } + }) + .collect()) + } + fn path(input: ParseInput) -> ParseResult<FilePath> { + Ok(match_nodes!(input.into_children(); + [path_component(components)..] => { + FilePath { file_path: components.collect() } + } + )) + } + + #[alias(import_type)] + fn local<E: Clone>( + input: ParseInput, + ) -> ParseResult<ImportLocation<Expr<E>>> { + Ok(match_nodes!(input.into_children(); + [local_path((prefix, p))] => ImportLocation::Local(prefix, p), + )) + } + + #[alias(local_path)] + fn parent_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Parent, p) + )) + } + #[alias(local_path)] + fn here_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Here, p) + )) + } + #[alias(local_path)] + fn home_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Home, p) + )) + } + #[alias(local_path)] + fn absolute_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Absolute, p) + )) + } + + fn scheme(input: ParseInput) -> ParseResult<Scheme> { + Ok(match input.as_str() { + "http" => Scheme::HTTP, + "https" => Scheme::HTTPS, + _ => unreachable!(), + }) + } + + fn http_raw<E: Clone>(input: ParseInput) -> ParseResult<URL<Expr<E>>> { + Ok(match_nodes!(input.into_children(); + [scheme(sch), authority(auth), path(p)] => URL { + scheme: sch, + authority: auth, + path: p, + query: None, + headers: None, + }, + [scheme(sch), authority(auth), path(p), query(q)] => URL { + scheme: sch, + authority: auth, + path: p, + query: Some(q), + headers: None, + }, + )) + } + + fn authority(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn query(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + #[alias(import_type)] + fn http<E: Clone>( + input: ParseInput, + ) -> ParseResult<ImportLocation<Expr<E>>> { + Ok(ImportLocation::Remote(match_nodes!(input.into_children(); + [http_raw(url)] => url, + [http_raw(url), expression(e)] => URL { headers: Some(e), ..url }, + ))) + } + + #[alias(import_type)] + fn env<E: Clone>( + input: ParseInput, + ) -> ParseResult<ImportLocation<Expr<E>>> { + Ok(match_nodes!(input.into_children(); + [environment_variable(v)] => ImportLocation::Env(v), + )) + } + #[alias(environment_variable)] + fn bash_environment_variable(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + #[alias(environment_variable)] + fn posix_environment_variable(input: ParseInput) -> ParseResult<String> { + Ok(match_nodes!(input.into_children(); + [posix_environment_variable_character(chars)..] => { + chars.collect() + }, + )) + } + fn posix_environment_variable_character( + input: ParseInput, + ) -> ParseResult<&str> { + Ok(match input.as_str() { + "\\\"" => "\"", + "\\\\" => "\\", + "\\a" => "\u{0007}", + "\\b" => "\u{0008}", + "\\f" => "\u{000C}", + "\\n" => "\n", + "\\r" => "\r", + "\\t" => "\t", + "\\v" => "\u{000B}", + s => s, + }) + } + + #[alias(import_type)] + fn missing<E: Clone>( + _input: ParseInput, + ) -> ParseResult<ImportLocation<Expr<E>>> { + Ok(ImportLocation::Missing) + } + + fn hash(input: ParseInput) -> ParseResult<Hash> { + let s = input.as_str().trim(); + let protocol = &s[..6]; + let hash = &s[7..]; + if protocol != "sha256" { + Err(input.error(format!("Unknown hashing protocol '{}'", protocol)))? + } + Ok(Hash::SHA256(hex::decode(hash).unwrap())) + } + + fn import_hashed<E: Clone>( + input: ParseInput, + ) -> ParseResult<crate::syntax::Import<Expr<E>>> { + use crate::syntax::Import; + let mode = ImportMode::Code; + Ok(match_nodes!(input.into_children(); + [import_type(location)] => Import { mode, location, hash: None }, + [import_type(location), hash(h)] => Import { mode, location, hash: Some(h) }, + )) + } + + #[alias(import_mode)] + fn Text(_input: ParseInput) -> ParseResult<ImportMode> { + Ok(ImportMode::RawText) + } + #[alias(import_mode)] + fn Location(_input: ParseInput) -> ParseResult<ImportMode> { + Ok(ImportMode::Location) + } + + #[alias(expression)] + fn import<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + use crate::syntax::Import; + let import = match_nodes!(input.children(); + [import_hashed(imp)] => { + Import { mode: ImportMode::Code, ..imp } + }, + [import_hashed(imp), import_mode(mode)] => { + Import { mode, ..imp } + }, + ); + Ok(spanned(input, Import(import))) + } + + fn lambda(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn forall(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn arrow(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn merge(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn assert(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn if_(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn toMap(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(expression)] + fn empty_list_literal<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [expression(e)] => spanned(input, EmptyListLit(e)), + )) + } + + fn expression<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [lambda(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(input, Lam(l, typ, body)) + }, + [if_(()), expression(cond), expression(left), + expression(right)] => { + spanned(input, BoolIf(cond, left, right)) + }, + [let_binding(bindings).., expression(final_expr)] => { + bindings.rev().fold( + final_expr, + |acc, x| { + spanned_union( + acc.span(), + x.3, + Let(x.0, x.1, x.2, acc) + ) + } + ) + }, + [forall(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(input, Pi(l, typ, body)) + }, + [expression(typ), arrow(()), expression(body)] => { + spanned(input, Pi("_".into(), typ, body)) + }, + [merge(()), expression(x), expression(y), expression(z)] => { + spanned(input, Merge(x, y, Some(z))) + }, + [assert(()), expression(x)] => { + spanned(input, Assert(x)) + }, + [toMap(()), expression(x), expression(y)] => { + spanned(input, ToMap(x, Some(y))) + }, + [expression(e), expression(annot)] => { + spanned(input, Annot(e, annot)) + }, + [expression(e)] => e, + )) + } + + fn let_binding<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Label, Option<Expr<E>>, Expr<E>, Span)> { + Ok(match_nodes!(input.children(); + [label(name), expression(annot), expression(expr)] => + (name, Some(annot), expr, input_to_span(input)), + [label(name), expression(expr)] => + (name, None, expr, input_to_span(input)), + )) + } + + #[alias(expression, shortcut = true)] + #[prec_climb(expression, PRECCLIMBER)] + fn operator_expression<E: Clone>( + l: Expr<E>, + op: ParseInput, + r: Expr<E>, + ) -> ParseResult<Expr<E>> { + use crate::syntax::BinOp::*; + use Rule::*; + let op = match op.as_rule() { + import_alt => ImportAlt, + bool_or => BoolOr, + natural_plus => NaturalPlus, + text_append => TextAppend, + list_append => ListAppend, + bool_and => BoolAnd, + combine => RecursiveRecordMerge, + prefer => RightBiasedRecordMerge, + combine_types => RecursiveRecordTypeMerge, + natural_times => NaturalTimes, + bool_eq => BoolEQ, + bool_ne => BoolNE, + equivalent => Equivalence, + r => Err(op.error(format!("Rule {:?} isn't an operator", r)))?, + }; + + Ok(spanned_union(l.span(), r.span(), BinOp(op, l, r))) + } + + fn Some_(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(expression, shortcut = true)] + fn application_expression<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [expression(e)] => e, + [expression(first), expression(rest)..] => { + rest.fold( + first, + |acc, e| { + spanned_union( + acc.span(), + e.span(), + App(acc, e) + ) + } + ) + }, + )) + } + + #[alias(expression, shortcut = true)] + fn first_application_expression<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [Some_(()), expression(e)] => { + spanned(input, SomeLit(e)) + }, + [merge(()), expression(x), expression(y)] => { + spanned(input, Merge(x, y, None)) + }, + [toMap(()), expression(x)] => { + spanned(input, ToMap(x, None)) + }, + [expression(e)] => e, + )) + } + + #[alias(expression, shortcut = true)] + fn selector_expression<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [expression(e)] => e, + [expression(first), selector(rest)..] => { + rest.fold( + first, + |acc, e| { + spanned_union( + acc.span(), + e.1, + match e.0 { + Selector::Field(l) => Field(acc, l), + Selector::Projection(ls) => Projection(acc, ls), + Selector::ProjectionByExpr(e) => ProjectionByExpr(acc, e) + } + ) + } + ) + }, + )) + } + + fn selector<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Selector<E>, Span)> { + let stor = match_nodes!(input.children(); + [label(l)] => Selector::Field(l), + [labels(ls)] => Selector::Projection(ls), + [expression(e)] => Selector::ProjectionByExpr(e), + ); + Ok((stor, input_to_span(input))) + } + + fn labels(input: ParseInput) -> ParseResult<DupTreeSet<Label>> { + Ok(match_nodes!(input.into_children(); + [label(ls)..] => ls.collect(), + )) + } + + #[alias(expression, shortcut = true)] + fn primitive_expression<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [double_literal(n)] => spanned(input, DoubleLit(n)), + [natural_literal(n)] => spanned(input, NaturalLit(n)), + [integer_literal(n)] => spanned(input, IntegerLit(n)), + [double_quote_literal(s)] => spanned(input, TextLit(s)), + [single_quote_literal(s)] => spanned(input, TextLit(s)), + [expression(e)] => e, + )) + } + + #[alias(expression)] + fn empty_record_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(spanned(input, RecordLit(Default::default()))) + } + + #[alias(expression)] + fn empty_record_type<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + Ok(spanned(input, RecordType(Default::default()))) + } + + #[alias(expression)] + fn non_empty_record_type_or_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + let e = match_nodes!(input.children(); + [label(first_label), non_empty_record_type(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + RecordType(map) + }, + [label(first_label), non_empty_record_literal(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + RecordLit(map) + }, + ); + Ok(spanned(input, e)) + } + + fn non_empty_record_type<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Expr<E>, DupTreeMap<Label, Expr<E>>)> { + Ok(match_nodes!(input.into_children(); + [expression(expr), record_type_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_type_entry<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Label, Expr<E>)> { + Ok(match_nodes!(input.into_children(); + [label(name), expression(expr)] => (name, expr) + )) + } + + fn non_empty_record_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Expr<E>, DupTreeMap<Label, Expr<E>>)> { + Ok(match_nodes!(input.into_children(); + [expression(expr), record_literal_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_literal_entry<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Label, Expr<E>)> { + Ok(match_nodes!(input.into_children(); + [label(name), expression(expr)] => (name, expr) + )) + } + + #[alias(expression)] + fn union_type<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + let map = match_nodes!(input.children(); + [empty_union_type(_)] => Default::default(), + [union_type_entry(entries)..] => entries.collect(), + ); + Ok(spanned(input, UnionType(map))) + } + + fn empty_union_type(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + fn union_type_entry<E: Clone>( + input: ParseInput, + ) -> ParseResult<(Label, Option<Expr<E>>)> { + Ok(match_nodes!(input.children(); + [label(name), expression(expr)] => (name, Some(expr)), + [label(name)] => (name, None), + )) + } + + #[alias(expression)] + fn non_empty_list_literal<E: Clone>( + input: ParseInput, + ) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.children(); + [expression(items)..] => spanned( + input, + NEListLit(items.collect()) + ) + )) + } + + #[alias(expression)] + fn final_expression<E: Clone>(input: ParseInput) -> ParseResult<Expr<E>> { + Ok(match_nodes!(input.into_children(); + [expression(e), EOI(_)] => e + )) + } +} + +pub fn parse_expr<E: Clone>(input_str: &str) -> ParseResult<Expr<E>> { + let rc_input_str = input_str.to_string().into(); + let inputs = DhallParser::parse_with_userdata( + Rule::final_expression, + input_str, + rc_input_str, + )?; + Ok(match_nodes!(<DhallParser>; inputs; + [expression(e)] => e, + )) +} diff --git a/dhall/src/syntax/printer.rs b/dhall/src/syntax/printer.rs new file mode 100644 index 0000000..8df456b --- /dev/null +++ b/dhall/src/syntax/printer.rs @@ -0,0 +1,500 @@ +use crate::syntax::*; +use itertools::Itertools; +use std::fmt::{self, Display}; + +/// Generic instance that delegates to subexpressions +impl<SE: Display + Clone, E: Display> Display for ExprF<SE, E> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::ExprF::*; + match self { + Lam(a, b, c) => { + write!(f, "λ({} : {}) → {}", a, b, c)?; + } + BoolIf(a, b, c) => { + write!(f, "if {} then {} else {}", a, b, c)?; + } + Pi(a, b, c) if &String::from(a) == "_" => { + write!(f, "{} → {}", b, c)?; + } + Pi(a, b, c) => { + write!(f, "∀({} : {}) → {}", a, b, c)?; + } + Let(a, b, c, d) => { + write!(f, "let {}", a)?; + if let Some(b) = b { + write!(f, " : {}", b)?; + } + write!(f, " = {} in {}", c, d)?; + } + EmptyListLit(t) => { + write!(f, "[] : {}", t)?; + } + NEListLit(es) => { + fmt_list("[", ", ", "]", es, f, Display::fmt)?; + } + SomeLit(e) => { + write!(f, "Some {}", e)?; + } + Merge(a, b, c) => { + write!(f, "merge {} {}", a, b)?; + if let Some(c) = c { + write!(f, " : {}", c)?; + } + } + ToMap(a, b) => { + write!(f, "toMap {}", a)?; + if let Some(b) = b { + write!(f, " : {}", b)?; + } + } + Annot(a, b) => { + write!(f, "{} : {}", a, b)?; + } + Assert(a) => { + write!(f, "assert : {}", a)?; + } + ExprF::BinOp(op, a, b) => { + write!(f, "{} {} {}", a, op, b)?; + } + ExprF::App(a, b) => { + write!(f, "{} {}", a, b)?; + } + Field(a, b) => { + write!(f, "{}.{}", a, b)?; + } + Projection(e, ls) => { + write!(f, "{}.", e)?; + fmt_list("{ ", ", ", " }", ls, f, Display::fmt)?; + } + ProjectionByExpr(a, b) => { + write!(f, "{}.({})", a, b)?; + } + Var(a) => a.fmt(f)?, + Const(k) => k.fmt(f)?, + Builtin(v) => v.fmt(f)?, + BoolLit(true) => f.write_str("True")?, + BoolLit(false) => f.write_str("False")?, + NaturalLit(a) => a.fmt(f)?, + IntegerLit(a) if *a >= 0 => { + f.write_str("+")?; + a.fmt(f)?; + } + IntegerLit(a) => a.fmt(f)?, + DoubleLit(a) => a.fmt(f)?, + TextLit(a) => a.fmt(f)?, + RecordType(a) if a.is_empty() => f.write_str("{}")?, + RecordType(a) => fmt_list("{ ", ", ", " }", a, f, |(k, t), f| { + write!(f, "{} : {}", k, t) + })?, + RecordLit(a) if a.is_empty() => f.write_str("{=}")?, + RecordLit(a) => fmt_list("{ ", ", ", " }", a, f, |(k, v), f| { + write!(f, "{} = {}", k, v) + })?, + UnionType(a) => fmt_list("< ", " | ", " >", a, f, |(k, v), f| { + write!(f, "{}", k)?; + if let Some(v) = v { + write!(f, ": {}", v)?; + } + Ok(()) + })?, + Import(a) => a.fmt(f)?, + Embed(a) => a.fmt(f)?, + } + Ok(()) + } +} + +// There is a one-to-one correspondence between the formatter and the grammar. Each phase is +// named after a corresponding grammar group, and the structure of the formatter reflects +// the relationship between the corresponding grammar rules. This leads to the nice property +// of automatically getting all the parentheses and precedences right. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +enum PrintPhase { + Base, + Operator, + BinOp(core::BinOp), + App, + Import, + Primitive, +} + +// Wraps an Expr with a phase, so that phase selsction can be done +// separate from the actual printing +#[derive(Clone)] +struct PhasedExpr<'a, A>(&'a Expr<A>, PrintPhase); + +impl<'a, A: Display + Clone> Display for PhasedExpr<'a, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.0.as_ref().fmt_phase(f, self.1) + } +} + +impl<'a, A: Display + Clone> PhasedExpr<'a, A> { + fn phase(self, phase: PrintPhase) -> PhasedExpr<'a, A> { + PhasedExpr(self.0, phase) + } +} + +impl<A: Display + Clone> RawExpr<A> { + fn fmt_phase( + &self, + f: &mut fmt::Formatter, + phase: PrintPhase, + ) -> Result<(), fmt::Error> { + use crate::syntax::ExprF::*; + use PrintPhase::*; + + let needs_paren = match self { + Lam(_, _, _) + | BoolIf(_, _, _) + | Pi(_, _, _) + | Let(_, _, _, _) + | EmptyListLit(_) + | NEListLit(_) + | SomeLit(_) + | Merge(_, _, _) + | ToMap(_, _) + | Annot(_, _) + if phase > Base => + { + true + } + // Precedence is magically handled by the ordering of BinOps. + ExprF::BinOp(op, _, _) if phase > PrintPhase::BinOp(*op) => true, + ExprF::App(_, _) if phase > PrintPhase::App => true, + Field(_, _) | Projection(_, _) | ProjectionByExpr(_, _) + if phase > PrintPhase::Import => + { + true + } + _ => false, + }; + + // Annotate subexpressions with the appropriate phase, defaulting to Base + let phased_self = match self.map_ref(|e| PhasedExpr(e, Base)) { + Pi(a, b, c) => { + if &String::from(&a) == "_" { + Pi(a, b.phase(Operator), c) + } else { + Pi(a, b, c) + } + } + Merge(a, b, c) => Merge( + a.phase(PrintPhase::Import), + b.phase(PrintPhase::Import), + c.map(|x| x.phase(PrintPhase::App)), + ), + ToMap(a, b) => ToMap( + a.phase(PrintPhase::Import), + b.map(|x| x.phase(PrintPhase::App)), + ), + Annot(a, b) => Annot(a.phase(Operator), b), + ExprF::BinOp(op, a, b) => ExprF::BinOp( + op, + a.phase(PrintPhase::BinOp(op)), + b.phase(PrintPhase::BinOp(op)), + ), + SomeLit(e) => SomeLit(e.phase(PrintPhase::Import)), + ExprF::App(f, a) => ExprF::App( + f.phase(PrintPhase::Import), + a.phase(PrintPhase::Import), + ), + Field(a, b) => Field(a.phase(Primitive), b), + Projection(e, ls) => Projection(e.phase(Primitive), ls), + ProjectionByExpr(a, b) => ProjectionByExpr(a.phase(Primitive), b), + e => e, + }; + + if needs_paren { + f.write_str("(")?; + } + + // Uses the ExprF<PhasedExpr<_>, _> instance + phased_self.fmt(f)?; + + if needs_paren { + f.write_str(")")?; + } + Ok(()) + } +} + +impl<A: Display + Clone> Display for Expr<A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.as_ref().fmt_phase(f, PrintPhase::Base) + } +} + +fn fmt_list<T, I, F>( + open: &str, + sep: &str, + close: &str, + it: I, + f: &mut fmt::Formatter, + func: F, +) -> Result<(), fmt::Error> +where + I: IntoIterator<Item = T>, + F: Fn(T, &mut fmt::Formatter) -> Result<(), fmt::Error>, +{ + f.write_str(open)?; + for (i, x) in it.into_iter().enumerate() { + if i > 0 { + f.write_str(sep)?; + } + func(x, f)?; + } + f.write_str(close) +} + +impl<SubExpr: Display> Display for InterpolatedText<SubExpr> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + f.write_str("\"")?; + for x in self.iter() { + match x { + InterpolatedTextContents::Text(a) => { + for c in a.chars() { + match c { + '\\' => f.write_str("\\\\"), + '"' => f.write_str("\\\""), + '$' => f.write_str("\\u0024"), + '\u{0008}' => f.write_str("\\b"), + '\u{000C}' => f.write_str("\\f"), + '\n' => f.write_str("\\n"), + '\r' => f.write_str("\\r"), + '\t' => f.write_str("\\t"), + '\u{0000}'..='\u{001F}' => { + // Escape to an explicit "\u{XXXX}" form + let escaped: String = + c.escape_default().collect(); + // Print as "\uXXXX" + write!( + f, + "\\u{:0>4}", + &escaped[3..escaped.len() - 1] + ) + } + c => write!(f, "{}", c), + }?; + } + } + InterpolatedTextContents::Expr(e) => { + f.write_str("${ ")?; + e.fmt(f)?; + f.write_str(" }")?; + } + } + } + f.write_str("\"")?; + Ok(()) + } +} + +impl Display for Const { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <Self as fmt::Debug>::fmt(self, f) + } +} + +impl Display for BinOp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::BinOp::*; + f.write_str(match self { + BoolOr => "||", + TextAppend => "++", + NaturalPlus => "+", + BoolAnd => "&&", + RecursiveRecordMerge => "∧", + NaturalTimes => "*", + BoolEQ => "==", + BoolNE => "!=", + RecursiveRecordTypeMerge => "⩓", + ImportAlt => "?", + RightBiasedRecordMerge => "⫽", + ListAppend => "#", + Equivalence => "≡", + }) + } +} + +impl Display for NaiveDouble { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let v = f64::from(*self); + if v == std::f64::INFINITY { + f.write_str("Infinity") + } else if v == std::f64::NEG_INFINITY { + f.write_str("-Infinity") + } else if v.is_nan() { + f.write_str("NaN") + } else { + let s = format!("{}", v); + if s.contains('e') || s.contains('.') { + f.write_str(&s) + } else { + write!(f, "{}.0", s) + } + } + } +} + +impl Display for Label { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + // TODO: distinguish between reserved and nonreserved locations for quoting builtins + let s = String::from(self); + let is_reserved = match s.as_str() { + "let" | "in" | "if" | "then" | "else" | "Type" | "Kind" + | "Sort" | "True" | "False" => true, + _ => crate::syntax::Builtin::parse(&s).is_some(), + }; + if !is_reserved && s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s) + } else { + write!(f, "`{}`", s) + } + } +} + +impl Display for Hash { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match self { + Hash::SHA256(hash) => write!(f, "sha256:{}", hex::encode(hash)), + } + } +} +impl<SubExpr: Display> Display for Import<SubExpr> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use FilePrefix::*; + use ImportLocation::*; + use ImportMode::*; + let quote_if_needed = |s: &str| -> String { + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + s.to_string() + } else { + format!("\"{}\"", s) + } + }; + + match &self.location { + Local(prefix, path) => { + let prefix = match prefix { + Here => ".", + Parent => "..", + Home => "~", + Absolute => "", + }; + write!(f, "{}/", prefix)?; + let path: String = path + .file_path + .iter() + .map(|c| quote_if_needed(&*c)) + .join("/"); + f.write_str(&path)?; + } + Remote(url) => { + write!(f, "{}://{}/", url.scheme, url.authority,)?; + let path: String = url.path.file_path.iter().join("/"); + f.write_str(&path)?; + if let Some(q) = &url.query { + write!(f, "?{}", q)? + } + if let Some(h) = &url.headers { + write!(f, " using ({})", h)? + } + } + Env(s) => { + write!(f, "env:")?; + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s)?; + } else { + write!(f, "\"")?; + for c in s.chars() { + match c { + '"' => f.write_str("\\\"")?, + '\\' => f.write_str("\\\\")?, + '\u{0007}' => f.write_str("\\a")?, + '\u{0008}' => f.write_str("\\b")?, + '\u{000C}' => f.write_str("\\f")?, + '\n' => f.write_str("\\n")?, + '\r' => f.write_str("\\r")?, + '\t' => f.write_str("\\t")?, + '\u{000B}' => f.write_str("\\v")?, + _ => write!(f, "{}", c)?, + } + } + write!(f, "\"")?; + } + } + Missing => { + write!(f, "missing")?; + } + } + if let Some(hash) = &self.hash { + write!(f, " ")?; + hash.fmt(f)?; + } + match self.mode { + Code => {} + RawText => write!(f, " as Text")?, + Location => write!(f, " as Location")?, + } + Ok(()) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::Builtin::*; + f.write_str(match *self { + Bool => "Bool", + Natural => "Natural", + Integer => "Integer", + Double => "Double", + Text => "Text", + List => "List", + Optional => "Optional", + OptionalNone => "None", + NaturalBuild => "Natural/build", + NaturalFold => "Natural/fold", + NaturalIsZero => "Natural/isZero", + NaturalEven => "Natural/even", + NaturalOdd => "Natural/odd", + NaturalToInteger => "Natural/toInteger", + NaturalShow => "Natural/show", + NaturalSubtract => "Natural/subtract", + IntegerToDouble => "Integer/toDouble", + IntegerShow => "Integer/show", + DoubleShow => "Double/show", + ListBuild => "List/build", + ListFold => "List/fold", + ListLength => "List/length", + ListHead => "List/head", + ListLast => "List/last", + ListIndexed => "List/indexed", + ListReverse => "List/reverse", + OptionalFold => "Optional/fold", + OptionalBuild => "Optional/build", + TextShow => "Text/show", + }) + } +} + +impl Display for Scheme { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::Scheme::*; + f.write_str(match *self { + HTTP => "http", + HTTPS => "https", + }) + } +} + +impl<Label: Display> Display for V<Label> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let V(x, n) = self; + x.fmt(f)?; + if *n != 0 { + write!(f, "@{}", n)?; + } + Ok(()) + } +} |