From 45be2ff1f5bb3d6e0faa098402adf985b3d5e7ca Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 4 May 2019 12:38:36 +0200 Subject: Rename dhall_core to dhall_syntax --- dhall_syntax/src/context.rs | 80 ++++ dhall_syntax/src/core.rs | 563 +++++++++++++++++++++++++ dhall_syntax/src/import.rs | 64 +++ dhall_syntax/src/label.rs | 34 ++ dhall_syntax/src/lib.rs | 28 ++ dhall_syntax/src/parser.rs | 984 ++++++++++++++++++++++++++++++++++++++++++++ dhall_syntax/src/printer.rs | 498 ++++++++++++++++++++++ dhall_syntax/src/text.rs | 116 ++++++ dhall_syntax/src/visitor.rs | 667 ++++++++++++++++++++++++++++++ 9 files changed, 3034 insertions(+) create mode 100644 dhall_syntax/src/context.rs create mode 100644 dhall_syntax/src/core.rs create mode 100644 dhall_syntax/src/import.rs create mode 100644 dhall_syntax/src/label.rs create mode 100644 dhall_syntax/src/lib.rs create mode 100644 dhall_syntax/src/parser.rs create mode 100644 dhall_syntax/src/printer.rs create mode 100644 dhall_syntax/src/text.rs create mode 100644 dhall_syntax/src/visitor.rs (limited to 'dhall_syntax/src') diff --git a/dhall_syntax/src/context.rs b/dhall_syntax/src/context.rs new file mode 100644 index 0000000..55bfff5 --- /dev/null +++ b/dhall_syntax/src/context.rs @@ -0,0 +1,80 @@ +use std::cmp::Eq; +use std::collections::HashMap; +use std::hash::Hash; + +/// A `(Context a)` associates `Text` labels with values of type `a` +/// +/// The `Context` is used for type-checking when `(a = Expr X)` +/// +/// * You create a `Context` using `empty` and `insert` +/// * You transform a `Context` using `fmap` +/// * You consume a `Context` using `lookup` and `toList` +/// +/// The difference between a `Context` and a `Map` is that a `Context` lets you +/// have multiple ordered occurrences of the same key and you can query for the +/// `n`th occurrence of a given key. +/// +#[derive(Debug, Clone)] +pub struct Context<K: Eq + Hash, T>(HashMap<K, Vec<T>>); + +impl<K: Hash + Eq + Clone, T> Context<K, T> { + /// An empty context with no key-value pairs + pub fn new() -> Self { + Context(HashMap::new()) + } + + /// Look up a key by name and index + /// + /// ```c + /// lookup _ _ empty = Nothing + /// lookup k 0 (insert k v c) = Just v + /// lookup k n (insert k v c) = lookup k (n - 1) c -- 1 <= n + /// lookup k n (insert j v c) = lookup k n c -- k /= j + /// ``` + pub fn lookup<'a>(&'a self, k: &K, n: usize) -> Option<&'a T> { + self.0.get(k).and_then(|v| { + if n < v.len() { + v.get(v.len() - 1 - n) + } else { + None + } + }) + } + + pub fn map<U, F: Fn(&K, &T) -> U>(&self, f: F) -> Context<K, U> { + Context( + self.0 + .iter() + .map(|(k, vs)| { + ((*k).clone(), vs.iter().map(|v| f(k, v)).collect()) + }) + .collect(), + ) + } + + pub fn lookup_all<'a>(&'a self, k: &K) -> impl Iterator<Item = &T> { + self.0.get(k).into_iter().flat_map(|v| v.iter()) + } + + pub fn iter<'a>(&'a self) -> impl Iterator<Item = (&K, &T)> { + self.0 + .iter() + .flat_map(|(k, vs)| vs.iter().map(move |v| (k, v))) + } + + pub fn iter_keys<'a>(&'a self) -> impl Iterator<Item = (&K, &Vec<T>)> { + self.0.iter() + } +} + +impl<K: Hash + Eq + Clone, T: Clone> Context<K, T> { + /// Add a key-value pair to the `Context` + pub fn insert(&self, k: K, v: T) -> Self { + let mut ctx = (*self).clone(); + { + let m = ctx.0.entry(k).or_insert_with(Vec::new); + m.push(v); + } + ctx + } +} diff --git a/dhall_syntax/src/core.rs b/dhall_syntax/src/core.rs new file mode 100644 index 0000000..3db07dd --- /dev/null +++ b/dhall_syntax/src/core.rs @@ -0,0 +1,563 @@ +#![allow(non_snake_case)] +use std::collections::BTreeMap; +use std::collections::HashMap; +use std::rc::Rc; + +use crate::context::Context; +use crate::visitor; +use crate::*; + +pub type Integer = isize; +pub type Natural = usize; +pub type Double = NaiveDouble; + +/// An empty type +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum X {} + +pub fn trivial_result<T>(x: Result<T, X>) -> T { + match x { + Ok(x) => x, + Err(e) => match e {}, + } +} + +/// Double with bitwise equality +#[derive(Debug, Copy, Clone)] +pub struct NaiveDouble(f64); + +impl PartialEq for NaiveDouble { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} + +impl Eq for NaiveDouble {} + +impl From<f64> for NaiveDouble { + fn from(x: f64) -> Self { + NaiveDouble(x) + } +} + +impl From<NaiveDouble> for f64 { + fn from(x: NaiveDouble) -> f64 { + x.0 + } +} + +/// Constants for a pure type system +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Const { + Type, + Kind, + Sort, +} + +/// Bound variable +/// +/// The `Label` field is the variable's name (i.e. \"`x`\"). +/// The `Int` field is a DeBruijn index. +/// See dhall-lang/standard/semantics.md for details +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct V<Label>(pub Label, pub usize); + +// This is only for the specific `Label` type, not generic +impl From<Label> for V<Label> { + fn from(x: Label) -> V<Label> { + V(x, 0) + } +} +impl<'a> From<&'a Label> for V<Label> { + fn from(x: &'a Label) -> V<Label> { + V(x.clone(), 0) + } +} + +// Definition order must match precedence order for +// pretty-printing to work correctly +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord)] +pub enum BinOp { + /// x ? y + ImportAlt, + /// x || y` + BoolOr, + /// x + y` + NaturalPlus, + /// x ++ y` + TextAppend, + /// x # y + ListAppend, + /// x && y` + BoolAnd, + /// x ∧ y` + Combine, + /// x // y + Prefer, + /// x //\\ y + CombineTypes, + /// x * y` + NaturalTimes, + /// x == y` + BoolEQ, + /// x != y` + BoolNE, +} + +/// Built-ins +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Builtin { + Bool, + Natural, + Integer, + Double, + Text, + List, + Optional, + OptionalNone, + NaturalBuild, + NaturalFold, + NaturalIsZero, + NaturalEven, + NaturalOdd, + NaturalToInteger, + NaturalShow, + IntegerToDouble, + IntegerShow, + DoubleShow, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + OptionalFold, + OptionalBuild, + TextShow, +} + +pub type ParsedExpr = SubExpr<X, Import>; +pub type ResolvedExpr = SubExpr<X, X>; +pub type DhallExpr = ResolvedExpr; + +#[derive(Debug, PartialEq, Eq)] +pub struct SubExpr<Note, Embed>(pub Rc<Expr<Note, Embed>>); + +pub type Expr<Note, Embed> = ExprF<SubExpr<Note, Embed>, Label, Note, Embed>; + +/// Syntax tree for expressions +// Having the recursion out of the enum definition enables writing +// much more generic code and improves pattern-matching behind +// smart pointers. +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ExprF<SubExpr, Label, Note, Embed> { + Const(Const), + /// `x` + /// `x@n` + Var(V<Label>), + /// `λ(x : A) -> b` + Lam(Label, SubExpr, SubExpr), + /// `A -> B` + /// `∀(x : A) -> B` + Pi(Label, SubExpr, SubExpr), + /// `f a` + App(SubExpr, SubExpr), + /// `let x = r in e` + /// `let x : t = r in e` + Let(Label, Option<SubExpr>, SubExpr, SubExpr), + /// `x : t` + Annot(SubExpr, SubExpr), + /// Built-in values + Builtin(Builtin), + // Binary operations + BinOp(BinOp, SubExpr, SubExpr), + /// `True` + BoolLit(bool), + /// `if x then y else z` + BoolIf(SubExpr, SubExpr, SubExpr), + /// `1` + NaturalLit(Natural), + /// `+2` + IntegerLit(Integer), + /// `3.24` + DoubleLit(Double), + /// `"Some ${interpolated} text"` + TextLit(InterpolatedText<SubExpr>), + /// `[] : List t` + EmptyListLit(SubExpr), + /// `[x, y, z]` + NEListLit(Vec<SubExpr>), + /// Deprecated Optional literal form + /// `[] : Optional a` + /// `[x] : Optional a` + OldOptionalLit(Option<SubExpr>, SubExpr), + /// `Some e` + SomeLit(SubExpr), + /// `{ k1 : t1, k2 : t1 }` + RecordType(BTreeMap<Label, SubExpr>), + /// `{ k1 = v1, k2 = v2 }` + RecordLit(BTreeMap<Label, SubExpr>), + /// `< k1 : t1, k2 >` + UnionType(BTreeMap<Label, Option<SubExpr>>), + /// `< k1 = t1, k2 : t2, k3 >` + UnionLit(Label, SubExpr, BTreeMap<Label, Option<SubExpr>>), + /// `merge x y : t` + Merge(SubExpr, SubExpr, Option<SubExpr>), + /// `e.x` + Field(SubExpr, Label), + /// `e.{ x, y, z }` + Projection(SubExpr, Vec<Label>), + /// Annotation on the AST. Unused for now but could hold e.g. file location information + Note(Note, SubExpr), + /// Embeds an import or the result of resolving the import + Embed(Embed), +} + +impl<SE, L, N, E> ExprF<SE, L, N, E> { + pub(crate) fn visit<'a, V, Return>(&'a self, v: V) -> Return + where + V: visitor::GenericVisitor<&'a ExprF<SE, L, N, E>, Return>, + { + v.visit(self) + } + + fn traverse_ref_with_special_handling_of_binders<'a, SE2, L2, N2, E2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + visit_under_binder: impl FnOnce(&'a L, &'a SE) -> Result<SE2, Err>, + visit_note: impl FnOnce(&'a N) -> Result<N2, Err>, + visit_embed: impl FnOnce(&'a E) -> Result<E2, Err>, + visit_label: impl FnMut(&'a L) -> Result<L2, Err>, + ) -> Result<ExprF<SE2, L2, N2, E2>, Err> + where + L: Ord, + L2: Ord, + { + self.visit(visitor::TraverseRefWithBindersVisitor { + visit_subexpr, + visit_under_binder, + visit_note, + visit_embed, + visit_label, + }) + } + + fn traverse_ref<'a, SE2, L2, N2, E2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + visit_note: impl FnOnce(&'a N) -> Result<N2, Err>, + visit_embed: impl FnOnce(&'a E) -> Result<E2, Err>, + visit_label: impl FnMut(&'a L) -> Result<L2, Err>, + ) -> Result<ExprF<SE2, L2, N2, E2>, Err> + where + L: Ord, + L2: Ord, + { + self.visit(visitor::TraverseRefVisitor { + visit_subexpr, + visit_note, + visit_embed, + visit_label, + }) + } + + pub fn map_ref_with_special_handling_of_binders<'a, SE2, L2, N2, E2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + mut map_under_binder: impl FnMut(&'a L, &'a SE) -> SE2, + map_note: impl FnOnce(&'a N) -> N2, + map_embed: impl FnOnce(&'a E) -> E2, + mut map_label: impl FnMut(&'a L) -> L2, + ) -> ExprF<SE2, L2, N2, E2> + where + L: Ord, + L2: Ord, + { + trivial_result(self.traverse_ref_with_special_handling_of_binders( + |x| Ok(map_subexpr(x)), + |l, x| Ok(map_under_binder(l, x)), + |x| Ok(map_note(x)), + |x| Ok(map_embed(x)), + |x| Ok(map_label(x)), + )) + } + + pub fn map_ref<'a, SE2, L2, N2, E2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + map_note: impl FnOnce(&'a N) -> N2, + map_embed: impl FnOnce(&'a E) -> E2, + mut map_label: impl FnMut(&'a L) -> L2, + ) -> ExprF<SE2, L2, N2, E2> + where + L: Ord, + L2: Ord, + { + trivial_result(self.traverse_ref( + |x| Ok(map_subexpr(x)), + |x| Ok(map_note(x)), + |x| Ok(map_embed(x)), + |x| Ok(map_label(x)), + )) + } + + pub fn traverse_ref_simple<'a, SE2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + ) -> Result<ExprF<SE2, L, N, E>, Err> + where + L: Ord + Clone, + N: Clone, + E: Clone, + { + self.traverse_ref( + visit_subexpr, + |x| Ok(N::clone(x)), + |x| Ok(E::clone(x)), + |x| Ok(L::clone(x)), + ) + } + + pub fn map_ref_simple<'a, SE2>( + &'a self, + map_subexpr: impl Fn(&'a SE) -> SE2, + ) -> ExprF<SE2, L, N, E> + where + L: Ord + Clone, + N: Clone, + E: Clone, + { + self.map_ref(map_subexpr, N::clone, E::clone, L::clone) + } +} + +impl<N, E> Expr<N, E> { + fn traverse_embed<E2, Err>( + &self, + visit_embed: impl FnMut(&E) -> Result<E2, Err>, + ) -> Result<Expr<N, E2>, Err> + where + N: Clone, + { + self.visit(&mut visitor::TraverseEmbedVisitor(visit_embed)) + } + + fn map_embed<E2>(&self, mut map_embed: impl FnMut(&E) -> E2) -> Expr<N, E2> + where + N: Clone, + { + trivial_result(self.traverse_embed(|x| Ok(map_embed(x)))) + } + + pub fn roll(&self) -> SubExpr<N, E> + where + N: Clone, + E: Clone, + { + rc(ExprF::clone(self)) + } + + pub fn squash_embed<E2>( + &self, + f: impl FnMut(&E) -> SubExpr<N, E2>, + ) -> SubExpr<N, E2> + where + N: Clone, + { + trivial_result(self.visit(&mut visitor::SquashEmbedVisitor(f))) + } +} + +impl<E: Clone> Expr<X, E> { + pub fn note_absurd<N>(&self) -> Expr<N, E> { + self.visit(&mut visitor::NoteAbsurdVisitor) + } +} + +impl<N: Clone> Expr<N, X> { + pub fn embed_absurd<E>(&self) -> Expr<N, E> { + self.visit(&mut visitor::EmbedAbsurdVisitor) + } +} + +impl<N, E> SubExpr<N, E> { + pub fn as_ref(&self) -> &Expr<N, E> { + self.0.as_ref() + } + + pub fn traverse_embed<E2, Err>( + &self, + visit_embed: impl FnMut(&E) -> Result<E2, Err>, + ) -> Result<SubExpr<N, E2>, Err> + where + N: Clone, + { + Ok(rc(self.as_ref().traverse_embed(visit_embed)?)) + } + + pub fn map_embed<E2>( + &self, + map_embed: impl FnMut(&E) -> E2, + ) -> SubExpr<N, E2> + where + N: Clone, + { + rc(self.as_ref().map_embed(map_embed)) + } + + pub fn map_subexprs_with_special_handling_of_binders<'a>( + &'a self, + map_expr: impl FnMut(&'a Self) -> Self, + map_under_binder: impl FnMut(&'a Label, &'a Self) -> Self, + ) -> Self { + match self.as_ref() { + ExprF::Embed(_) => SubExpr::clone(self), + // Recursive call + // TODO: don't discard the note ! + ExprF::Note(_, e) => e + .map_subexprs_with_special_handling_of_binders( + map_expr, + map_under_binder, + ), + // Call ExprF::map_ref + e => rc(e.map_ref_with_special_handling_of_binders( + map_expr, + map_under_binder, + |_| unreachable!(), + |_| unreachable!(), + Label::clone, + )), + } + } + + pub fn unroll(&self) -> Expr<N, E> + where + N: Clone, + E: Clone, + { + ExprF::clone(self.as_ref()) + } + + pub fn unnote(&self) -> SubExpr<X, E> + where + E: Clone, + { + rc(self.as_ref().visit(&mut visitor::UnNoteVisitor)) + } + + /// `shift` is used by both normalization and type-checking to avoid variable + /// capture by shifting variable indices + /// See https://github.com/dhall-lang/dhall-lang/blob/master/standard/semantics.md#shift + /// for details + pub fn shift(&self, delta: isize, var: &V<Label>) -> Self { + match self.as_ref() { + ExprF::Var(v) => rc(ExprF::Var(v.shift(delta, var))), + _ => self.map_subexprs_with_special_handling_of_binders( + |e| e.shift(delta, var), + |x: &Label, e| e.shift(delta, &var.shift(1, &x.into())), + ), + } + } + + pub fn subst_shift(&self, var: &V<Label>, val: &Self) -> Self { + match self.as_ref() { + ExprF::Var(v) if v == var => val.clone(), + ExprF::Var(v) => rc(ExprF::Var(v.shift(-1, var))), + _ => self.map_subexprs_with_special_handling_of_binders( + |e| e.subst_shift(var, val), + |x: &Label, e| { + e.subst_shift( + &var.shift(1, &x.into()), + &val.shift(1, &x.into()), + ) + }, + ), + } + } +} + +impl<N: Clone> SubExpr<N, X> { + pub fn embed_absurd<T>(&self) -> SubExpr<N, T> { + rc(self.as_ref().embed_absurd()) + } +} + +impl<E: Clone> SubExpr<X, E> { + pub fn note_absurd<N>(&self) -> SubExpr<N, E> { + rc(self.as_ref().note_absurd()) + } +} + +impl<N, E> Clone for SubExpr<N, E> { + fn clone(&self) -> Self { + SubExpr(Rc::clone(&self.0)) + } +} + +// Should probably rename this +pub fn rc<N, E>(x: Expr<N, E>) -> SubExpr<N, E> { + SubExpr(Rc::new(x)) +} + +/// Add an isize to an usize +/// Panics on over/underflow +fn add_ui(u: usize, i: isize) -> usize { + if i < 0 { + u.checked_sub(i.checked_neg().unwrap() as usize).unwrap() + } else { + u.checked_add(i as usize).unwrap() + } +} + +impl<Label: PartialEq + Clone> V<Label> { + pub fn shift(&self, delta: isize, var: &V<Label>) -> Self { + let V(x, n) = var; + let V(y, m) = self; + if x == y && n <= m { + V(y.clone(), add_ui(*m, delta)) + } else { + V(y.clone(), *m) + } + } +} + +/// `shift` is used by both normalization and type-checking to avoid variable +/// capture by shifting variable indices +/// See https://github.com/dhall-lang/dhall-lang/blob/master/standard/semantics.md#shift +/// for details +/// +pub fn shift<N, E>( + delta: isize, + var: &V<Label>, + in_expr: &SubExpr<N, E>, +) -> SubExpr<N, E> { + in_expr.shift(delta, var) +} + +pub fn shift0_multiple<N, E>( + deltas: &HashMap<Label, isize>, + in_expr: &SubExpr<N, E>, +) -> SubExpr<N, E> { + shift0_multiple_inner(&Context::new(), deltas, in_expr) +} + +fn shift0_multiple_inner<N, E>( + ctx: &Context<Label, ()>, + deltas: &HashMap<Label, isize>, + in_expr: &SubExpr<N, E>, +) -> SubExpr<N, E> { + use crate::ExprF::*; + match in_expr.as_ref() { + Var(V(y, m)) if ctx.lookup(y, *m).is_none() => { + let delta = deltas.get(y).unwrap_or(&0); + rc(Var(V(y.clone(), add_ui(*m, *delta)))) + } + _ => in_expr.map_subexprs_with_special_handling_of_binders( + |e| shift0_multiple_inner(ctx, deltas, e), + |x: &Label, e| { + shift0_multiple_inner(&ctx.insert(x.clone(), ()), deltas, e) + }, + ), + } +} diff --git a/dhall_syntax/src/import.rs b/dhall_syntax/src/import.rs new file mode 100644 index 0000000..00f293c --- /dev/null +++ b/dhall_syntax/src/import.rs @@ -0,0 +1,64 @@ +use std::path::PathBuf; + +/// The beginning of a file path which anchors subsequent path components +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum FilePrefix { + /// Absolute path + Absolute, + /// Path relative to . + Here, + /// Path relative to .. + Parent, + /// Path relative to ~ + Home, +} + +/// The location of import (i.e. local vs. remote vs. environment) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ImportLocation { + Local(FilePrefix, PathBuf), + Remote(URL), + Env(String), + Missing, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct URL { + pub scheme: Scheme, + pub authority: String, + pub path: PathBuf, + pub query: Option<String>, + pub headers: Option<Box<ImportHashed>>, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Scheme { + HTTP, + HTTPS, +} + +/// How to interpret the import's contents (i.e. as Dhall code or raw text) +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum ImportMode { + Code, + RawText, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Hash { + pub protocol: String, + pub hash: String, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct ImportHashed { + pub location: ImportLocation, + pub hash: Option<Hash>, +} + +/// Reference to an external resource +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Import { + pub mode: ImportMode, + pub location_hashed: ImportHashed, +} diff --git a/dhall_syntax/src/label.rs b/dhall_syntax/src/label.rs new file mode 100644 index 0000000..43c3f53 --- /dev/null +++ b/dhall_syntax/src/label.rs @@ -0,0 +1,34 @@ +use std::rc::Rc; + +// The type for labels throughout the AST +// It owns the data because otherwise lifetimes would make recursive imports impossible +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Label(Rc<str>); + +impl From<String> for Label { + fn from(s: String) -> Self { + let s: &str = &s; + Label(s.into()) + } +} + +impl<'a> From<&'a str> for Label { + fn from(s: &'a str) -> Self { + Label(Rc::from(s)) + } +} + +impl From<&Label> for String { + fn from(x: &Label) -> String { + x.0.as_ref().to_owned() + } +} + +impl Label { + pub fn from_str(s: &str) -> Label { + Label(s.into()) + } + pub fn as_ref(&self) -> &str { + self.0.as_ref() + } +} diff --git a/dhall_syntax/src/lib.rs b/dhall_syntax/src/lib.rs new file mode 100644 index 0000000..3db8222 --- /dev/null +++ b/dhall_syntax/src/lib.rs @@ -0,0 +1,28 @@ +#![feature(trace_macros)] +#![feature(slice_patterns)] +#![allow( + clippy::many_single_char_names, + clippy::should_implement_trait, + clippy::new_without_default, + clippy::type_complexity +)] + +//! This crate contains the core AST-handling primitives for the [dhall-rust][dhall-rust] crate. +//! This is highly unstable and breaks regularly; use at your own risk. +//! +//! [dhall-rust]: https://github.com/Nadrieril/dhall-rust + +mod core; +pub use crate::core::*; +mod import; +pub use crate::import::*; +mod label; +pub use crate::label::*; +mod text; +pub use crate::text::*; +mod printer; +pub use crate::printer::*; +mod parser; +pub use crate::parser::*; +pub mod context; +pub mod visitor; diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs new file mode 100644 index 0000000..12383d4 --- /dev/null +++ b/dhall_syntax/src/parser.rs @@ -0,0 +1,984 @@ +use itertools::Itertools; +use pest::iterators::Pair; +use pest::Parser; +pub use pest::Span; +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::path::PathBuf; + +use dhall_generated_parser::{DhallParser, Rule}; + +use crate::*; + +// This file consumes the parse tree generated by pest and turns it into +// our own AST. All those custom macros should eventually moved into +// their own crate because they are quite general and useful. For now they +// are here and hopefully you can figure out how they work. + +use crate::ExprF::*; + +type ParsedExpr<'a> = Expr<Span<'a>, Import>; +type ParsedSubExpr<'a> = SubExpr<Span<'a>, Import>; +type ParsedText<'a> = InterpolatedText<SubExpr<Span<'a>, Import>>; +type ParsedTextContents<'a> = + InterpolatedTextContents<SubExpr<Span<'a>, Import>>; + +pub type ParseError = pest::error::Error<Rule>; + +pub type ParseResult<T> = Result<T, ParseError>; + +fn rc(x: ParsedExpr<'_>) -> ParsedSubExpr<'_> { + crate::rc(x) +} + +fn spanned<'a>(_span: Span<'a>, x: ParsedExpr<'a>) -> ParsedExpr<'a> { + x + // This breaks equality testing; I need to fix that first + // Note(span, rc(x)) +} + +#[derive(Debug)] +enum Either<A, B> { + Left(A), + Right(B), +} + +impl crate::Builtin { + pub fn parse(s: &str) -> Option<Self> { + use crate::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "None" => Some(OptionalNone), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +pub fn custom_parse_error(pair: &Pair<Rule>, msg: String) -> ParseError { + let msg = + format!("{} while matching on:\n{}", msg, debug_pair(pair.clone())); + let e = pest::error::ErrorVariant::CustomError { message: msg }; + pest::error::Error::new_from_span(e, pair.as_span()) +} + +fn debug_pair(pair: Pair<Rule>) -> String { + use std::fmt::Write; + let mut s = String::new(); + fn aux(s: &mut String, indent: usize, prefix: String, pair: Pair<Rule>) { + let indent_str = "| ".repeat(indent); + let rule = pair.as_rule(); + let contents = pair.as_str(); + let mut inner = pair.into_inner(); + let mut first = true; + while let Some(p) = inner.next() { + if first { + first = false; + let last = inner.peek().is_none(); + if last && p.as_str() == contents { + let prefix = format!("{}{:?} > ", prefix, rule); + aux(s, indent, prefix, p); + continue; + } else { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(s, indent + 1, "".into(), p); + } + if first { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(&mut s, 0, "".into(), pair); + s +} + +macro_rules! make_parser { + (@pattern, rule, $name:ident) => (Rule::$name); + (@pattern, token_rule, $name:ident) => (Rule::$name); + (@pattern, rule_group, $name:ident) => (_); + (@filter, rule) => (true); + (@filter, token_rule) => (true); + (@filter, rule_group) => (false); + + (@body, + $pair:expr, + $children:expr, + rule!( $name:ident<$o:ty>; $($args:tt)* ) + ) => ( + make_parser!(@body, + $pair, + $children, + rule!( $name<$o> as $name; $($args)* ) + ) + ); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + captured_str!($x:pat) => $body:expr + ) + ) => ({ + let $x = $pair.as_str(); + let res: $o = $body; + Ok(ParsedValue::$group(res)) + }); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + children!( $( [$($args:tt)*] => $body:expr ),* $(,)* ) + ) + ) => ({ + #[allow(unused_imports)] + use ParsedValue::*; + #[allow(unreachable_code)] + let res: $o = improved_slice_patterns::match_vec!($children; + $( [$($args)*] => $body, )* + [x..] => Err( + format!("Unexpected children: {:?}", x.collect::<Vec<_>>()) + )?, + ).map_err(|_| -> String { unreachable!() })?; + Ok(ParsedValue::$group(res)) + }); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + $span:ident; + $($args:tt)* + ) + ) => ({ + let $span = $pair.as_span(); + make_parser!(@body, + $pair, + $children, + rule!( + $name<$o> + as $group; + $($args)* + ) + ) + }); + (@body, + $pair:expr, + $children:expr, + token_rule!($name:ident<$o:ty>) + ) => ({ + Ok(ParsedValue::$name(())) + }); + (@body, $pair:expr, $children:expr, rule_group!( $name:ident<$o:ty> )) => ( + unreachable!() + ); + + ($( $submac:ident!( $name:ident<$o:ty> $($args:tt)* ); )*) => ( + #[allow(non_camel_case_types, dead_code, clippy::large_enum_variant)] + #[derive(Debug)] + enum ParsedValue<'a> { + $( $name($o), )* + } + + fn parse_any<'a>(pair: Pair<'a, Rule>, children: Vec<ParsedValue<'a>>) + -> Result<ParsedValue<'a>, String> { + match pair.as_rule() { + $( + make_parser!(@pattern, $submac, $name) + if make_parser!(@filter, $submac) + => make_parser!(@body, pair, children, + $submac!( $name<$o> $($args)* )) + , + )* + r => Err(format!("Unexpected {:?}", r)), + } + } + ); +} + +// Non-recursive implementation to avoid stack overflows +fn do_parse<'a>(initial_pair: Pair<'a, Rule>) -> ParseResult<ParsedValue<'a>> { + enum StackFrame<'a> { + Unprocessed(Pair<'a, Rule>), + Processed(Pair<'a, Rule>, usize), + } + use StackFrame::*; + let mut pairs_stack: Vec<StackFrame> = + vec![Unprocessed(initial_pair.clone())]; + let mut values_stack: Vec<ParsedValue> = vec![]; + while let Some(p) = pairs_stack.pop() { + match p { + Unprocessed(mut pair) => loop { + let mut pairs: Vec<_> = pair.clone().into_inner().collect(); + let n_children = pairs.len(); + if n_children == 1 && can_be_shortcutted(pair.as_rule()) { + pair = pairs.pop().unwrap(); + continue; + } else { + pairs_stack.push(Processed(pair, n_children)); + pairs_stack + .extend(pairs.into_iter().map(StackFrame::Unprocessed)); + break; + } + }, + Processed(pair, n) => { + let mut children: Vec<_> = + values_stack.split_off(values_stack.len() - n); + children.reverse(); + let val = match parse_any(pair.clone(), children) { + Ok(v) => v, + Err(msg) => Err(custom_parse_error(&pair, msg))?, + }; + values_stack.push(val); + } + } + } + Ok(values_stack.pop().unwrap()) +} + +// List of rules that can be shortcutted if they have a single child +fn can_be_shortcutted(rule: Rule) -> bool { + use Rule::*; + match rule { + expression + | import_alt_expression + | or_expression + | plus_expression + | text_append_expression + | list_append_expression + | and_expression + | combine_expression + | prefer_expression + | combine_types_expression + | times_expression + | equal_expression + | not_equal_expression + | application_expression + | first_application_expression + | selector_expression + | annotated_expression => true, + _ => false, + } +} + +make_parser! { + token_rule!(EOI<()>); + + rule!(simple_label<Label>; + captured_str!(s) => Label::from(s.trim().to_owned()) + ); + rule!(quoted_label<Label>; + captured_str!(s) => Label::from(s.trim().to_owned()) + ); + rule!(label<Label>; children!( + [simple_label(l)] => l, + [quoted_label(l)] => l, + )); + + rule!(double_quote_literal<ParsedText<'a>>; children!( + [double_quote_chunk(chunks)..] => { + chunks.collect() + } + )); + + rule!(double_quote_chunk<ParsedTextContents<'a>>; children!( + [interpolation(e)] => { + InterpolatedTextContents::Expr(rc(e)) + }, + [double_quote_escaped(s)] => { + InterpolatedTextContents::Text(s) + }, + [double_quote_char(s)] => { + InterpolatedTextContents::Text(s.to_owned()) + }, + )); + rule!(double_quote_escaped<String>; + captured_str!(s) => { + match s { + "\"" => "\"".to_owned(), + "$" => "$".to_owned(), + "\\" => "\\".to_owned(), + "/" => "/".to_owned(), + "b" => "\u{0008}".to_owned(), + "f" => "\u{000C}".to_owned(), + "n" => "\n".to_owned(), + "r" => "\r".to_owned(), + "t" => "\t".to_owned(), + _ => { + // "uXXXX" + use std::convert::TryFrom; + let c = u16::from_str_radix(&s[1..5], 16).unwrap(); + let c = char::try_from(u32::from(c)).unwrap(); + std::iter::once(c).collect() + } + } + } + ); + rule!(double_quote_char<&'a str>; + captured_str!(s) => s + ); + + rule!(single_quote_literal<ParsedText<'a>>; children!( + [single_quote_continue(lines)] => { + let space = InterpolatedTextContents::Text(" ".to_owned()); + let newline = InterpolatedTextContents::Text("\n".to_owned()); + let min_indent = lines + .iter() + .map(|l| { + l.iter().rev().take_while(|c| **c == space).count() + }) + .min() + .unwrap(); + + lines + .into_iter() + .rev() + .map(|mut l| { l.split_off(l.len() - min_indent); l }) + .intersperse(vec![newline]) + .flat_map(|x| x.into_iter().rev()) + .collect::<ParsedText>() + } + )); + rule!(single_quote_char<&'a str>; + captured_str!(s) => s + ); + rule!(escaped_quote_pair<&'a str>; + captured_str!(_) => "''" + ); + rule!(escaped_interpolation<&'a str>; + captured_str!(_) => "${" + ); + rule!(interpolation<ParsedExpr<'a>>; children!( + [expression(e)] => e + )); + + rule!(single_quote_continue<Vec<Vec<ParsedTextContents<'a>>>>; children!( + [interpolation(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Expr(rc(c)); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [escaped_quote_pair(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [escaped_interpolation(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [single_quote_char("\n"), single_quote_continue(lines)] => { + let mut lines = lines; + lines.push(vec![]); + lines + }, + [single_quote_char(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [] => { + vec![vec![]] + }, + )); + + rule!(builtin<ParsedExpr<'a>>; span; + captured_str!(s) => { + spanned(span, match crate::Builtin::parse(s) { + Some(b) => Builtin(b), + None => match s { + "True" => BoolLit(true), + "False" => BoolLit(false), + "Type" => Const(crate::Const::Type), + "Kind" => Const(crate::Const::Kind), + "Sort" => Const(crate::Const::Sort), + _ => Err( + format!("Unrecognized builtin: '{}'", s) + )?, + } + }) + } + ); + + token_rule!(NaN<()>); + token_rule!(minus_infinity_literal<()>); + token_rule!(plus_infinity_literal<()>); + + rule!(numeric_double_literal<core::Double>; + captured_str!(s) => { + let s = s.trim(); + match s.parse::<f64>() { + Ok(x) if x.is_infinite() => + Err(format!("Overflow while parsing double literal '{}'", s))?, + Ok(x) => NaiveDouble::from(x), + Err(e) => Err(format!("{}", e))?, + } + } + ); + + rule!(double_literal<core::Double>; children!( + [numeric_double_literal(n)] => n, + [minus_infinity_literal(n)] => std::f64::NEG_INFINITY.into(), + [plus_infinity_literal(n)] => std::f64::INFINITY.into(), + [NaN(n)] => std::f64::NAN.into(), + )); + + rule!(natural_literal<core::Natural>; + captured_str!(s) => { + s.trim() + .parse() + .map_err(|e| format!("{}", e))? + } + ); + + rule!(integer_literal<core::Integer>; + captured_str!(s) => { + s.trim() + .parse() + .map_err(|e| format!("{}", e))? + } + ); + + rule!(identifier<ParsedExpr<'a>> as expression; span; children!( + [variable(v)] => { + spanned(span, Var(v)) + }, + [builtin(e)] => e, + )); + + rule!(variable<V<Label>>; children!( + [label(l), natural_literal(idx)] => { + V(l, idx) + }, + [label(l)] => { + V(l, 0) + }, + )); + + rule!(unquoted_path_component<&'a str>; captured_str!(s) => s); + rule!(quoted_path_component<&'a str>; captured_str!(s) => s); + rule!(path_component<String>; children!( + [unquoted_path_component(s)] => { + percent_encoding::percent_decode(s.as_bytes()) + .decode_utf8_lossy() + .into_owned() + }, + [quoted_path_component(s)] => s.to_string(), + )); + rule!(path<PathBuf>; children!( + [path_component(components)..] => { + components.collect() + } + )); + + rule_group!(local<(FilePrefix, PathBuf)>); + + rule!(parent_path<(FilePrefix, PathBuf)> as local; children!( + [path(p)] => (FilePrefix::Parent, p) + )); + rule!(here_path<(FilePrefix, PathBuf)> as local; children!( + [path(p)] => (FilePrefix::Here, p) + )); + rule!(home_path<(FilePrefix, PathBuf)> as local; children!( + [path(p)] => (FilePrefix::Home, p) + )); + rule!(absolute_path<(FilePrefix, PathBuf)> as local; children!( + [path(p)] => (FilePrefix::Absolute, p) + )); + + rule!(scheme<Scheme>; captured_str!(s) => match s { + "http" => Scheme::HTTP, + "https" => Scheme::HTTPS, + _ => unreachable!(), + }); + + rule!(http_raw<URL>; children!( + [scheme(sch), authority(auth), path(p)] => URL { + scheme: sch, + authority: auth, + path: p, + query: None, + headers: None, + }, + [scheme(sch), authority(auth), path(p), query(q)] => URL { + scheme: sch, + authority: auth, + path: p, + query: Some(q), + headers: None, + }, + )); + + rule!(authority<String>; captured_str!(s) => s.to_owned()); + + rule!(query<String>; captured_str!(s) => s.to_owned()); + + rule!(http<URL>; children!( + [http_raw(url)] => url, + [http_raw(url), import_hashed(ih)] => + URL { headers: Some(Box::new(ih)), ..url }, + )); + + rule!(env<String>; children!( + [bash_environment_variable(s)] => s, + [posix_environment_variable(s)] => s, + )); + rule!(bash_environment_variable<String>; captured_str!(s) => s.to_owned()); + rule!(posix_environment_variable<String>; children!( + [posix_environment_variable_character(chars)..] => { + chars.collect() + }, + )); + rule!(posix_environment_variable_character<Cow<'a, str>>; + captured_str!(s) => { + match s { + "\\\"" => Cow::Owned("\"".to_owned()), + "\\\\" => Cow::Owned("\\".to_owned()), + "\\a" => Cow::Owned("\u{0007}".to_owned()), + "\\b" => Cow::Owned("\u{0008}".to_owned()), + "\\f" => Cow::Owned("\u{000C}".to_owned()), + "\\n" => Cow::Owned("\n".to_owned()), + "\\r" => Cow::Owned("\r".to_owned()), + "\\t" => Cow::Owned("\t".to_owned()), + "\\v" => Cow::Owned("\u{000B}".to_owned()), + _ => Cow::Borrowed(s) + } + } + ); + + token_rule!(missing<()>); + + rule!(import_type<ImportLocation>; children!( + [missing(_)] => { + ImportLocation::Missing + }, + [env(e)] => { + ImportLocation::Env(e) + }, + [http(url)] => { + ImportLocation::Remote(url) + }, + [local((prefix, p))] => { + ImportLocation::Local(prefix, p) + }, + )); + + rule!(hash<Hash>; captured_str!(s) => + Hash { + protocol: s.trim()[..6].to_owned(), + hash: s.trim()[7..].to_owned(), + } + ); + + rule!(import_hashed<ImportHashed>; children!( + [import_type(location)] => + ImportHashed { location, hash: None }, + [import_type(location), hash(h)] => + ImportHashed { location, hash: Some(h) }, + )); + + token_rule!(Text<()>); + + rule!(import<ParsedExpr<'a>> as expression; span; children!( + [import_hashed(location_hashed)] => { + spanned(span, Embed(Import { + mode: ImportMode::Code, + location_hashed + })) + }, + [import_hashed(location_hashed), Text(_)] => { + spanned(span, Embed(Import { + mode: ImportMode::RawText, + location_hashed + })) + }, + )); + + token_rule!(lambda<()>); + token_rule!(forall<()>); + token_rule!(arrow<()>); + token_rule!(merge<()>); + token_rule!(if_<()>); + token_rule!(in_<()>); + + rule!(expression<ParsedExpr<'a>> as expression; span; children!( + [lambda(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(span, Lam(l, rc(typ), rc(body))) + }, + [if_(()), expression(cond), expression(left), expression(right)] => { + spanned(span, BoolIf(rc(cond), rc(left), rc(right))) + }, + [let_binding(bindings).., in_(()), expression(final_expr)] => { + bindings.rev().fold( + final_expr, + |acc, x| Let(x.0, x.1, x.2, rc(acc)) + ) + }, + [forall(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(span, Pi(l, rc(typ), rc(body))) + }, + [expression(typ), arrow(()), expression(body)] => { + spanned(span, Pi("_".into(), rc(typ), rc(body))) + }, + [merge(()), expression(x), expression(y), expression(z)] => { + spanned(span, Merge(rc(x), rc(y), Some(rc(z)))) + }, + [expression(e)] => e, + )); + + rule!(let_binding<(Label, Option<ParsedSubExpr<'a>>, ParsedSubExpr<'a>)>; + children!( + [label(name), expression(annot), expression(expr)] => + (name, Some(rc(annot)), rc(expr)), + [label(name), expression(expr)] => + (name, None, rc(expr)), + )); + + token_rule!(List<()>); + token_rule!(Optional<()>); + + rule!(empty_collection<ParsedExpr<'a>> as expression; span; children!( + [List(_), expression(t)] => { + spanned(span, EmptyListLit(rc(t))) + }, + [Optional(_), expression(t)] => { + spanned(span, OldOptionalLit(None, rc(t))) + }, + )); + + rule!(non_empty_optional<ParsedExpr<'a>> as expression; span; children!( + [expression(x), Optional(_), expression(t)] => { + spanned(span, OldOptionalLit(Some(rc(x)), rc(t))) + } + )); + + rule!(import_alt_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::ImportAlt; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(or_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::BoolOr; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(plus_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::NaturalPlus; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(text_append_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::TextAppend; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(list_append_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::ListAppend; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(and_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::BoolAnd; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(combine_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::Combine; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(prefer_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::Prefer; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(combine_types_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::CombineTypes; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(times_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::NaturalTimes; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(equal_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::BoolEQ; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + rule!(not_equal_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + let o = crate::BinOp::BoolNE; + rest.fold(first, |acc, e| BinOp(o, rc(acc), rc(e))) + }, + )); + + rule!(annotated_expression<ParsedExpr<'a>> as expression; span; children!( + [expression(e)] => e, + [expression(e), expression(annot)] => { + spanned(span, Annot(rc(e), rc(annot))) + }, + )); + + token_rule!(Some_<()>); + + rule!(application_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), expression(rest)..] => { + rest.fold(first, |acc, e| App(rc(acc), rc(e))) + }, + )); + + rule!(first_application_expression<ParsedExpr<'a>> as expression; span; + children!( + [expression(e)] => e, + [Some_(()), expression(e)] => { + spanned(span, SomeLit(rc(e))) + }, + [merge(()), expression(x), expression(y)] => { + spanned(span, Merge(rc(x), rc(y), None)) + }, + )); + + rule!(selector_expression<ParsedExpr<'a>> as expression; children!( + [expression(e)] => e, + [expression(first), selector(rest)..] => { + rest.fold(first, |acc, e| match e { + Either::Left(l) => Field(rc(acc), l), + Either::Right(ls) => Projection(rc(acc), ls), + }) + } + )); + + rule!(selector<Either<Label, Vec<Label>>>; children!( + [label(l)] => Either::Left(l), + [labels(ls)] => Either::Right(ls), + )); + + rule!(labels<Vec<Label>>; children!( + [label(ls)..] => ls.collect(), + )); + + rule!(primitive_expression<ParsedExpr<'a>> as expression; span; children!( + [double_literal(n)] => spanned(span, DoubleLit(n)), + [natural_literal(n)] => spanned(span, NaturalLit(n)), + [integer_literal(n)] => spanned(span, IntegerLit(n)), + [double_quote_literal(s)] => spanned(span, TextLit(s)), + [single_quote_literal(s)] => spanned(span, TextLit(s)), + [expression(e)] => e, + )); + + rule!(empty_record_literal<ParsedExpr<'a>> as expression; span; + captured_str!(_) => spanned(span, RecordLit(BTreeMap::new())) + ); + + rule!(empty_record_type<ParsedExpr<'a>> as expression; span; + captured_str!(_) => spanned(span, RecordType(BTreeMap::new())) + ); + + rule!(non_empty_record_type_or_literal<ParsedExpr<'a>> as expression; span; + children!( + [label(first_label), non_empty_record_type(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, rc(first_expr)); + spanned(span, RecordType(map)) + }, + [label(first_label), non_empty_record_literal(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, rc(first_expr)); + spanned(span, RecordLit(map)) + }, + )); + + rule!(non_empty_record_type + <(ParsedExpr<'a>, BTreeMap<Label, ParsedSubExpr<'a>>)>; children!( + [expression(expr), record_type_entry(entries)..] => { + (expr, entries.collect()) + } + )); + + rule!(record_type_entry<(Label, ParsedSubExpr<'a>)>; children!( + [label(name), expression(expr)] => (name, rc(expr)) + )); + + rule!(non_empty_record_literal + <(ParsedExpr<'a>, BTreeMap<Label, ParsedSubExpr<'a>>)>; children!( + [expression(expr), record_literal_entry(entries)..] => { + (expr, entries.collect()) + } + )); + + rule!(record_literal_entry<(Label, ParsedSubExpr<'a>)>; children!( + [label(name), expression(expr)] => (name, rc(expr)) + )); + + rule!(union_type_or_literal<ParsedExpr<'a>> as expression; span; children!( + [empty_union_type(_)] => { + spanned(span, UnionType(BTreeMap::new())) + }, + [non_empty_union_type_or_literal((Some((l, e)), entries))] => { + spanned(span, UnionLit(l, e, entries)) + }, + [non_empty_union_type_or_literal((None, entries))] => { + spanned(span, UnionType(entries)) + }, + )); + + token_rule!(empty_union_type<()>); + + rule!(non_empty_union_type_or_literal + <(Option<(Label, ParsedSubExpr<'a>)>, + BTreeMap<Label, Option<ParsedSubExpr<'a>>>)>; + children!( + [label(l), union_literal_variant_value((e, entries))] => { + (Some((l, e)), entries) + }, + [label(l), union_type_or_literal_variant_type((e, rest))] => { + let (x, mut entries) = rest; + entries.insert(l, e); + (x, entries) + }, + )); + + rule!(union_literal_variant_value + <(ParsedSubExpr<'a>, BTreeMap<Label, Option<ParsedSubExpr<'a>>>)>; + children!( + [expression(e), union_type_entry(entries)..] => { + (rc(e), entries.collect()) + }, + )); + + rule!(union_type_entry<(Label, Option<ParsedSubExpr<'a>>)>; children!( + [label(name), expression(expr)] => (name, Some(rc(expr))), + [label(name)] => (name, None), + )); + + // TODO: unary union variants + rule!(union_type_or_literal_variant_type + <(Option<ParsedSubExpr<'a>>, + (Option<(Label, ParsedSubExpr<'a>)>, + BTreeMap<Label, Option<ParsedSubExpr<'a>>>))>; + children!( + [expression(e), non_empty_union_type_or_literal(rest)] => { + (Some(rc(e)), rest) + }, + [expression(e)] => { + (Some(rc(e)), (None, BTreeMap::new())) + }, + [non_empty_union_type_or_literal(rest)] => { + (None, rest) + }, + [] => { + (None, (None, BTreeMap::new())) + }, + )); + + rule!(non_empty_list_literal<ParsedExpr<'a>> as expression; span; + children!( + [expression(items)..] => spanned( + span, + NEListLit(items.map(rc).collect()) + ) + )); + + rule!(final_expression<ParsedExpr<'a>> as expression; children!( + [expression(e), EOI(_eoi)] => e + )); +} + +pub fn parse_expr<'a>(s: &'a str) -> ParseResult<ParsedSubExpr<'a>> { + let mut pairs = DhallParser::parse(Rule::final_expression, s)?; + let expr = do_parse(pairs.next().unwrap())?; + assert_eq!(pairs.next(), None); + match expr { + ParsedValue::expression(e) => Ok(rc(e)), + _ => unreachable!(), + } + // Ok(rc(BoolLit(false))) +} + +#[test] +fn test_parse() { + // let expr = r#"{ x = "foo", y = 4 }.x"#; + // let expr = r#"(1 + 2) * 3"#; + let expr = r#"(1) + 3 * 5"#; + println!("{:?}", parse_expr(expr)); + match parse_expr(expr) { + Err(e) => { + println!("{:?}", e); + println!("{}", e); + } + ok => println!("{:?}", ok), + }; + // assert!(false); +} diff --git a/dhall_syntax/src/printer.rs b/dhall_syntax/src/printer.rs new file mode 100644 index 0000000..704000a --- /dev/null +++ b/dhall_syntax/src/printer.rs @@ -0,0 +1,498 @@ +use crate::*; +use itertools::Itertools; +use std::fmt::{self, Display}; + +/// Generic instance that delegates to subexpressions +impl<SE: Display + Clone, N, E: Display> Display for ExprF<SE, Label, N, E> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::ExprF::*; + match self { + Lam(a, b, c) => { + write!(f, "λ({} : {}) → {}", a, b, c)?; + } + BoolIf(a, b, c) => { + write!(f, "if {} then {} else {}", a, b, c)?; + } + Pi(a, b, c) if &String::from(a) == "_" => { + write!(f, "{} → {}", b, c)?; + } + Pi(a, b, c) => { + write!(f, "∀({} : {}) → {}", a, b, c)?; + } + Let(a, b, c, d) => { + write!(f, "let {}", a)?; + if let Some(b) = b { + write!(f, " : {}", b)?; + } + write!(f, " = {} in {}", c, d)?; + } + EmptyListLit(t) => { + write!(f, "[] : List {}", t)?; + } + NEListLit(es) => { + fmt_list("[", ", ", "]", es, f, Display::fmt)?; + } + OldOptionalLit(None, t) => { + write!(f, "[] : Optional {}", t)?; + } + OldOptionalLit(Some(x), t) => { + write!(f, "[{}] : Optional {}", x, t)?; + } + SomeLit(e) => { + write!(f, "Some {}", e)?; + } + Merge(a, b, c) => { + write!(f, "merge {} {}", a, b)?; + if let Some(c) = c { + write!(f, " : {}", c)?; + } + } + Annot(a, b) => { + write!(f, "{} : {}", a, b)?; + } + ExprF::BinOp(op, a, b) => { + write!(f, "{} {} {}", a, op, b)?; + } + ExprF::App(a, b) => { + write!(f, "{} {}", a, b)?; + } + Field(a, b) => { + write!(f, "{}.{}", a, b)?; + } + Projection(e, ls) => { + write!(f, "{}.", e)?; + fmt_list("{ ", ", ", " }", ls, f, Display::fmt)?; + } + Var(a) => a.fmt(f)?, + Const(k) => k.fmt(f)?, + Builtin(v) => v.fmt(f)?, + BoolLit(true) => f.write_str("True")?, + BoolLit(false) => f.write_str("False")?, + NaturalLit(a) => a.fmt(f)?, + IntegerLit(a) if *a >= 0 => { + f.write_str("+")?; + a.fmt(f)?; + } + IntegerLit(a) => a.fmt(f)?, + DoubleLit(a) => a.fmt(f)?, + TextLit(a) => a.fmt(f)?, + RecordType(a) if a.is_empty() => f.write_str("{}")?, + RecordType(a) => fmt_list("{ ", ", ", " }", a, f, |(k, t), f| { + write!(f, "{} : {}", k, t) + })?, + RecordLit(a) if a.is_empty() => f.write_str("{=}")?, + RecordLit(a) => fmt_list("{ ", ", ", " }", a, f, |(k, v), f| { + write!(f, "{} = {}", k, v) + })?, + UnionType(a) => fmt_list("< ", " | ", " >", a, f, |(k, v), f| { + write!(f, "{}", k)?; + if let Some(v) = v { + write!(f, ": {}", v)?; + } + Ok(()) + })?, + UnionLit(a, b, c) => { + write!(f, "< {} = {}", a, b)?; + for (k, v) in c { + write!(f, " | {}", k)?; + if let Some(v) = v { + write!(f, ": {}", v)?; + } + } + f.write_str(" >")? + } + Embed(a) => a.fmt(f)?, + Note(_, b) => b.fmt(f)?, + } + Ok(()) + } +} + +// There is a one-to-one correspondence between the formatter and the grammar. Each phase is +// named after a corresponding grammar group, and the structure of the formatter reflects +// the relationship between the corresponding grammar rules. This leads to the nice property +// of automatically getting all the parentheses and precedences right. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +enum PrintPhase { + Base, + Operator, + BinOp(core::BinOp), + App, + Import, + Primitive, +} + +// Wraps an Expr with a phase, so that phase selsction can be done +// separate from the actual printing +#[derive(Clone)] +struct PhasedExpr<'a, S, A>(&'a SubExpr<S, A>, PrintPhase); + +impl<'a, S: Clone, A: Display + Clone> Display for PhasedExpr<'a, S, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.0.as_ref().fmt_phase(f, self.1) + } +} + +impl<'a, S: Clone, A: Display + Clone> PhasedExpr<'a, S, A> { + fn phase(self, phase: PrintPhase) -> PhasedExpr<'a, S, A> { + PhasedExpr(self.0, phase) + } +} + +impl<S: Clone, A: Display + Clone> Expr<S, A> { + fn fmt_phase( + &self, + f: &mut fmt::Formatter, + mut phase: PrintPhase, + ) -> Result<(), fmt::Error> { + use crate::ExprF::*; + use PrintPhase::*; + + let needs_paren = match self { + Lam(_, _, _) + | BoolIf(_, _, _) + | Pi(_, _, _) + | Let(_, _, _, _) + | EmptyListLit(_) + | NEListLit(_) + | OldOptionalLit(_, _) + | SomeLit(_) + | Merge(_, _, _) + | Annot(_, _) + if phase > Base => + { + true + } + // Precedence is magically handled by the ordering of BinOps. + ExprF::BinOp(op, _, _) if phase > PrintPhase::BinOp(*op) => true, + ExprF::App(_, _) if phase > PrintPhase::App => true, + Field(_, _) | Projection(_, _) if phase > Import => true, + _ => false, + }; + + if needs_paren { + phase = Base; + } + + // Annotate subexpressions with the appropriate phase, defaulting to Base + let phased_self = match self.map_ref_simple(|e| PhasedExpr(e, Base)) { + Pi(a, b, c) => { + if &String::from(&a) == "_" { + Pi(a, b.phase(Operator), c) + } else { + Pi(a, b, c) + } + } + Merge(a, b, c) => Merge( + a.phase(Import), + b.phase(Import), + c.map(|x| x.phase(PrintPhase::App)), + ), + Annot(a, b) => Annot(a.phase(Operator), b), + ExprF::BinOp(op, a, b) => ExprF::BinOp( + op, + a.phase(PrintPhase::BinOp(op)), + b.phase(PrintPhase::BinOp(op)), + ), + EmptyListLit(t) => EmptyListLit(t.phase(Import)), + OldOptionalLit(x, t) => OldOptionalLit(x, t.phase(Import)), + SomeLit(e) => SomeLit(e.phase(Import)), + ExprF::App(f, a) => ExprF::App(f.phase(Import), a.phase(Import)), + Field(a, b) => Field(a.phase(Primitive), b), + Projection(e, ls) => Projection(e.phase(Primitive), ls), + Note(n, b) => Note(n, b.phase(phase)), + e => e, + }; + + if needs_paren { + f.write_str("(")?; + } + + // Uses the ExprF<PhasedExpr<_>, _, _, _> instance + phased_self.fmt(f)?; + + if needs_paren { + f.write_str(")")?; + } + Ok(()) + } +} + +impl<S: Clone, A: Display + Clone> Display for SubExpr<S, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.as_ref().fmt_phase(f, PrintPhase::Base) + } +} + +fn fmt_list<T, I, F>( + open: &str, + sep: &str, + close: &str, + it: I, + f: &mut fmt::Formatter, + func: F, +) -> Result<(), fmt::Error> +where + I: IntoIterator<Item = T>, + F: Fn(T, &mut fmt::Formatter) -> Result<(), fmt::Error>, +{ + f.write_str(open)?; + for (i, x) in it.into_iter().enumerate() { + if i > 0 { + f.write_str(sep)?; + } + func(x, f)?; + } + f.write_str(close) +} + +impl<SubExpr: Display + Clone> Display for InterpolatedText<SubExpr> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + f.write_str("\"")?; + for x in self.iter() { + match x { + InterpolatedTextContents::Text(a) => { + for c in a.chars() { + match c { + '\\' => f.write_str("\\\\"), + '"' => f.write_str("\\\""), + '$' => f.write_str("\\$"), + '\u{0008}' => f.write_str("\\b"), + '\u{000C}' => f.write_str("\\f"), + '\n' => f.write_str("\\n"), + '\r' => f.write_str("\\r"), + '\t' => f.write_str("\\t"), + c => write!(f, "{}", c), + }?; + } + } + InterpolatedTextContents::Expr(e) => { + f.write_str("${ ")?; + e.fmt(f)?; + f.write_str(" }")?; + } + } + } + f.write_str("\"")?; + Ok(()) + } +} + +impl Display for Const { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <Self as fmt::Debug>::fmt(self, f) + } +} + +impl Display for BinOp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::BinOp::*; + f.write_str(match self { + BoolOr => "||", + TextAppend => "++", + NaturalPlus => "+", + BoolAnd => "&&", + Combine => "/\\", + NaturalTimes => "*", + BoolEQ => "==", + BoolNE => "!=", + CombineTypes => "//\\\\", + ImportAlt => "?", + Prefer => "//", + ListAppend => "#", + }) + } +} + +impl Display for NaiveDouble { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let v = f64::from(*self); + if v == std::f64::INFINITY { + f.write_str("Infinity") + } else if v == std::f64::NEG_INFINITY { + f.write_str("-Infinity") + } else if v.is_nan() { + f.write_str("NaN") + } else { + let s = format!("{}", v); + if s.contains('e') || s.contains('.') { + f.write_str(&s) + } else { + write!(f, "{}.0", s) + } + } + } +} + +impl Display for Label { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + // TODO: distinguish between reserved and nonreserved locations for quoting builtins + let s = String::from(self); + let is_reserved = match s.as_str() { + "let" | "in" | "if" | "then" | "else" | "Type" | "Kind" + | "Sort" | "True" | "False" => true, + _ => crate::Builtin::parse(&s).is_some(), + }; + if !is_reserved && s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s) + } else { + write!(f, "`{}`", s) + } + } +} + +impl Display for Hash { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + write!(f, "{}:{}", self.protocol, self.hash) + } +} +impl Display for ImportHashed { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use std::path::PathBuf; + use FilePrefix::*; + use ImportLocation::*; + let quoted_path_component = |s: &str| -> String { + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + s.to_owned() + } else { + format!("\"{}\"", s) + } + }; + let fmt_path = |f: &mut fmt::Formatter, p: &PathBuf| { + let res: String = p + .iter() + .map(|c| quoted_path_component(c.to_string_lossy().as_ref())) + .join("/"); + f.write_str(&res) + }; + + match &self.location { + Local(prefix, path) => { + let prefix = match prefix { + Here => ".", + Parent => "..", + Home => "~", + Absolute => "", + }; + write!(f, "{}/", prefix)?; + fmt_path(f, path)?; + } + Remote(url) => { + write!(f, "{}://{}/", url.scheme, url.authority,)?; + fmt_path(f, &url.path)?; + if let Some(q) = &url.query { + write!(f, "?{}", q)? + } + if let Some(h) = &url.headers { + write!(f, " using ({})", h)? + } + } + Env(s) => { + write!(f, "env:")?; + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s)?; + } else { + write!(f, "\"")?; + for c in s.chars() { + match c { + '"' => f.write_str("\\\"")?, + '\\' => f.write_str("\\\\")?, + '\u{0007}' => f.write_str("\\a")?, + '\u{0008}' => f.write_str("\\b")?, + '\u{000C}' => f.write_str("\\f")?, + '\n' => f.write_str("\\n")?, + '\r' => f.write_str("\\r")?, + '\t' => f.write_str("\\t")?, + '\u{000B}' => f.write_str("\\v")?, + _ => write!(f, "{}", c)?, + } + } + write!(f, "\"")?; + } + } + Missing => { + write!(f, "missing")?; + } + } + if let Some(hash) = &self.hash { + write!(f, " ")?; + hash.fmt(f)?; + } + Ok(()) + } +} + +impl Display for Import { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.location_hashed.fmt(f)?; + use ImportMode::*; + match self.mode { + Code => {} + RawText => write!(f, " as Text")?, + } + Ok(()) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Builtin::*; + f.write_str(match *self { + Bool => "Bool", + Natural => "Natural", + Integer => "Integer", + Double => "Double", + Text => "Text", + List => "List", + Optional => "Optional", + OptionalNone => "None", + NaturalBuild => "Natural/build", + NaturalFold => "Natural/fold", + NaturalIsZero => "Natural/isZero", + NaturalEven => "Natural/even", + NaturalOdd => "Natural/odd", + NaturalToInteger => "Natural/toInteger", + NaturalShow => "Natural/show", + IntegerToDouble => "Integer/toDouble", + IntegerShow => "Integer/show", + DoubleShow => "Double/show", + ListBuild => "List/build", + ListFold => "List/fold", + ListLength => "List/length", + ListHead => "List/head", + ListLast => "List/last", + ListIndexed => "List/indexed", + ListReverse => "List/reverse", + OptionalFold => "Optional/fold", + OptionalBuild => "Optional/build", + TextShow => "Text/show", + }) + } +} + +impl Display for Scheme { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Scheme::*; + f.write_str(match *self { + HTTP => "http", + HTTPS => "https", + }) + } +} + +impl<Label: Display> Display for V<Label> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let V(x, n) = self; + x.fmt(f)?; + if *n != 0 { + write!(f, "@{}", n)?; + } + Ok(()) + } +} + +impl Display for X { + fn fmt(&self, _: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self {} + } +} diff --git a/dhall_syntax/src/text.rs b/dhall_syntax/src/text.rs new file mode 100644 index 0000000..83643d9 --- /dev/null +++ b/dhall_syntax/src/text.rs @@ -0,0 +1,116 @@ +use std::iter::FromIterator; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub struct InterpolatedText<SubExpr> { + head: String, + tail: Vec<(SubExpr, String)>, +} + +impl<SubExpr> From<(String, Vec<(SubExpr, String)>)> + for InterpolatedText<SubExpr> +{ + fn from(x: (String, Vec<(SubExpr, String)>)) -> Self { + InterpolatedText { + head: x.0, + tail: x.1, + } + } +} + +impl<SubExpr> From<String> for InterpolatedText<SubExpr> { + fn from(s: String) -> Self { + InterpolatedText { + head: s, + tail: vec![], + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum InterpolatedTextContents<SubExpr> { + Text(String), + Expr(SubExpr), +} + +impl<SubExpr> InterpolatedTextContents<SubExpr> { + pub fn is_empty(&self) -> bool { + use InterpolatedTextContents::{Expr, Text}; + match self { + Expr(_) => false, + Text(s) => s.is_empty(), + } + } +} + +impl<SubExpr> InterpolatedText<SubExpr> { + pub fn traverse_ref<'a, SubExpr2, E, F>( + &'a self, + mut f: F, + ) -> Result<InterpolatedText<SubExpr2>, E> + where + F: FnMut(&'a SubExpr) -> Result<SubExpr2, E>, + { + Ok(InterpolatedText { + head: self.head.clone(), + tail: self + .tail + .iter() + .map(|(e, s)| Ok((f(e)?, s.clone()))) + .collect::<Result<_, _>>()?, + }) + } + + pub fn iter<'a>( + &'a self, + ) -> impl Iterator<Item = InterpolatedTextContents<SubExpr>> + 'a + where + SubExpr: Clone, + { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + once(Text(self.head.clone())) + .chain(self.tail.iter().flat_map(|(e, s)| { + once(Expr(SubExpr::clone(e))).chain(once(Text(s.clone()))) + })) + .filter(|c| !c.is_empty()) + } + + pub fn into_iter( + self, + ) -> impl Iterator<Item = InterpolatedTextContents<SubExpr>> { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + once(Text(self.head)) + .chain( + self.tail + .into_iter() + .flat_map(|(e, s)| once(Expr(e)).chain(once(Text(s)))), + ) + .filter(|c| !c.is_empty()) + } +} + +impl<SubExpr> FromIterator<InterpolatedTextContents<SubExpr>> + for InterpolatedText<SubExpr> +{ + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = InterpolatedTextContents<SubExpr>>, + { + let mut res = InterpolatedText { + head: String::new(), + tail: Vec::new(), + }; + let mut crnt_str = &mut res.head; + for x in iter.into_iter() { + match x { + InterpolatedTextContents::Text(s) => crnt_str.push_str(&s), + InterpolatedTextContents::Expr(e) => { + res.tail.push((e, String::new())); + crnt_str = &mut res.tail.last_mut().unwrap().1; + } + } + } + res + } +} diff --git a/dhall_syntax/src/visitor.rs b/dhall_syntax/src/visitor.rs new file mode 100644 index 0000000..caaefce --- /dev/null +++ b/dhall_syntax/src/visitor.rs @@ -0,0 +1,667 @@ +use std::collections::BTreeMap; + +use crate::*; + +/// A way too generic Visitor trait. +pub trait GenericVisitor<Input, Return>: Sized { + fn visit(self, input: Input) -> Return; +} + +/// A visitor trait that can be used to traverse `ExprF`s. We need this pattern +/// so that Rust lets us have as much mutability as we can. +/// For example, `traverse_embed` cannot be made using only `traverse_ref`, because +/// `traverse_ref` takes a `FnMut` so we would need to pass multiple mutable +/// reverences to this argument to `traverse_ref`. But Rust's ownership system +/// is all about preventing exactly this ! So we have to be more clever. +/// The visitor pattern allows us to have only one mutable thing the whole +/// time: the visitor itself. The visitor can then carry around multiple closures +/// or just one, and Rust is ok with either. See for example TraverseRefVisitor +/// and TraverseEmbedVisitor. +/// This is very generic. For a more legible trait, see ExprFInFallibleVisitor +pub trait ExprFVeryGenericVisitor<'a, Ret, SE1, L1, N1, E1>: Sized { + type Error; + type SE2; + type L2; + type N2; + type E2; + + fn visit_subexpr( + &mut self, + subexpr: &'a SE1, + ) -> Result<Self::SE2, Self::Error>; + fn visit_label(&mut self, label: &'a L1) -> Result<Self::L2, Self::Error>; + + fn visit_binder( + self, + label: &'a L1, + subexpr: &'a SE1, + ) -> Result<(Self::L2, Self::SE2), Self::Error>; + + fn visit_embed_squash(self, embed: &'a E1) -> Result<Ret, Self::Error>; + + fn visit_note_squash( + self, + note: &'a N1, + subexpr: &'a SE1, + ) -> Result<Ret, Self::Error>; + + // Called with the result of the map, in the non-embed/note case. + // Useful to change the result type, and/or avoid some loss of info + fn visit_resulting_exprf( + result: ExprF<Self::SE2, Self::L2, Self::N2, Self::E2>, + ) -> Result<Ret, Self::Error>; +} + +impl<'a, T, Ret, SE1, L1, N1, E1> + GenericVisitor<&'a ExprF<SE1, L1, N1, E1>, Result<Ret, T::Error>> for T +where + L1: Ord, + T::L2: Ord, + T: ExprFVeryGenericVisitor<'a, Ret, SE1, L1, N1, E1>, +{ + fn visit(self, input: &'a ExprF<SE1, L1, N1, E1>) -> Result<Ret, T::Error> { + fn vec<'a, T, U, Err, F: FnMut(&'a T) -> Result<U, Err>>( + x: &'a [T], + f: F, + ) -> Result<Vec<U>, Err> { + x.iter().map(f).collect() + } + fn opt<'a, T, U, Err, F: FnOnce(&'a T) -> Result<U, Err>>( + x: &'a Option<T>, + f: F, + ) -> Result<Option<U>, Err> { + Ok(match x { + Some(x) => Some(f(x)?), + None => None, + }) + } + fn btmap<'a, V, Ret, SE, L, N, E>( + x: &'a BTreeMap<L, SE>, + mut v: V, + ) -> Result<BTreeMap<V::L2, V::SE2>, V::Error> + where + L: Ord, + V::L2: Ord, + V: ExprFVeryGenericVisitor<'a, Ret, SE, L, N, E>, + { + x.iter() + .map(|(k, x)| Ok((v.visit_label(k)?, v.visit_subexpr(x)?))) + .collect() + } + fn btoptmap<'a, V, Ret, SE, L, N, E>( + x: &'a BTreeMap<L, Option<SE>>, + mut v: V, + ) -> Result<BTreeMap<V::L2, Option<V::SE2>>, V::Error> + where + L: Ord, + V::L2: Ord, + V: ExprFVeryGenericVisitor<'a, Ret, SE, L, N, E>, + { + x.iter() + .map(|(k, x)| { + Ok(( + v.visit_label(k)?, + match x { + Some(x) => Some(v.visit_subexpr(x)?), + None => None, + }, + )) + }) + .collect() + } + + let mut v = self; + use crate::ExprF::*; + T::visit_resulting_exprf(match input { + Var(V(l, n)) => Var(V(v.visit_label(l)?, *n)), + Lam(l, t, e) => { + let t = v.visit_subexpr(t)?; + let (l, e) = v.visit_binder(l, e)?; + Lam(l, t, e) + } + Pi(l, t, e) => { + let t = v.visit_subexpr(t)?; + let (l, e) = v.visit_binder(l, e)?; + Pi(l, t, e) + } + Let(l, t, a, e) => { + let t = opt(t, &mut |e| v.visit_subexpr(e))?; + let a = v.visit_subexpr(a)?; + let (l, e) = v.visit_binder(l, e)?; + Let(l, t, a, e) + } + App(f, a) => App(v.visit_subexpr(f)?, v.visit_subexpr(a)?), + Annot(x, t) => Annot(v.visit_subexpr(x)?, v.visit_subexpr(t)?), + Const(k) => Const(*k), + Builtin(v) => Builtin(*v), + BoolLit(b) => BoolLit(*b), + NaturalLit(n) => NaturalLit(*n), + IntegerLit(n) => IntegerLit(*n), + DoubleLit(n) => DoubleLit(*n), + TextLit(t) => TextLit(t.traverse_ref(|e| v.visit_subexpr(e))?), + BinOp(o, x, y) => { + BinOp(*o, v.visit_subexpr(x)?, v.visit_subexpr(y)?) + } + BoolIf(b, t, f) => BoolIf( + v.visit_subexpr(b)?, + v.visit_subexpr(t)?, + v.visit_subexpr(f)?, + ), + EmptyListLit(t) => EmptyListLit(v.visit_subexpr(t)?), + NEListLit(es) => NEListLit(vec(es, |e| v.visit_subexpr(e))?), + OldOptionalLit(x, t) => OldOptionalLit( + opt(x, |e| v.visit_subexpr(e))?, + v.visit_subexpr(t)?, + ), + SomeLit(e) => SomeLit(v.visit_subexpr(e)?), + RecordType(kts) => RecordType(btmap(kts, v)?), + RecordLit(kvs) => RecordLit(btmap(kvs, v)?), + UnionType(kts) => UnionType(btoptmap(kts, v)?), + UnionLit(k, x, kts) => UnionLit( + v.visit_label(k)?, + v.visit_subexpr(x)?, + btoptmap(kts, v)?, + ), + Merge(x, y, t) => Merge( + v.visit_subexpr(x)?, + v.visit_subexpr(y)?, + opt(t, |e| v.visit_subexpr(e))?, + ), + Field(e, l) => Field(v.visit_subexpr(e)?, v.visit_label(l)?), + Projection(e, ls) => { + Projection(v.visit_subexpr(e)?, vec(ls, |l| v.visit_label(l))?) + } + Note(n, e) => return v.visit_note_squash(n, e), + Embed(a) => return v.visit_embed_squash(a), + }) + } +} + +/// Like ExprFVeryGenericVisitor, but sets the return +/// type to ExprF<_> +pub trait ExprFFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2>: + Sized +{ + type Error; + + fn visit_subexpr(&mut self, subexpr: &'a SE1) -> Result<SE2, Self::Error>; + fn visit_label(&mut self, label: &'a L1) -> Result<L2, Self::Error>; + fn visit_note(self, note: &'a N1) -> Result<N2, Self::Error>; + fn visit_embed(self, embed: &'a E1) -> Result<E2, Self::Error>; + + fn visit_subexpr_under_binder( + mut self, + _label: &'a L1, + subexpr: &'a SE1, + ) -> Result<SE2, Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit_binder( + mut self, + label: &'a L1, + subexpr: &'a SE1, + ) -> Result<(L2, SE2), Self::Error> { + Ok(( + self.visit_label(label)?, + self.visit_subexpr_under_binder(label, subexpr)?, + )) + } + + fn visit_embed_squash( + self, + embed: &'a E1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + Ok(ExprF::Embed(self.visit_embed(embed)?)) + } + + fn visit_note_squash( + mut self, + note: &'a N1, + subexpr: &'a SE1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + let subexpr = self.visit_subexpr(subexpr)?; + let note = self.visit_note(note)?; + Ok(ExprF::Note(note, subexpr)) + } +} + +impl<'a, T, SE1, SE2, L1, L2, N1, N2, E1, E2> + ExprFVeryGenericVisitor<'a, ExprF<SE2, L2, N2, E2>, SE1, L1, N1, E1> for T +where + T: ExprFFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2>, +{ + type Error = T::Error; + type SE2 = SE2; + type L2 = L2; + type N2 = N2; + type E2 = E2; + + fn visit_subexpr( + &mut self, + subexpr: &'a SE1, + ) -> Result<Self::SE2, Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit_label(&mut self, label: &'a L1) -> Result<Self::L2, Self::Error> { + self.visit_label(label) + } + + fn visit_binder( + self, + label: &'a L1, + subexpr: &'a SE1, + ) -> Result<(Self::L2, Self::SE2), Self::Error> { + self.visit_binder(label, subexpr) + } + + fn visit_embed_squash( + self, + embed: &'a E1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + self.visit_embed_squash(embed) + } + + fn visit_note_squash( + self, + note: &'a N1, + subexpr: &'a SE1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + self.visit_note_squash(note, subexpr) + } + + // Called with the result of the map, in the non-embed/note case. + // Useful to change the result type, and/or avoid some loss of info + fn visit_resulting_exprf( + result: ExprF<Self::SE2, Self::L2, Self::N2, Self::E2>, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + Ok(result) + } +} + +/// Like ExprFFallibleVisitor, but without the error handling. +pub trait ExprFInFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2>: + Sized +{ + fn visit_subexpr(&mut self, subexpr: &'a SE1) -> SE2; + fn visit_label(&mut self, label: &'a L1) -> L2; + fn visit_note(self, note: &'a N1) -> N2; + fn visit_embed(self, embed: &'a E1) -> E2; + + fn visit_subexpr_under_binder( + mut self, + _label: &'a L1, + subexpr: &'a SE1, + ) -> SE2 { + self.visit_subexpr(subexpr) + } + + fn visit_binder(mut self, label: &'a L1, subexpr: &'a SE1) -> (L2, SE2) { + ( + self.visit_label(label), + self.visit_subexpr_under_binder(label, subexpr), + ) + } + + fn visit_embed_squash(self, embed: &'a E1) -> ExprF<SE2, L2, N2, E2> { + ExprF::Embed(self.visit_embed(embed)) + } + + fn visit_note_squash( + mut self, + note: &'a N1, + subexpr: &'a SE1, + ) -> ExprF<SE2, L2, N2, E2> { + let subexpr = self.visit_subexpr(subexpr); + let note = self.visit_note(note); + ExprF::Note(note, subexpr) + } +} + +struct InfallibleWrapper<T>(T); + +impl<'a, T, SE1, SE2, L1, L2, N1, N2, E1, E2> + ExprFFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2> + for InfallibleWrapper<T> +where + T: ExprFInFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2>, +{ + type Error = X; + + fn visit_subexpr(&mut self, subexpr: &'a SE1) -> Result<SE2, Self::Error> { + Ok(self.0.visit_subexpr(subexpr)) + } + fn visit_label(&mut self, label: &'a L1) -> Result<L2, Self::Error> { + Ok(self.0.visit_label(label)) + } + fn visit_note(self, note: &'a N1) -> Result<N2, Self::Error> { + Ok(self.0.visit_note(note)) + } + fn visit_embed(self, embed: &'a E1) -> Result<E2, Self::Error> { + Ok(self.0.visit_embed(embed)) + } + + fn visit_binder( + self, + label: &'a L1, + subexpr: &'a SE1, + ) -> Result<(L2, SE2), Self::Error> { + Ok(self.0.visit_binder(label, subexpr)) + } + + fn visit_embed_squash( + self, + embed: &'a E1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + Ok(self.0.visit_embed_squash(embed)) + } + + fn visit_note_squash( + self, + note: &'a N1, + subexpr: &'a SE1, + ) -> Result<ExprF<SE2, L2, N2, E2>, Self::Error> { + Ok(self.0.visit_note_squash(note, subexpr)) + } +} + +impl<'a, T, SE1, SE2, L1, L2, N1, N2, E1, E2> + GenericVisitor<&'a ExprF<SE1, L1, N1, E1>, ExprF<SE2, L2, N2, E2>> for T +where + L1: Ord, + L2: Ord, + T: ExprFInFallibleVisitor<'a, SE1, SE2, L1, L2, N1, N2, E1, E2>, +{ + fn visit( + self, + input: &'a ExprF<SE1, L1, N1, E1>, + ) -> ExprF<SE2, L2, N2, E2> { + trivial_result(InfallibleWrapper(self).visit(input)) + } +} + +pub struct TraverseRefWithBindersVisitor<F1, F2, F3, F4, F5> { + pub visit_subexpr: F1, + pub visit_under_binder: F2, + pub visit_note: F3, + pub visit_embed: F4, + pub visit_label: F5, +} + +impl<'a, SE, L, N, E, SE2, L2, N2, E2, Err, F1, F2, F3, F4, F5> + ExprFFallibleVisitor<'a, SE, SE2, L, L2, N, N2, E, E2> + for TraverseRefWithBindersVisitor<F1, F2, F3, F4, F5> +where + SE: 'a, + L: 'a, + N: 'a, + E: 'a, + L: Ord, + L2: Ord, + F1: FnMut(&'a SE) -> Result<SE2, Err>, + F2: FnOnce(&'a L, &'a SE) -> Result<SE2, Err>, + F3: FnOnce(&'a N) -> Result<N2, Err>, + F4: FnOnce(&'a E) -> Result<E2, Err>, + F5: FnMut(&'a L) -> Result<L2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_subexpr_under_binder( + self, + label: &'a L, + subexpr: &'a SE, + ) -> Result<SE2, Self::Error> { + (self.visit_under_binder)(label, subexpr) + } + fn visit_note(self, note: &'a N) -> Result<N2, Self::Error> { + (self.visit_note)(note) + } + fn visit_embed(self, embed: &'a E) -> Result<E2, Self::Error> { + (self.visit_embed)(embed) + } + fn visit_label(&mut self, label: &'a L) -> Result<L2, Self::Error> { + (self.visit_label)(label) + } +} + +pub struct TraverseRefVisitor<F1, F2, F3, F4> { + pub visit_subexpr: F1, + pub visit_note: F2, + pub visit_embed: F3, + pub visit_label: F4, +} + +impl<'a, SE, L, N, E, SE2, L2, N2, E2, Err, F1, F2, F3, F4> + ExprFFallibleVisitor<'a, SE, SE2, L, L2, N, N2, E, E2> + for TraverseRefVisitor<F1, F2, F3, F4> +where + SE: 'a, + L: 'a, + N: 'a, + E: 'a, + L: Ord, + L2: Ord, + F1: FnMut(&'a SE) -> Result<SE2, Err>, + F2: FnOnce(&'a N) -> Result<N2, Err>, + F3: FnOnce(&'a E) -> Result<E2, Err>, + F4: FnMut(&'a L) -> Result<L2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_note(self, note: &'a N) -> Result<N2, Self::Error> { + (self.visit_note)(note) + } + fn visit_embed(self, embed: &'a E) -> Result<E2, Self::Error> { + (self.visit_embed)(embed) + } + fn visit_label(&mut self, label: &'a L) -> Result<L2, Self::Error> { + (self.visit_label)(label) + } +} + +pub struct TraverseEmbedVisitor<F1>(pub F1); + +impl<'a, 'b, N, E, E2, Err, F1> + ExprFFallibleVisitor< + 'a, + SubExpr<N, E>, + SubExpr<N, E2>, + Label, + Label, + N, + N, + E, + E2, + > for &'b mut TraverseEmbedVisitor<F1> +where + N: Clone + 'a, + F1: FnMut(&E) -> Result<E2, Err>, +{ + type Error = Err; + + fn visit_subexpr( + &mut self, + subexpr: &'a SubExpr<N, E>, + ) -> Result<SubExpr<N, E2>, Self::Error> { + Ok(rc(subexpr.as_ref().visit(&mut **self)?)) + } + fn visit_note(self, note: &'a N) -> Result<N, Self::Error> { + Ok(N::clone(note)) + } + fn visit_embed(self, embed: &'a E) -> Result<E2, Self::Error> { + (self.0)(embed) + } + fn visit_label(&mut self, label: &'a Label) -> Result<Label, Self::Error> { + Ok(Label::clone(label)) + } +} + +pub struct SquashEmbedVisitor<F1>(pub F1); + +impl<'a, 'b, N, E1, E2, F1> + ExprFVeryGenericVisitor<'a, SubExpr<N, E2>, SubExpr<N, E1>, Label, N, E1> + for &'b mut SquashEmbedVisitor<F1> +where + N: Clone + 'a, + F1: FnMut(&E1) -> SubExpr<N, E2>, +{ + type Error = X; + type SE2 = SubExpr<N, E2>; + type L2 = Label; + type N2 = N; + type E2 = E2; + + fn visit_subexpr( + &mut self, + subexpr: &'a SubExpr<N, E1>, + ) -> Result<Self::SE2, Self::Error> { + Ok(subexpr.as_ref().visit(&mut **self)?) + } + + fn visit_label( + &mut self, + label: &'a Label, + ) -> Result<Self::L2, Self::Error> { + Ok(Label::clone(label)) + } + + fn visit_binder( + mut self, + label: &'a Label, + subexpr: &'a SubExpr<N, E1>, + ) -> Result<(Self::L2, Self::SE2), Self::Error> { + Ok((self.visit_label(label)?, self.visit_subexpr(subexpr)?)) + } + + fn visit_embed_squash( + self, + embed: &'a E1, + ) -> Result<SubExpr<N, E2>, Self::Error> { + Ok((self.0)(embed)) + } + + fn visit_note_squash( + mut self, + note: &'a N, + subexpr: &'a SubExpr<N, E1>, + ) -> Result<SubExpr<N, E2>, Self::Error> { + let subexpr = self.visit_subexpr(subexpr)?; + let note = N::clone(note); + Ok(rc(ExprF::Note(note, subexpr))) + } + + // Called with the result of the map, in the non-embed/note case. + // Useful to change the result type, and/or avoid some loss of info + fn visit_resulting_exprf( + result: ExprF<Self::SE2, Self::L2, Self::N2, Self::E2>, + ) -> Result<SubExpr<N, E2>, Self::Error> { + Ok(rc(result)) + } +} + +pub struct UnNoteVisitor; + +impl<'a, 'b, N, E> + ExprFInFallibleVisitor< + 'a, + SubExpr<N, E>, + SubExpr<X, E>, + Label, + Label, + N, + X, + E, + E, + > for &'b mut UnNoteVisitor +where + E: Clone + 'a, +{ + fn visit_subexpr(&mut self, subexpr: &'a SubExpr<N, E>) -> SubExpr<X, E> { + rc(subexpr.as_ref().visit(&mut **self)) + } + fn visit_note(self, _: &'a N) -> X { + unreachable!() + } + fn visit_note_squash( + self, + _: &'a N, + subexpr: &'a SubExpr<N, E>, + ) -> Expr<X, E> { + subexpr.as_ref().visit(self) + } + fn visit_embed(self, embed: &'a E) -> E { + E::clone(embed) + } + fn visit_label(&mut self, label: &'a Label) -> Label { + Label::clone(label) + } +} + +pub struct NoteAbsurdVisitor; + +impl<'a, 'b, N, E> + ExprFInFallibleVisitor< + 'a, + SubExpr<X, E>, + SubExpr<N, E>, + Label, + Label, + X, + N, + E, + E, + > for &'b mut NoteAbsurdVisitor +where + E: Clone + 'a, +{ + fn visit_subexpr(&mut self, subexpr: &'a SubExpr<X, E>) -> SubExpr<N, E> { + rc(subexpr.as_ref().visit(&mut **self)) + } + fn visit_note(self, note: &'a X) -> N { + match *note {} + } + fn visit_embed(self, embed: &'a E) -> E { + E::clone(embed) + } + fn visit_label(&mut self, label: &'a Label) -> Label { + Label::clone(label) + } +} + +pub struct EmbedAbsurdVisitor; + +impl<'a, 'b, N, E> + ExprFInFallibleVisitor< + 'a, + SubExpr<N, X>, + SubExpr<N, E>, + Label, + Label, + N, + N, + X, + E, + > for &'b mut EmbedAbsurdVisitor +where + N: Clone + 'a, +{ + fn visit_subexpr(&mut self, subexpr: &'a SubExpr<N, X>) -> SubExpr<N, E> { + rc(subexpr.as_ref().visit(&mut **self)) + } + fn visit_note(self, note: &'a N) -> N { + N::clone(note) + } + fn visit_embed(self, embed: &'a X) -> E { + match *embed {} + } + fn visit_label(&mut self, label: &'a Label) -> Label { + Label::clone(label) + } +} -- cgit v1.2.3