diff options
author | Nadrieril | 2019-03-06 12:19:26 +0100 |
---|---|---|
committer | Nadrieril | 2019-03-06 12:19:26 +0100 |
commit | 564a5f37b106c69d8ebe9aec2f665f5222b3dfda (patch) | |
tree | 21779d5ea3bc5daaec7fa214402868cde101cfce /dhall_core/src | |
parent | 5b32c5bdea80a0bdf19240f3cc2b8e2ae251d51a (diff) |
Split-off core into its own crate
Diffstat (limited to 'dhall_core/src')
-rw-r--r-- | dhall_core/src/core.rs | 1205 | ||||
-rw-r--r-- | dhall_core/src/grammar.lalrpop | 164 | ||||
-rw-r--r-- | dhall_core/src/grammar_util.rs | 7 | ||||
-rw-r--r-- | dhall_core/src/lexer.rs | 394 | ||||
-rw-r--r-- | dhall_core/src/lib.rs | 10 | ||||
-rw-r--r-- | dhall_core/src/parser.rs | 827 |
6 files changed, 2607 insertions, 0 deletions
diff --git a/dhall_core/src/core.rs b/dhall_core/src/core.rs new file mode 100644 index 0000000..340cb04 --- /dev/null +++ b/dhall_core/src/core.rs @@ -0,0 +1,1205 @@ +#![allow(non_snake_case)] +use std::collections::BTreeMap; +use std::fmt::{self, Display}; +use std::path::PathBuf; + +/// Constants for a pure type system +/// +/// The only axiom is: +/// +/// ```c +/// ⊦ Type : Kind +/// ``` +/// +/// ... and the valid rule pairs are: +/// +/// ```c +/// ⊦ Type ↝ Type : Type -- Functions from terms to terms (ordinary functions) +/// ⊦ Kind ↝ Type : Type -- Functions from types to terms (polymorphic functions) +/// ⊦ Kind ↝ Kind : Kind -- Functions from types to types (type constructors) +/// ``` +/// +/// These are the same rule pairs as System Fω +/// +/// Note that Dhall does not support functions from terms to types and therefore +/// Dhall is not a dependently typed language +/// +#[derive(Debug, Copy, Clone, PartialEq, Eq)] // (Show, Bounded, Enum) +pub enum Const { + Type, + Kind, +} + +/// Path to an external resource +#[derive(Debug, Clone, PartialEq, Eq)] // (Eq, Ord, Show) +pub enum Path { + File(PathBuf), + URL(String), +} + +/// Label for a bound variable +/// +/// The `String` field is the variable's name (i.e. \"`x`\"). +/// +/// The `Int` field disambiguates variables with the same name if there are +/// multiple bound variables of the same name in scope. Zero refers to the +/// nearest bound variable and the index increases by one for each bound +/// variable of the same name going outward. The following diagram may help: +/// +/// ```c +/// +---refers to--+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@0 +/// +/// +------------------refers to-----------------+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@1 +/// ``` +/// +/// This `Int` behaves like a De Bruijn index in the special case where all +/// variables have the same name. +/// +/// You can optionally omit the index if it is `0`: +/// +/// ```c +/// +refers to+ +/// | | +/// v | +/// \(x : *) -> \(y : *) -> \(x : *) -> x +/// ``` +/// +/// Zero indices are omitted when pretty-printing `Var`s and non-zero indices +/// appear as a numeric suffix. +/// +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct V<'i>(pub &'i str, pub usize); + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum BinOp { + /// x && y` + BoolAnd, + /// x || y` + BoolOr, + /// x == y` + BoolEQ, + /// x != y` + BoolNE, + /// x + y` + NaturalPlus, + /// x * y` + NaturalTimes, + /// x ++ y` + TextAppend, + /// x ∧ y` + Combine, + /// x //\\ y + CombineTypes, + /// x ? y + ImportAlt, + /// x // y + Prefer, + /// x # y + ListAppend, +} + +/// Syntax tree for expressions +#[derive(Debug, Clone, PartialEq)] +pub enum Expr<'i, S, A> { + /// `Const c ~ c` + Const(Const), + /// `Var (V x 0) ~ x`<br> + /// `Var (V x n) ~ x@n` + Var(V<'i>), + /// `Lam x A b ~ λ(x : A) -> b` + Lam(&'i str, Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>), + /// `Pi "_" A B ~ A -> B` + /// `Pi x A B ~ ∀(x : A) -> B` + Pi(&'i str, Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>), + /// `App f A ~ f A` + App(Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>), + /// `Let x Nothing r e ~ let x = r in e` + /// `Let x (Just t) r e ~ let x : t = r in e` + Let( + &'i str, + Option<Box<Expr<'i, S, A>>>, + Box<Expr<'i, S, A>>, + Box<Expr<'i, S, A>>, + ), + /// `Annot x t ~ x : t` + Annot(Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>), + /// Built-in values + Builtin(Builtin), + // Binary operations + BinOp(BinOp, Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>), + /// `BoolLit b ~ b` + BoolLit(bool), + /// `BoolIf x y z ~ if x then y else z` + BoolIf( + Box<Expr<'i, S, A>>, + Box<Expr<'i, S, A>>, + Box<Expr<'i, S, A>>, + ), + /// `NaturalLit n ~ +n` + NaturalLit(Natural), + /// `IntegerLit n ~ n` + IntegerLit(Integer), + /// `DoubleLit n ~ n` + DoubleLit(Double), + /// `TextLit t ~ t` + TextLit(Builder), + /// `ListLit t [x, y, z] ~ [x, y, z] : List t` + ListLit(Option<Box<Expr<'i, S, A>>>, Vec<Expr<'i, S, A>>), + /// `OptionalLit t [e] ~ [e] : Optional t` + /// `OptionalLit t [] ~ [] : Optional t` + OptionalLit(Option<Box<Expr<'i, S, A>>>, Vec<Expr<'i, S, A>>), + /// `Record [(k1, t1), (k2, t2)] ~ { k1 : t1, k2 : t1 }` + Record(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `RecordLit [(k1, v1), (k2, v2)] ~ { k1 = v1, k2 = v2 }` + RecordLit(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `Union [(k1, t1), (k2, t2)] ~ < k1 : t1, k2 : t2 >` + Union(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `UnionLit (k1, v1) [(k2, t2), (k3, t3)] ~ < k1 = t1, k2 : t2, k3 : t3 >` + UnionLit( + &'i str, + Box<Expr<'i, S, A>>, + BTreeMap<&'i str, Expr<'i, S, A>>, + ), + /// `Merge x y t ~ merge x y : t` + Merge( + Box<Expr<'i, S, A>>, + Box<Expr<'i, S, A>>, + Option<Box<Expr<'i, S, A>>>, + ), + /// `Field e x ~ e.x` + Field(Box<Expr<'i, S, A>>, &'i str), + /// `Note S x ~ e` + Note(S, Box<Expr<'i, S, A>>), + /// `Embed path ~ path` + Embed(A), + + FailedParse(String, Vec<Expr<'i, S, A>>), +} + +/// Built-ins +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Builtin { + Bool, + Natural, + Integer, + Double, + Text, + List, + Optional, + NaturalBuild, + NaturalFold, + NaturalIsZero, + NaturalEven, + NaturalOdd, + NaturalToInteger, + NaturalShow, + IntegerToDouble, + IntegerShow, + DoubleShow, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + OptionalFold, + OptionalBuild, + TextShow, +} + +impl<'i> From<&'i str> for V<'i> { + fn from(s: &'i str) -> Self { + V(s, 0) + } +} + +impl<'i, S, A> From<&'i str> for Expr<'i, S, A> { + fn from(s: &'i str) -> Self { + Expr::Var(s.into()) + } +} + +impl<'i, S, A> From<Builtin> for Expr<'i, S, A> { + fn from(t: Builtin) -> Self { + Expr::Builtin(t) + } +} + +impl<'i, S, A> Expr<'i, S, A> { + fn bool_lit(&self) -> Option<bool> { + match *self { + Expr::BoolLit(v) => Some(v), + _ => None, + } + } + + fn natural_lit(&self) -> Option<usize> { + match *self { + Expr::NaturalLit(v) => Some(v), + _ => None, + } + } + + fn text_lit(&self) -> Option<String> { + match *self { + Expr::TextLit(ref t) => Some(t.clone()), // FIXME? + _ => None, + } + } +} + +// There is a one-to-one correspondence between the formatters in this section +// and the grammar in grammar.lalrpop. Each formatter is named after the +// corresponding grammar rule and the relationship between formatters exactly matches +// the relationship between grammar rules. This leads to the nice emergent property +// of automatically getting all the parentheses and precedences right. +// +// This approach has one major disadvantage: you can get an infinite loop if +// you add a new constructor to the syntax tree without adding a matching +// case the corresponding builder. + +impl<'i, S, A: Display> Display for Expr<'i, S, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + // buildExprA + use crate::Expr::*; + match self { + &Annot(ref a, ref b) => { + a.fmt_b(f)?; + write!(f, " : ")?; + b.fmt(f) + } + &Note(_, ref b) => b.fmt(f), + a => a.fmt_b(f), + } + } +} + +impl<'i, S, A: Display> Expr<'i, S, A> { + fn fmt_b(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Lam(a, ref b, ref c) => { + write!(f, "λ({} : ", a)?; + b.fmt(f)?; + write!(f, ") → ")?; + c.fmt_b(f) + } + &BoolIf(ref a, ref b, ref c) => { + write!(f, "if ")?; + a.fmt(f)?; + write!(f, " then ")?; + b.fmt_b(f)?; + write!(f, " else ")?; + c.fmt_c(f) + } + &Pi("_", ref b, ref c) => { + b.fmt_c(f)?; + write!(f, " → ")?; + c.fmt_b(f) + } + &Pi(a, ref b, ref c) => { + write!(f, "∀({} : ", a)?; + b.fmt(f)?; + write!(f, ") → ")?; + c.fmt_b(f) + } + &Let(a, None, ref c, ref d) => { + write!(f, "let {} = ", a)?; + c.fmt(f)?; + write!(f, ") → ")?; + d.fmt_b(f) + } + &Let(a, Some(ref b), ref c, ref d) => { + write!(f, "let {} : ", a)?; + b.fmt(f)?; + write!(f, " = ")?; + c.fmt(f)?; + write!(f, ") → ")?; + d.fmt_b(f) + } + &ListLit(ref t, ref es) => { + fmt_list("[", "]", es, f, |e, f| e.fmt(f))?; + match t { + Some(t) => { + write!(f, " : List ")?; + t.fmt_e(f)? + } + None => {} + } + Ok(()) + } + &OptionalLit(ref t, ref es) => { + fmt_list("[", "]", es, f, |e, f| e.fmt(f))?; + match t { + Some(t) => { + write!(f, " : Optional ")?; + t.fmt_e(f)? + } + None => {} + } + Ok(()) + } + &Merge(ref a, ref b, ref c) => { + write!(f, "merge ")?; + a.fmt_e(f)?; + write!(f, " ")?; + b.fmt_e(f)?; + match c { + Some(c) => { + write!(f, " : ")?; + c.fmt_d(f)? + } + None => {} + } + Ok(()) + } + &Note(_, ref b) => b.fmt_b(f), + a => a.fmt_c(f), + } + } + + fn fmt_c(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::BinOp::*; + use crate::Expr::*; + match self { + // FIXME precedence + &BinOp(BoolOr, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" || ")?; + b.fmt_c(f) + } + &BinOp(TextAppend, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ++ ")?; + b.fmt_c(f) + } + &BinOp(NaturalPlus, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" + ")?; + b.fmt_c(f) + } + &BinOp(BoolAnd, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" && ")?; + b.fmt_c(f) + } + &BinOp(Combine, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ^ ")?; + b.fmt_c(f) + } + &BinOp(NaturalTimes, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" * ")?; + b.fmt_c(f) + } + &BinOp(BoolEQ, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" == ")?; + b.fmt_c(f) + } + &BinOp(BoolNE, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" != ")?; + b.fmt_c(f) + } + &Note(_, ref b) => b.fmt_c(f), + a => a.fmt_d(f), + } + } + + fn fmt_d(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &App(ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ")?; + b.fmt_e(f) + } + &Note(_, ref b) => b.fmt_d(f), + a => a.fmt_e(f), + } + } + + fn fmt_e(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Field(ref a, b) => { + a.fmt_e(f)?; + write!(f, ".{}", b) + } + &Note(_, ref b) => b.fmt_e(f), + a => a.fmt_f(f), + } + } + + fn fmt_f(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Var(a) => a.fmt(f), + &Const(k) => k.fmt(f), + &Builtin(v) => v.fmt(f), + &BoolLit(true) => f.write_str("True"), + &BoolLit(false) => f.write_str("False"), + &IntegerLit(a) => a.fmt(f), + &NaturalLit(a) => { + f.write_str("+")?; + a.fmt(f) + } + &DoubleLit(a) => a.fmt(f), + &TextLit(ref a) => <String as fmt::Debug>::fmt(a, f), // FIXME Format with Haskell escapes + &Record(ref a) if a.is_empty() => f.write_str("{}"), + &Record(ref a) => fmt_list("{ ", " }", a, f, |(k, t), f| { + write!(f, "{} : {}", k, t) + }), + &RecordLit(ref a) if a.is_empty() => f.write_str("{=}"), + &RecordLit(ref a) => fmt_list("{ ", " }", a, f, |(k, v), f| { + write!(f, "{} = {}", k, v) + }), + &Union(ref _a) => f.write_str("Union"), + &UnionLit(_a, ref _b, ref _c) => f.write_str("UnionLit"), + &Embed(ref a) => a.fmt(f), + &Note(_, ref b) => b.fmt_f(f), + a => write!(f, "({})", a), + } + } +} + +fn fmt_list<T, I, F>( + open: &str, + close: &str, + it: I, + f: &mut fmt::Formatter, + func: F, +) -> Result<(), fmt::Error> +where + I: IntoIterator<Item = T>, + F: Fn(T, &mut fmt::Formatter) -> Result<(), fmt::Error>, +{ + f.write_str(open)?; + for (i, x) in it.into_iter().enumerate() { + if i > 0 { + f.write_str(", ")?; + } + func(x, f)?; + } + f.write_str(close) +} + +impl Display for Const { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <Self as fmt::Debug>::fmt(self, f) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Builtin::*; + f.write_str(match *self { + Bool => "Bool", + Natural => "Natural", + Integer => "Integer", + Double => "Double", + Text => "Text", + List => "List", + Optional => "Optional", + NaturalBuild => "Natural/build", + NaturalFold => "Natural/fold", + NaturalIsZero => "Natural/isZero", + NaturalEven => "Natural/even", + NaturalOdd => "Natural/odd", + NaturalToInteger => "Natural/toInteger", + NaturalShow => "Natural/show", + IntegerToDouble => "Integer/toDouble", + IntegerShow => "Integer/show", + DoubleShow => "Double/show", + ListBuild => "List/build", + ListFold => "List/fold", + ListLength => "List/length", + ListHead => "List/head", + ListLast => "List/last", + ListIndexed => "List/indexed", + ListReverse => "List/reverse", + OptionalFold => "Optional/fold", + OptionalBuild => "Optional/build", + TextShow => "Text/show", + }) + } +} + +impl Builtin { + pub fn parse(s: &str) -> Option<Self> { + use self::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +impl<'i> Display for V<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let V(x, n) = *self; + f.write_str(x)?; + if n != 0 { + write!(f, "@{}", n)?; + } + Ok(()) + } +} + +pub fn pi<'i, S, A, Name, Et, Ev>( + var: Name, + ty: Et, + value: Ev, +) -> Expr<'i, S, A> +where + Name: Into<&'i str>, + Et: Into<Expr<'i, S, A>>, + Ev: Into<Expr<'i, S, A>>, +{ + Expr::Pi(var.into(), bx(ty.into()), bx(value.into())) +} + +pub fn app<'i, S, A, Ef, Ex>(f: Ef, x: Ex) -> Expr<'i, S, A> +where + Ef: Into<Expr<'i, S, A>>, + Ex: Into<Expr<'i, S, A>>, +{ + Expr::App(bx(f.into()), bx(x.into())) +} + +pub type Builder = String; +pub type Double = f64; +pub type Int = isize; +pub type Integer = isize; +pub type Natural = usize; + +/// A void type +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum X {} + +impl Display for X { + fn fmt(&self, _: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self {} + } +} + +pub fn bx<T>(x: T) -> Box<T> { + Box::new(x) +} + +fn add_ui(u: usize, i: isize) -> usize { + if i < 0 { + u.checked_sub(i.checked_neg().unwrap() as usize).unwrap() + } else { + u.checked_add(i as usize).unwrap() + } +} + +fn map_record_value<'a, I, K, V, U, F>(it: I, f: F) -> BTreeMap<K, U> +where + I: IntoIterator<Item = (&'a K, &'a V)>, + K: Eq + Ord + Copy + 'a, + V: 'a, + F: Fn(&V) -> U, +{ + it.into_iter().map(|(&k, v)| (k, f(v))).collect() +} + +fn map_op2<T, U, V, F, G>(f: F, g: G, a: T, b: T) -> V +where + F: FnOnce(U, U) -> V, + G: Fn(T) -> U, +{ + f(g(a), g(b)) +} + +/// `shift` is used by both normalization and type-checking to avoid variable +/// capture by shifting variable indices +/// +/// For example, suppose that you were to normalize the following expression: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → (λ(y : a) → λ(x : a) → y) x +/// ``` +/// +/// If you were to substitute `y` with `x` without shifting any variable +/// indices, then you would get the following incorrect result: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → λ(x : a) → x -- Incorrect normalized form +/// ``` +/// +/// In order to substitute `x` in place of `y` we need to `shift` `x` by `1` in +/// order to avoid being misinterpreted as the `x` bound by the innermost +/// lambda. If we perform that `shift` then we get the correct result: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → λ(x : a) → x@1 +/// ``` +/// +/// As a more worked example, suppose that you were to normalize the following +/// expression: +/// +/// ```c +/// λ(a : Type) +/// → λ(f : a → a → a) +/// → λ(x : a) +/// → λ(x : a) +/// → (λ(x : a) → f x x@1) x@1 +/// ``` +/// +/// The correct normalized result would be: +/// +/// ```c +/// λ(a : Type) +/// → λ(f : a → a → a) +/// → λ(x : a) +/// → λ(x : a) +/// → f x@1 x +/// ``` +/// +/// The above example illustrates how we need to both increase and decrease +/// variable indices as part of substitution: +/// +/// * We need to increase the index of the outer `x\@1` to `x\@2` before we +/// substitute it into the body of the innermost lambda expression in order +/// to avoid variable capture. This substitution changes the body of the +/// lambda expression to `(f x\@2 x\@1)` +/// +/// * We then remove the innermost lambda and therefore decrease the indices of +/// both `x`s in `(f x\@2 x\@1)` to `(f x\@1 x)` in order to reflect that one +/// less `x` variable is now bound within that scope +/// +/// Formally, `(shift d (V x n) e)` modifies the expression `e` by adding `d` to +/// the indices of all variables named `x` whose indices are greater than +/// `(n + m)`, where `m` is the number of bound variables of the same name +/// within that scope +/// +/// In practice, `d` is always `1` or `-1` because we either: +/// +/// * increment variables by `1` to avoid variable capture during substitution +/// * decrement variables by `1` when deleting lambdas after substitution +/// +/// `n` starts off at `0` when substitution begins and increments every time we +/// descend into a lambda or let expression that binds a variable of the same +/// name in order to avoid shifting the bound variables by mistake. +/// +pub fn shift<'i, S, T, A: Clone>( + d: isize, + v: V, + e: &Expr<'i, S, A>, +) -> Expr<'i, T, A> { + use crate::Expr::*; + let V(x, n) = v; + match *e { + Const(a) => Const(a), + Var(V(x2, n2)) => { + let n3 = if x == x2 && n <= n2 { + add_ui(n2, d) + } else { + n2 + }; + Var(V(x2, n3)) + } + Lam(x2, ref tA, ref b) => { + let n2 = if x == x2 { n + 1 } else { n }; + let tA2 = shift(d, V(x, n), tA); + let b2 = shift(d, V(x, n2), b); + Lam(x2, bx(tA2), bx(b2)) + } + Pi(x2, ref tA, ref tB) => { + let n2 = if x == x2 { n + 1 } else { n }; + let tA2 = shift(d, V(x, n), tA); + let tB2 = shift(d, V(x, n2), tB); + pi(x2, tA2, tB2) + } + App(ref f, ref a) => app(shift(d, v, f), shift(d, v, a)), + Let(f, ref mt, ref r, ref e) => { + let n2 = if x == f { n + 1 } else { n }; + let e2 = shift(d, V(x, n2), e); + let mt2 = mt.as_ref().map(|t| bx(shift(d, V(x, n), t))); + let r2 = shift(d, V(x, n), r); + Let(f, mt2, bx(r2), bx(e2)) + } + Annot(ref a, ref b) => shift_op2(Annot, d, v, a, b), + Builtin(v) => Builtin(v), + BoolLit(a) => BoolLit(a), + BinOp(o, ref a, ref b) => shift_op2(|x, y| BinOp(o, x, y), d, v, a, b), + BoolIf(ref a, ref b, ref c) => { + BoolIf(bx(shift(d, v, a)), bx(shift(d, v, b)), bx(shift(d, v, c))) + } + NaturalLit(a) => NaturalLit(a), + IntegerLit(a) => IntegerLit(a), + DoubleLit(a) => DoubleLit(a), + TextLit(ref a) => TextLit(a.clone()), + ListLit(ref t, ref es) => ListLit( + t.as_ref().map(|t| bx(shift(d, v, t))), + es.iter().map(|e| shift(d, v, e)).collect(), + ), + OptionalLit(ref t, ref es) => OptionalLit( + t.as_ref().map(|t| bx(shift(d, v, t))), + es.iter().map(|e| shift(d, v, e)).collect(), + ), + Record(ref a) => Record(map_record_value(a, |val| shift(d, v, val))), + RecordLit(ref a) => { + RecordLit(map_record_value(a, |val| shift(d, v, val))) + } + Union(ref a) => Union(map_record_value(a, |val| shift(d, v, val))), + UnionLit(k, ref uv, ref a) => UnionLit( + k, + bx(shift(d, v, uv)), + map_record_value(a, |val| shift(d, v, val)), + ), + Merge(ref a, ref b, ref c) => Merge( + bx(shift(d, v, a)), + bx(shift(d, v, b)), + c.as_ref().map(|c| bx(shift(d, v, c))), + ), + Field(ref a, b) => Field(bx(shift(d, v, a)), b), + Note(_, ref b) => shift(d, v, b), + // The Dhall compiler enforces that all embedded values are closed expressions + // and `shift` does nothing to a closed expression + Embed(ref p) => Embed(p.clone()), + _ => panic!(), + } +} + +fn shift_op2<'i, S, T, A, F>( + f: F, + d: isize, + v: V, + a: &Expr<'i, S, A>, + b: &Expr<'i, S, A>, +) -> Expr<'i, T, A> +where + F: FnOnce(Box<Expr<'i, T, A>>, Box<Expr<'i, T, A>>) -> Expr<'i, T, A>, + A: Clone, +{ + map_op2(f, |x| bx(shift(d, v, x)), a, b) +} + +/// Substitute all occurrences of a variable with an expression +/// +/// ```c +/// subst x C B ~ B[x := C] +/// ``` +/// +pub fn subst<'i, S, T, A>( + v: V<'i>, + e: &Expr<'i, S, A>, + b: &Expr<'i, T, A>, +) -> Expr<'i, S, A> +where + S: Clone, + A: Clone, +{ + use crate::Expr::*; + let V(x, n) = v; + match *b { + Const(a) => Const(a), + Lam(y, ref tA, ref b) => { + let n2 = if x == y { n + 1 } else { n }; + let b2 = subst(V(x, n2), &shift(1, V(y, 0), e), b); + let tA2 = subst(V(x, n), e, tA); + Lam(y, bx(tA2), bx(b2)) + } + Pi(y, ref tA, ref tB) => { + let n2 = if x == y { n + 1 } else { n }; + let tB2 = subst(V(x, n2), &shift(1, V(y, 0), e), tB); + let tA2 = subst(V(x, n), e, tA); + pi(y, tA2, tB2) + } + App(ref f, ref a) => { + let f2 = subst(v, e, f); + let a2 = subst(v, e, a); + app(f2, a2) + } + Var(v2) => { + if v == v2 { + e.clone() + } else { + Var(v2) + } + } + Let(f, ref mt, ref r, ref b) => { + let n2 = if x == f { n + 1 } else { n }; + let b2 = subst(V(x, n2), &shift(1, V(f, 0), e), b); + let mt2 = mt.as_ref().map(|t| bx(subst(V(x, n), e, t))); + let r2 = subst(V(x, n), e, r); + Let(f, mt2, bx(r2), bx(b2)) + } + Annot(ref a, ref b) => subst_op2(Annot, v, e, a, b), + Builtin(v) => Builtin(v), + BoolLit(a) => BoolLit(a), + BinOp(o, ref a, ref b) => subst_op2(|x, y| BinOp(o, x, y), v, e, a, b), + BoolIf(ref a, ref b, ref c) => { + BoolIf(bx(subst(v, e, a)), bx(subst(v, e, b)), bx(subst(v, e, c))) + } + NaturalLit(a) => NaturalLit(a), + IntegerLit(a) => IntegerLit(a), + DoubleLit(a) => DoubleLit(a), + TextLit(ref a) => TextLit(a.clone()), + ListLit(ref a, ref b) => { + let a2 = a.as_ref().map(|a| bx(subst(v, e, a))); + let b2 = b.iter().map(|be| subst(v, e, be)).collect(); + ListLit(a2, b2) + } + OptionalLit(ref a, ref b) => { + let a2 = a.as_ref().map(|a| bx(subst(v, e, a))); + let b2 = b.iter().map(|be| subst(v, e, be)).collect(); + OptionalLit(a2, b2) + } + Record(ref kts) => Record(map_record_value(kts, |t| subst(v, e, t))), + RecordLit(ref kvs) => { + RecordLit(map_record_value(kvs, |val| subst(v, e, val))) + } + Union(ref kts) => Union(map_record_value(kts, |t| subst(v, e, t))), + UnionLit(k, ref uv, ref kvs) => UnionLit( + k, + bx(subst(v, e, uv)), + map_record_value(kvs, |val| subst(v, e, val)), + ), + Merge(ref a, ref b, ref c) => Merge( + bx(subst(v, e, a)), + bx(subst(v, e, b)), + c.as_ref().map(|c| bx(subst(v, e, c))), + ), + Field(ref a, b) => Field(bx(subst(v, e, a)), b), + Note(_, ref b) => subst(v, e, b), + Embed(ref p) => Embed(p.clone()), + _ => panic!(), + } +} + +fn subst_op2<'i, S, T, A, F>( + f: F, + v: V<'i>, + e: &Expr<'i, S, A>, + a: &Expr<'i, T, A>, + b: &Expr<'i, T, A>, +) -> Expr<'i, S, A> +where + F: FnOnce(Box<Expr<'i, S, A>>, Box<Expr<'i, S, A>>) -> Expr<'i, S, A>, + S: Clone, + A: Clone, +{ + map_op2(f, |x| bx(subst(v, e, x)), a, b) +} + +/// Reduce an expression to its normal form, performing beta reduction +/// +/// `normalize` does not type-check the expression. You may want to type-check +/// expressions before normalizing them since normalization can convert an +/// ill-typed expression into a well-typed expression. +/// +/// However, `normalize` will not fail if the expression is ill-typed and will +/// leave ill-typed sub-expressions unevaluated. +/// +pub fn normalize<'i, S, T, A>(e: &Expr<'i, S, A>) -> Expr<'i, T, A> +where + S: Clone + fmt::Debug, + T: Clone + fmt::Debug, + A: Clone + fmt::Debug, +{ + use crate::BinOp::*; + use crate::Builtin::*; + use crate::Expr::*; + match *e { + Const(k) => Const(k), + Var(v) => Var(v), + Lam(x, ref tA, ref b) => { + let tA2 = normalize(tA); + let b2 = normalize(b); + Lam(x, bx(tA2), bx(b2)) + } + Pi(x, ref tA, ref tB) => { + let tA2 = normalize(tA); + let tB2 = normalize(tB); + pi(x, tA2, tB2) + } + App(ref f, ref a) => match normalize::<S, T, A>(f) { + Lam(x, _A, b) => { + // Beta reduce + let vx0 = V(x, 0); + let a2 = shift::<S, S, A>(1, vx0, a); + let b2 = subst::<S, T, A>(vx0, &a2, &b); + let b3 = shift::<S, T, A>(-1, vx0, &b2); + normalize(&b3) + } + f2 => match (f2, normalize::<S, T, A>(a)) { + // fold/build fusion for `List` + (App(box Builtin(ListBuild), _), App(box App(box Builtin(ListFold), _), box e2)) | + (App(box Builtin(ListFold), _), App(box App(box Builtin(ListBuild), _), box e2)) | + + // fold/build fusion for `Natural` + (Builtin(NaturalBuild), App(box Builtin(NaturalFold), box e2)) | + (Builtin(NaturalFold), App(box Builtin(NaturalBuild), box e2)) => normalize(&e2), + + /* + App (App (App (App NaturalFold (NaturalLit n0)) _) succ') zero -> + normalize (go n0) + where + go !0 = zero + go !n = App succ' (go (n - 1)) + App NaturalBuild k + | check -> NaturalLit n + | otherwise -> App f' a' + where + labeled = + normalize (App (App (App k Natural) "Succ") "Zero") + + n = go 0 labeled + where + go !m (App (Var "Succ") e') = go (m + 1) e' + go !m (Var "Zero") = m + go !_ _ = internalError text + check = go labeled + where + go (App (Var "Succ") e') = go e' + go (Var "Zero") = True + go _ = False + */ + (Builtin(NaturalIsZero), NaturalLit(n)) => BoolLit(n == 0), + (Builtin(NaturalEven), NaturalLit(n)) => BoolLit(n % 2 == 0), + (Builtin(NaturalOdd), NaturalLit(n)) => BoolLit(n % 2 != 0), + (Builtin(NaturalToInteger), NaturalLit(n)) => IntegerLit(n as isize), + (Builtin(NaturalShow), NaturalLit(n)) => TextLit(n.to_string()), + (App(f@box Builtin(ListBuild), box t), k) => { + let labeled = + normalize::<_, T, _>(&app(app(app(k.clone(), app( + Builtin(self::Builtin::List), t.clone())), "Cons"), "Nil")); + + fn list_to_vector<'i, S, A>(v: &mut Vec<Expr<'i, S, A>>, e: Expr<'i, S, A>) + where S: Clone, A: Clone + { + match e { + App(box App(box Var(V("Cons", _)), box x), box e2) => { + v.push(x); + list_to_vector(v, e2) + } + Var(V("Nil", _)) => {} + _ => panic!("internalError list_to_vector"), + } + } + fn check<S, A>(e: &Expr<S, A>) -> bool { + match *e { + App(box App(box Var(V("Cons", _)), _), ref e2) => check(e2), + Var(V("Nil", _)) => true, + _ => false, + } + } + + if check(&labeled) { + let mut v = vec![]; + list_to_vector(&mut v, labeled); + ListLit(Some(bx(t)), v) + } else { + app(App(f, bx(t)), k) + } + } + (App(box App(box App(box App(box Builtin(ListFold), _), box ListLit(_, xs)), _), cons), nil) => { + let e2: Expr<_, _> = xs.into_iter().rev().fold(nil, |y, ys| // foldr + App(bx(App(cons.clone(), bx(y))), bx(ys)) + ); + normalize(&e2) + } + (App(f, x_), ListLit(t, ys)) => match *f { + Builtin(ListLength) => + NaturalLit(ys.len()), + Builtin(ListHead) => + normalize(&OptionalLit(t, ys.into_iter().take(1).collect())), + Builtin(ListLast) => + normalize(&OptionalLit(t, ys.into_iter().last().into_iter().collect())), + Builtin(ListReverse) => { + let mut xs = ys; + xs.reverse(); + normalize(&ListLit(t, xs)) + } + _ => app(App(f, x_), ListLit(t, ys)), + }, + /* + App (App ListIndexed _) (ListLit t xs) -> + normalize (ListLit t' (fmap adapt (Data.Vector.indexed xs))) + where + t' = Record (Data.Map.fromList kts) + where + kts = [ ("index", Natural) + , ("value", t) + ] + adapt (n, x) = RecordLit (Data.Map.fromList kvs) + where + kvs = [ ("index", NaturalLit (fromIntegral n)) + , ("value", x) + ] + */ + (App(box App(box App(box App(box Builtin(OptionalFold), _), box OptionalLit(_, xs)), _), just), nothing) => { + let e2: Expr<_, _> = xs.into_iter().fold(nothing, |y, _| + App(just.clone(), bx(y)) + ); + normalize(&e2) + } + (App(box Builtin(OptionalBuild), _), App(box App(box Builtin(OptionalFold), _), b)) => { + normalize(&b) + } + (App(box Builtin(OptionalBuild), a0), g) => { + let e2: Expr<_, _> = app(app(app(g, + App(bx(Builtin(Optional)), a0.clone())), + Lam("x", a0.clone(), + bx(OptionalLit(Some(a0.clone()), vec![Var(V("x", 0))])))), + OptionalLit(Some(a0), vec![])); + normalize(&e2) + } + (f2, a2) => app(f2, a2), + }, + }, + Let(f, _, ref r, ref b) => { + let r2 = shift::<_, S, _>(1, V(f, 0), r); + let b2 = subst(V(f, 0), &r2, b); + let b3 = shift::<_, T, _>(-1, V(f, 0), &b2); + normalize(&b3) + } + Annot(ref x, _) => normalize(x), + Builtin(v) => Builtin(v), + BoolLit(b) => BoolLit(b), + BinOp(BoolAnd, ref x, ref y) => with_binop( + BoolAnd, + Expr::bool_lit, + |xn, yn| BoolLit(xn && yn), + normalize(x), + normalize(y), + ), + BinOp(BoolOr, ref x, ref y) => with_binop( + BoolOr, + Expr::bool_lit, + |xn, yn| BoolLit(xn || yn), + normalize(x), + normalize(y), + ), + BinOp(BoolEQ, ref x, ref y) => with_binop( + BoolEQ, + Expr::bool_lit, + |xn, yn| BoolLit(xn == yn), + normalize(x), + normalize(y), + ), + BinOp(BoolNE, ref x, ref y) => with_binop( + BoolNE, + Expr::bool_lit, + |xn, yn| BoolLit(xn != yn), + normalize(x), + normalize(y), + ), + BoolIf(ref b, ref t, ref f) => match normalize(b) { + BoolLit(true) => normalize(t), + BoolLit(false) => normalize(f), + b2 => BoolIf(bx(b2), bx(normalize(t)), bx(normalize(f))), + }, + NaturalLit(n) => NaturalLit(n), + BinOp(NaturalPlus, ref x, ref y) => with_binop( + NaturalPlus, + Expr::natural_lit, + |xn, yn| NaturalLit(xn + yn), + normalize(x), + normalize(y), + ), + BinOp(NaturalTimes, ref x, ref y) => with_binop( + NaturalTimes, + Expr::natural_lit, + |xn, yn| NaturalLit(xn * yn), + normalize(x), + normalize(y), + ), + IntegerLit(n) => IntegerLit(n), + DoubleLit(n) => DoubleLit(n), + TextLit(ref t) => TextLit(t.clone()), + BinOp(TextAppend, ref x, ref y) => with_binop( + TextAppend, + Expr::text_lit, + |xt, yt| TextLit(xt + &yt), + normalize(x), + normalize(y), + ), + ListLit(ref t, ref es) => { + let t2 = t.as_ref().map(|x| x.as_ref()).map(normalize).map(bx); + let es2 = es.iter().map(normalize).collect(); + ListLit(t2, es2) + } + OptionalLit(ref t, ref es) => { + let t2 = t.as_ref().map(|x| x.as_ref()).map(normalize).map(bx); + let es2 = es.iter().map(normalize).collect(); + OptionalLit(t2, es2) + } + Record(ref kts) => Record(map_record_value(kts, normalize)), + RecordLit(ref kvs) => RecordLit(map_record_value(kvs, normalize)), + Union(ref kts) => Union(map_record_value(kts, normalize)), + UnionLit(k, ref v, ref kvs) => { + UnionLit(k, bx(normalize(v)), map_record_value(kvs, normalize)) + } + Merge(ref _x, ref _y, ref _t) => unimplemented!(), + Field(ref r, x) => match normalize(r) { + RecordLit(kvs) => match kvs.get(x) { + Some(r2) => normalize(r2), + None => { + Field(bx(RecordLit(map_record_value(&kvs, normalize))), x) + } + }, + r2 => Field(bx(r2), x), + }, + Note(_, ref e) => normalize(e), + Embed(ref a) => Embed(a.clone()), + _ => unimplemented!(), + } +} + +fn with_binop<'a, S, A, U, Get, Set>( + op: BinOp, + get: Get, + set: Set, + x: Expr<'a, S, A>, + y: Expr<'a, S, A>, +) -> Expr<'a, S, A> +where + Get: Fn(&Expr<'a, S, A>) -> Option<U>, + Set: FnOnce(U, U) -> Expr<'a, S, A>, +{ + if let (Some(xv), Some(yv)) = (get(&x), get(&y)) { + set(xv, yv) + } else { + Expr::BinOp(op, bx(x), bx(y)) + } +} diff --git a/dhall_core/src/grammar.lalrpop b/dhall_core/src/grammar.lalrpop new file mode 100644 index 0000000..1ffe2ff --- /dev/null +++ b/dhall_core/src/grammar.lalrpop @@ -0,0 +1,164 @@ +use std::collections::BTreeMap; +use std::iter; +use std::iter::FromIterator; + +use crate::core; +use crate::core::bx; +use crate::core::Expr::*; +use crate::core::Builtin; +use crate::core::Builtin::*; +use crate::core::BinOp::*; +use crate::grammar_util::*; +use crate::lexer::*; + +grammar<'input>; + +extern { + type Location = usize; + type Error = LexicalError; + + enum Tok<'input> { + Pi => Tok::Pi, + Lambda => Tok::Lambda, + Combine => Tok::Combine, + "->" => Tok::Arrow, + + Int => Tok::Integer(<isize>), + Nat => Tok::Natural(<usize>), + Text => Tok::Text(<String>), + Bool => Tok::Bool(<bool>), + Label => Tok::Identifier(<&'input str>), + Const => Tok::Const(<core::Const>), + Let => Tok::Keyword(Keyword::Let), + In => Tok::Keyword(Keyword::In), + If => Tok::Keyword(Keyword::If), + Then => Tok::Keyword(Keyword::Then), + Else => Tok::Keyword(Keyword::Else), + List => Tok::ListLike(ListLike::List), + Optional => Tok::ListLike(ListLike::Optional), + Builtin => Tok::Builtin(<Builtin>), + + "{" => Tok::BraceL, + "}" => Tok::BraceR, + "[" => Tok::BracketL, + "]" => Tok::BracketR, + "(" => Tok::ParenL, + ")" => Tok::ParenR, + "&&" => Tok::BoolAnd, + "||" => Tok::BoolOr, + "==" => Tok::CompareEQ, + "!=" => Tok::CompareNE, + "++" => Tok::Append, + "*" => Tok::Times, + "+" => Tok::Plus, + "," => Tok::Comma, + "." => Tok::Dot, + ":" => Tok::Ascription, + "=" => Tok::Equals, + } +} + +pub Expr: BoxExpr<'input> = { // exprA + ExprB, +}; + +ExprB: BoxExpr<'input> = { + Lambda "(" <Label> ":" <Expr> ")" "->" <ExprB> => bx(Lam(<>)), + Pi "(" <Label> ":" <Expr> ")" "->" <ExprB> => bx(Pi(<>)), + If <Expr> Then <ExprB> Else <ExprC> => bx(BoolIf(<>)), + <ExprC> "->" <ExprB> => bx(Pi("_", <>)), + Let <Label> <(":" <Expr>)?> "=" <Expr> In <ExprB> => bx(Let(<>)), + "[" <a:Elems> "]" ":" <b:ListLike> <c:ExprE> => bx(b(Some(c), a)), + <ExprC> ":" <Expr> => bx(Annot(<>)), + ExprC, +}; + +ListLike: ExprListFn<'input> = { + List => ListLit, + Optional => OptionalLit, +}; + +BoolOr: ExprOpFn<'input> = { "||" => (|x,y| BinOp(BoolOr, x, y)) }; +NaturalPlus: ExprOpFn<'input> = { "+" => (|x,y| BinOp(NaturalPlus, x, y)) }; +TextAppend: ExprOpFn<'input> = { "++" => (|x,y| BinOp(TextAppend, x, y)) }; +BoolAnd: ExprOpFn<'input> = { "&&" => (|x,y| BinOp(BoolAnd, x, y)) }; +CombineOp: ExprOpFn<'input> = { Combine => (|x,y| BinOp(Combine, x, y)) }; +NaturalTimes: ExprOpFn<'input> = { "*" => (|x,y| BinOp(NaturalTimes, x, y)) }; +BoolEQ: ExprOpFn<'input> = { "==" => (|x,y| BinOp(BoolEQ, x, y)) }; +BoolNE: ExprOpFn<'input> = { "!=" => (|x,y| BinOp(BoolNE, x, y)) }; + +Tier<NextTier, Op>: BoxExpr<'input> = { + <a:NextTier> <f:Op> <b:Tier<NextTier, Op>> => bx(f(a, b)), + // <b:Tier<NextTier, Op>> <f:Op> <a:NextTier> => bx(f(a, b)), + NextTier, +}; + +ExprC = Tier<ExprC1, BoolOr>; +ExprC1 = Tier<ExprC2, NaturalPlus>; +ExprC2 = Tier<ExprC3, TextAppend>; +ExprC3 = Tier<ExprC4, BoolAnd>; +ExprC4 = Tier<ExprC5, CombineOp>; +ExprC5 = Tier<ExprC6, NaturalTimes>; +ExprC6 = Tier<ExprC7, BoolEQ>; +ExprC7 = Tier<ExprD, BoolNE>; + +ExprD: BoxExpr<'input> = { + <v:(ExprE)+> => { + let mut it = v.into_iter(); + let f = it.next().unwrap(); + it.fold(f, |f, x| bx(App(f, x))) + } +}; + +ExprE: BoxExpr<'input> = { + <a:ExprF> <fields:("." <Label>)*> => { + fields.into_iter().fold(a, |x, f| bx(Field(x, f))) + }, +}; + +ExprF: BoxExpr<'input> = { + Nat => bx(NaturalLit(<>)), + Int => bx(IntegerLit(<>)), + Text => bx(TextLit(<>)), + Label => bx(Var(core::V(<>, 0))), // FIXME support var@n syntax + Const => bx(Const(<>)), + List => bx(Builtin(List)), + Optional => bx(Builtin(Optional)), + Builtin => bx(Builtin(<>)), + Bool => bx(BoolLit(<>)), + Record, + RecordLit, + "(" <Expr> ")", +}; + +SepBy<S, T>: iter::Chain<::std::vec::IntoIter<T>, ::std::option::IntoIter<T>> = { + <v:(<T> S)*> <last:T?> => v.into_iter().chain(last.into_iter()), +}; + +SepBy1<S, T>: iter::Chain<::std::vec::IntoIter<T>, iter::Once<T>> = { + <v:(<T> S)*> <last:T> => v.into_iter().chain(iter::once(last)), +}; + +Elems: Vec<ParsedExpr<'input>> = { + <v:SepBy<",", Expr>> => { + v.into_iter() + .map(|b| *b) + .collect::<Vec<_>>() + } +}; + +RecordLit: BoxExpr<'input> = { + "{" "=" "}" => bx(RecordLit(BTreeMap::new())), + "{" <FieldValues> "}" => bx(RecordLit(BTreeMap::from_iter(<>))), +}; + +Record: BoxExpr<'input> = { + "{" <FieldTypes> "}" => bx(Record(BTreeMap::from_iter(<>))), +}; + +FieldValues = SepBy1<",", Field<"=">>; +FieldTypes = SepBy<",", Field<":">>; + +Field<Sep>: (&'input str, ParsedExpr<'input>) = { + <a:Label> Sep <b:Expr> => (a, *b), +}; diff --git a/dhall_core/src/grammar_util.rs b/dhall_core/src/grammar_util.rs new file mode 100644 index 0000000..ce73444 --- /dev/null +++ b/dhall_core/src/grammar_util.rs @@ -0,0 +1,7 @@ +use crate::core::{Expr, X}; + +pub type ParsedExpr<'i> = Expr<'i, X, X>; // FIXME Parse paths and replace the second X with Path +pub type BoxExpr<'i> = Box<ParsedExpr<'i>>; +pub type ExprOpFn<'i> = fn(BoxExpr<'i>, BoxExpr<'i>) -> ParsedExpr<'i>; +pub type ExprListFn<'i> = + fn(Option<BoxExpr<'i>>, Vec<ParsedExpr<'i>>) -> ParsedExpr<'i>; diff --git a/dhall_core/src/lexer.rs b/dhall_core/src/lexer.rs new file mode 100644 index 0000000..5fc0f05 --- /dev/null +++ b/dhall_core/src/lexer.rs @@ -0,0 +1,394 @@ +use nom::*; + +use crate::core::Builtin; +use crate::core::Builtin::*; +use crate::core::Const; + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Keyword { + Let, + In, + If, + Then, + Else, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum ListLike { + List, + Optional, +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum Tok<'i> { + Identifier(&'i str), + Keyword(Keyword), + Builtin(Builtin), + ListLike(ListLike), + Const(Const), + Bool(bool), + Integer(isize), + Natural(usize), + Text(String), + + // Symbols + BraceL, + BraceR, + BracketL, + BracketR, + ParenL, + ParenR, + Arrow, + Lambda, + Pi, + Combine, + BoolAnd, + BoolOr, + CompareEQ, + CompareNE, + Append, + Times, + Plus, + Comma, + Dot, + Ascription, + Equals, +} + +#[derive(Debug)] +pub enum LexicalError { + Error(usize, nom::simple_errors::Err<u32>), + Incomplete(nom::Needed), +} + +pub type Spanned<Tok, Loc, Error> = Result<(Loc, Tok, Loc), Error>; + +#[allow(dead_code)] +fn is_identifier_first_char(c: char) -> bool { + c.is_alphabetic() || c == '_' +} + +fn is_identifier_rest_char(c: char) -> bool { + is_identifier_first_char(c) || c.is_digit(10) || c == '/' +} + +macro_rules! digits { + ($i:expr, $t:tt, $radix:expr) => {{ + let r: nom::IResult<&str, $t> = map_res!( + $i, + take_while1_s!(call!(|c: char| c.is_digit($radix))), + |s| $t::from_str_radix(s, $radix) + ); + r + }}; +} + +named!(natural<&str, usize>, digits!(usize, 10)); +named!(integral<&str, isize>, digits!(isize, 10)); +named!(integer<&str, isize>, alt!( + preceded!(tag!("-"), map!(integral, |i: isize| -i)) | + preceded!(tag!("+"), integral) +)); +named!(boolean<&str, bool>, alt!( + value!(true, tag!("True")) | + value!(false, tag!("False")) +)); + +named!(identifier<&str, &str>, recognize!(preceded!( + take_while1_s!(is_identifier_first_char), + take_while_s!(is_identifier_rest_char)) +)); + +/// Parse an identifier, ensuring a whole identifier is parsed and not just a prefix. +macro_rules! ident_tag { + ($i:expr, $tag:expr) => { + match identifier($i) { + nom::IResult::Done(i, s) => { + if s == $tag { + nom::IResult::Done(i, s) + } else { + nom::IResult::Error(error_position!( + nom::ErrorKind::Tag, + $i + )) + } + } + r => r, + } + }; +} + +fn string_escape_single(c: char) -> Option<&'static str> { + match c { + 'n' => Some("\n"), + 'r' => Some("\r"), + 't' => Some("\t"), + '"' => Some("\""), + '\'' => Some("'"), + '\\' => Some("\\"), + '0' => Some("\0"), + 'a' => Some("\x07"), + 'b' => Some("\x08"), + 'f' => Some("\x0c"), + 'v' => Some("\x0b"), + '&' => Some(""), + _ => None, + } +} + +named!(string_escape_numeric<&str, char>, map_opt!(alt!( + preceded!(tag!("x"), digits!(u32, 16)) | + preceded!(tag!("o"), digits!(u32, 8)) | + digits!(u32, 10) +), ::std::char::from_u32)); + +fn string_lit_inner(input: &str) -> nom::IResult<&str, String> { + use nom::ErrorKind; + use nom::IResult::*; ; + let mut s = String::new(); + let mut cs = input.char_indices().peekable(); + while let Some((i, c)) = cs.next() { + match c { + '"' => return nom::IResult::Done(&input[i..], s), + '\\' => match cs.next() { + Some((_, s)) if s.is_whitespace() => { + while cs.peek().map(|&(_, s)| s.is_whitespace()) + == Some(true) + { + let _ = cs.next(); + } + if cs.next().map(|p| p.1) != Some('\\') { + return Error(error_position!( + ErrorKind::Custom(4 /* FIXME */), + input + )); + } + } + Some((j, ec)) => { + if let Some(esc) = string_escape_single(ec) { + s.push_str(esc); + // FIXME Named ASCII escapes and control character escapes + } else { + match string_escape_numeric(&input[j..]) { + Done(rest, esc) => { + let &(k, _) = cs.peek().unwrap(); + // digits are always single byte ASCII characters + let consumed = input[k..].len() - rest.len(); + for _ in 0..consumed { + let _ = cs.next(); + } + s.push(esc); + } + Incomplete(s) => return Incomplete(s), + Error(e) => return Error(e), + } + } + } + _ => { + return Error(error_position!( + ErrorKind::Custom(5 /* FIXME */), + input + )); + } + }, + _ => s.push(c), + } + } + Error(error_position!(ErrorKind::Custom(3 /* FIXME */), input)) +} + +named!(string_lit<&str, String>, delimited!(tag!("\""), string_lit_inner, tag!("\""))); + +named!(keyword<&str, Keyword>, alt!( + value!(Keyword::Let, ident_tag!("let")) | + value!(Keyword::In, ident_tag!("in")) | + value!(Keyword::If, ident_tag!("if")) | + value!(Keyword::Then, ident_tag!("then")) | + value!(Keyword::Else, ident_tag!("else")) +)); + +named!(type_const<&str, Const>, alt!( + value!(Const::Type, ident_tag!("Type")) | + value!(Const::Kind, ident_tag!("Kind")) +)); + +named!(list_like<&str, ListLike>, alt!( + value!(ListLike::List, ident_tag!("List")) | + value!(ListLike::Optional, ident_tag!("Optional")) +)); + +named!(builtin<&str, Builtin>, alt!( + value!(NaturalFold, ident_tag!("Natural/fold")) | + value!(NaturalBuild, ident_tag!("Natural/build")) | + value!(NaturalIsZero, ident_tag!("Natural/isZero")) | + value!(NaturalEven, ident_tag!("Natural/even")) | + value!(NaturalOdd, ident_tag!("Natural/odd")) | + value!(NaturalShow, ident_tag!("Natural/show")) | + value!(Natural, ident_tag!("Natural")) | + value!(Integer, ident_tag!("Integer")) | + value!(Double, ident_tag!("Double")) | + value!(Text, ident_tag!("Text")) | + value!(ListBuild, ident_tag!("List/build")) | + value!(ListFold, ident_tag!("List/fold")) | + value!(ListLength, ident_tag!("List/length")) | + value!(ListHead, ident_tag!("List/head")) | + value!(ListLast, ident_tag!("List/last")) | + value!(ListIndexed, ident_tag!("List/indexed")) | + value!(ListReverse, ident_tag!("List/reverse")) | + value!(OptionalFold, ident_tag!("Optional/fold")) | + value!(Bool, ident_tag!("Bool")) +)); + +named!(token<&str, Tok>, alt!( + value!(Tok::Pi, ident_tag!("forall")) | + value!(Tok::Pi, tag!("∀")) | + value!(Tok::Lambda, tag!("\\")) | + value!(Tok::Lambda, tag!("λ")) | + value!(Tok::Combine, tag!("/\\")) | + value!(Tok::Combine, tag!("∧")) | + value!(Tok::Arrow, tag!("->")) | + value!(Tok::Arrow, tag!("→")) | + + map!(type_const, Tok::Const) | + map!(boolean, Tok::Bool) | + map!(keyword, Tok::Keyword) | + map!(builtin, Tok::Builtin) | + map!(list_like, Tok::ListLike) | + map!(natural, Tok::Natural) | + map!(integer, Tok::Integer) | + map!(identifier, Tok::Identifier) | + map!(string_lit, Tok::Text) | + + value!(Tok::BraceL, tag!("{")) | + value!(Tok::BraceR, tag!("}")) | + value!(Tok::BracketL, tag!("[")) | + value!(Tok::BracketR, tag!("]")) | + value!(Tok::ParenL, tag!("(")) | + value!(Tok::ParenR, tag!(")")) | + value!(Tok::BoolAnd, tag!("&&")) | + value!(Tok::BoolOr, tag!("||")) | + value!(Tok::CompareEQ, tag!("==")) | + value!(Tok::CompareNE, tag!("!=")) | + value!(Tok::Append, tag!("++")) | + value!(Tok::Times, tag!("*")) | + value!(Tok::Plus, tag!("+")) | + value!(Tok::Comma, tag!(",")) | + value!(Tok::Dot, tag!(".")) | + value!(Tok::Ascription, tag!(":")) | + value!(Tok::Equals, tag!("=")) +)); + +fn find_end(input: &str, ending: &str) -> Option<usize> { + input.find(ending).map(|i| i + ending.len()) +} + +pub struct Lexer<'input> { + input: &'input str, + offset: usize, +} + +impl<'input> Lexer<'input> { + pub fn new(input: &'input str) -> Self { + Lexer { + input: input, + offset: 0, + } + } + + fn current_input(&mut self) -> &'input str { + &self.input[self.offset..] + } + + fn skip_whitespace(&mut self) -> bool { + let input = self.current_input(); + let trimmed = input.trim_start(); + let whitespace_len = input.len() - trimmed.len(); + let skipped = whitespace_len > 0; + if skipped { + // println!("skipped {} whitespace bytes in {}..{}", whitespace_len, self.offset, self.offset + whitespace_len); + self.offset += whitespace_len; + } + skipped + } + + fn skip_comments(&mut self) -> bool { + let input = self.current_input(); + if !input.is_char_boundary(0) || !input.is_char_boundary(2) { + return false; + } + let skip = match &input[0..2] { + "{-" => find_end(input, "-}"), + "--" => find_end(input, "\n"), // Also skips past \r\n (CRLF) + _ => None, + } + .unwrap_or(0); + // println!("skipped {} bytes of comment", skip); + self.offset += skip; + skip != 0 + } + + fn skip_comments_and_whitespace(&mut self) { + while self.skip_whitespace() || self.skip_comments() {} + } +} + +impl<'input> Iterator for Lexer<'input> { + type Item = Spanned<Tok<'input>, usize, LexicalError>; + + fn next(&mut self) -> Option<Self::Item> { + use nom::IResult::*; + self.skip_comments_and_whitespace(); + let input = self.current_input(); + if input.is_empty() { + return None; + } + match token(input) { + Done(rest, t) => { + let parsed_len = input.len() - rest.len(); + //println!("parsed {} bytes => {:?}", parsed_len, t); + let start = self.offset; + self.offset += parsed_len; + Some(Ok((start, t, self.offset))) + } + Error(e) => { + let offset = self.offset; + self.offset = self.input.len(); + Some(Err(LexicalError::Error(offset, e))) + } + Incomplete(needed) => Some(Err(LexicalError::Incomplete(needed))), + } + } +} + +#[test] +fn test_lex() { + use self::Tok::*; + let s = "λ(b : Bool) → b == False"; + let expected = [ + Lambda, + ParenL, + Identifier("b"), + Ascription, + Builtin(crate::core::Builtin::Bool), + ParenR, + Arrow, + Identifier("b"), + CompareEQ, + Bool(false), + ]; + let lexer = Lexer::new(s); + let tokens = lexer.map(|r| r.unwrap().1).collect::<Vec<_>>(); + assert_eq!(&tokens, &expected); + + assert_eq!(string_lit(r#""a\&b""#).to_result(), Ok("ab".to_owned())); + assert_eq!( + string_lit(r#""a\ \b""#).to_result(), + Ok("ab".to_owned()) + ); + assert!(string_lit(r#""a\ b""#).is_err()); + assert_eq!(string_lit(r#""a\nb""#).to_result(), Ok("a\nb".to_owned())); + assert_eq!( + string_lit(r#""\o141\x62\99""#).to_result(), + Ok("abc".to_owned()) + ); +} diff --git a/dhall_core/src/lib.rs b/dhall_core/src/lib.rs new file mode 100644 index 0000000..e1d0f7d --- /dev/null +++ b/dhall_core/src/lib.rs @@ -0,0 +1,10 @@ +#![feature(box_patterns)] +#![feature(trace_macros)] + +pub mod core; +pub use crate::core::*; +use lalrpop_util::lalrpop_mod; +lalrpop_mod!(pub grammar); +mod grammar_util; +pub mod lexer; +pub mod parser; diff --git a/dhall_core/src/parser.rs b/dhall_core/src/parser.rs new file mode 100644 index 0000000..6b20c01 --- /dev/null +++ b/dhall_core/src/parser.rs @@ -0,0 +1,827 @@ +use std::collections::BTreeMap; +// use itertools::*; +use lalrpop_util; +use pest::iterators::Pair; +use pest::Parser; + +use dhall_parser::{DhallParser, Rule}; + +use crate::core; +use crate::core::{bx, BinOp, Builtin, Const, Expr, V}; +use crate::grammar; +use crate::grammar_util::{BoxExpr, ParsedExpr}; +use crate::lexer::{Lexer, LexicalError, Tok}; + +pub fn parse_expr_lalrpop( + s: &str, +) -> Result<BoxExpr, lalrpop_util::ParseError<usize, Tok, LexicalError>> { + grammar::ExprParser::new().parse(Lexer::new(s)) + // Ok(bx(Expr::BoolLit(false))) +} + +pub type ParseError = pest::error::Error<Rule>; + +pub type ParseResult<T> = Result<T, ParseError>; + +pub fn custom_parse_error(pair: &Pair<Rule>, msg: String) -> ParseError { + let msg = + format!("{} while matching on:\n{}", msg, debug_pair(pair.clone())); + let e = pest::error::ErrorVariant::CustomError { message: msg }; + pest::error::Error::new_from_span(e, pair.as_span()) +} + +fn debug_pair(pair: Pair<Rule>) -> String { + use std::fmt::Write; + let mut s = String::new(); + fn aux(s: &mut String, indent: usize, prefix: String, pair: Pair<Rule>) { + let indent_str = "| ".repeat(indent); + let rule = pair.as_rule(); + let contents = pair.as_str().clone(); + let mut inner = pair.into_inner(); + let mut first = true; + while let Some(p) = inner.next() { + if first { + first = false; + let last = inner.peek().is_none(); + if last && p.as_str() == contents { + let prefix = format!("{}{:?} > ", prefix, rule); + aux(s, indent, prefix, p); + continue; + } else { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(s, indent + 1, "".into(), p); + } + if first { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(&mut s, 0, "".into(), pair); + s +} + +/* Macro to pattern-match iterators. + * Returns `Result<_, IterMatchError<_>>`. + * + * Example: + * ``` + * let vec = vec![1, 2, 3]; + * + * match_iter!(vec.into_iter(); (x, y?, z) => { + * // x: T + * // y: Option<T> + * // z: T + * }) + * + * // or + * match_iter!(vec.into_iter(); (x, y, z*) => { + * // x, y: T + * // z: impl Iterator<T> + * }) + * ``` + * +*/ +#[derive(Debug)] +enum IterMatchError<T> { + NotEnoughItems, + TooManyItems, + NoMatchFound, + Other(T), // Allow other macros to inject their own errors +} +macro_rules! match_iter { + // Everything else pattern + (@match 0, $iter:expr, $x:ident* $($rest:tt)*) => { + match_iter!(@match 2, $iter $($rest)*); + #[allow(unused_mut)] + let mut $x = $iter; + }; + // Alias to use in macros + (@match 0, $iter:expr, $x:ident?? $($rest:tt)*) => { + match_iter!(@match 2, $iter $($rest)*); + #[allow(unused_mut)] + let mut $x = $iter; + }; + // Optional pattern + (@match 0, $iter:expr, $x:ident? $($rest:tt)*) => { + match_iter!(@match 1, $iter $($rest)*); + #[allow(unused_mut)] + let mut $x = $iter.next(); + match $iter.next() { + Some(_) => break Err(IterMatchError::TooManyItems), + None => {}, + }; + }; + // Normal pattern + (@match 0, $iter:expr, $x:ident $($rest:tt)*) => { + #[allow(unused_mut)] + let mut $x = match $iter.next() { + Some(x) => x, + None => break Err(IterMatchError::NotEnoughItems), + }; + match_iter!(@match 0, $iter $($rest)*); + }; + // Normal pattern after a variable length one: declare reversed and take from the end + (@match $w:expr, $iter:expr, $x:ident $($rest:tt)*) => { + match_iter!(@match $w, $iter $($rest)*); + #[allow(unused_mut)] + let mut $x = match $iter.next_back() { + Some(x) => x, + None => break Err(IterMatchError::NotEnoughItems), + }; + }; + + // Check no elements remain + (@match 0, $iter:expr $(,)*) => { + match $iter.next() { + Some(_) => break Err(IterMatchError::TooManyItems), + None => {}, + }; + }; + (@match $_:expr, $iter:expr) => {}; + + (@panic; $($args:tt)*) => { + { + let ret: Result<_, IterMatchError<()>> = match_iter!($($args)*); + ret.unwrap() + } + }; + ($iter:expr; ($($args:tt)*) => $body:expr) => { + { + #[allow(unused_mut)] + let mut iter = $iter; + // Not a real loop; used for error handling + let ret: Result<_, IterMatchError<_>> = loop { + match_iter!(@match 0, iter, $($args)*); + break Ok($body); + }; + ret + } + }; +} + +/* Extends match_iter with typed matches. Takes a callback that determines + * when a capture matches. + * Returns `Result<_, IterMatchError<_>>`; errors returned by the callback will + * get propagated using IterMatchError::Other. + * + * Example: + * ``` + * macro_rules! callback { + * (@type_callback, positive, $x:expr) => { + * if $x >= 0 { Ok($x) } else { Err(()) } + * }; + * (@type_callback, negative, $x:expr) => { + * if $x <= 0 { Ok($x) } else { Err(()) } + * }; + * (@type_callback, any, $x:expr) => { + * Ok($x) + * }; + * } + * + * let vec = vec![-1, 2, 3]; + * + * match_iter_typed!(callback; vec.into_iter(); + * (x: positive, y?: negative, z: any) => { ... }, + * ) + * ``` + * +*/ +macro_rules! match_iter_typed { + // Collect untyped arguments to pass to match_iter! + (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident : $ty:ident, $($rest:tt)*)) => { + match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x), ($($rest)*)) + }; + (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident? : $ty:ident, $($rest:tt)*)) => { + match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x?), ($($rest)*)) + }; + (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident* : $ty:ident, $($rest:tt)*)) => { + match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x??), ($($rest)*)) + }; + // Catch extra comma if exists + (@collect, ($($vars:tt)*), ($($args:tt)*), (,$($acc:tt)*), ($(,)*)) => { + match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*), ()) + }; + (@collect, ($iter:expr, $body:expr, $callback:ident, $error:ident), ($($args:tt)*), ($($acc:tt)*), ($(,)*)) => { + match_iter!($iter; ($($acc)*) => { + match_iter_typed!(@callback, $callback, $iter, $($args)*); + $body + }) + }; + + // Pass the matches through the callback + (@callback, $callback:ident, $iter:expr, $x:ident : $ty:ident $($rest:tt)*) => { + let $x = $callback!(@type_callback, $ty, $x); + #[allow(unused_mut)] + let mut $x = match $x { + Ok(x) => x, + Err(e) => break Err(IterMatchError::Other(e)), + }; + match_iter_typed!(@callback, $callback, $iter $($rest)*); + }; + (@callback, $callback: ident, $iter:expr, $x:ident? : $ty:ident $($rest:tt)*) => { + let $x = $x.map(|x| $callback!(@type_callback, $ty, x)); + #[allow(unused_mut)] + let mut $x = match $x { + Some(Ok(x)) => Some(x), + Some(Err(e)) => break Err(IterMatchError::Other(e)), + None => None, + }; + match_iter_typed!(@callback, $callback, $iter $($rest)*); + }; + (@callback, $callback: ident, $iter:expr, $x:ident* : $ty:ident $($rest:tt)*) => { + let $x = $x.map(|x| $callback!(@type_callback, $ty, x)).collect(); + let $x: Vec<_> = match $x { + Ok(x) => x, + Err(e) => break Err(IterMatchError::Other(e)), + }; + #[allow(unused_mut)] + let mut $x = $x.into_iter(); + match_iter_typed!(@callback, $callback, $iter $($rest)*); + }; + (@callback, $callback:ident, $iter:expr $(,)*) => {}; + + ($callback:ident; $iter:expr; ($($args:tt)*) => $body:expr) => { + { + #[allow(unused_mut)] + let mut iter = $iter; + match_iter_typed!(@collect, + (iter, $body, $callback, last_error), + ($($args)*), (), ($($args)*,) + ) + } + }; +} + +/* Extends match_iter and match_iter_typed with branching. + * Returns `Result<_, IterMatchError<_>>`; errors returned by the callback will + * get propagated using IterMatchError::Other. + * Allows multiple branches. The passed iterator must be Clone. + * Will check the branches in order, testing each branch using the callback macro provided. + * + * Example: + * ``` + * macro_rules! callback { + * (@type_callback, positive, $x:expr) => { + * if $x >= 0 { Ok($x) } else { Err(()) } + * }; + * (@type_callback, negative, $x:expr) => { + * if $x <= 0 { Ok($x) } else { Err(()) } + * }; + * (@type_callback, any, $x:expr) => { + * Ok($x) + * }; + * (@branch_callback, typed, $($args:tt)*) => { + * match_iter_typed!(callback; $($args)*) + * }; + * (@branch_callback, untyped, $($args:tt)*) => { + * match_iter!($($args)*) + * }; + * } + * + * let vec = vec![-1, 2, 3]; + * + * match_iter_branching!(branch_callback; vec.into_iter(); + * typed!(x: positive, y?: negative, z: any) => { ... }, + * untyped!(x, y, z) => { ... }, + * ) + * ``` + * +*/ +macro_rules! match_iter_branching { + (@noclone, $callback:ident; $arg:expr; $( $submac:ident!($($args:tt)*) => $body:expr ),* $(,)*) => { + { + #[allow(unused_assignments)] + let mut last_error = IterMatchError::NoMatchFound; + // Not a real loop; used for error handling + // Would use loop labels but they create warnings + #[allow(unreachable_code)] + loop { + $( + let matched: Result<_, IterMatchError<_>> = + $callback!(@branch_callback, $submac, $arg; ($($args)*) => $body); + #[allow(unused_assignments)] + match matched { + Ok(v) => break Ok(v), + Err(e) => last_error = e, + }; + )* + break Err(last_error); + } + } + }; + ($callback:ident; $iter:expr; $($args:tt)*) => { + { + #[allow(unused_mut)] + let mut iter = $iter; + match_iter_branching!(@noclone, $callback; iter.clone(); $($args)*) + } + }; +} + +macro_rules! match_pair { + (@type_callback, $ty:ident, $x:expr) => { + $ty($x) + }; + (@branch_callback, children, $pair:expr; $($args:tt)*) => { + { + #[allow(unused_mut)] + let mut pairs = $pair.clone().into_inner(); + match_iter_typed!(match_pair; pairs; $($args)*) + } + }; + (@branch_callback, self, $pair:expr; ($x:ident : $ty:ident) => $body:expr) => { + { + let $x = match_pair!(@type_callback, $ty, $pair.clone()); + match $x { + Ok($x) => Ok($body), + Err(e) => Err(IterMatchError::Other(e)), + } + } + }; + (@branch_callback, raw_pair, $pair:expr; ($x:ident) => $body:expr) => { + { + let $x = $pair.clone(); + Ok($body) + } + }; + (@branch_callback, captured_str, $pair:expr; ($x:ident) => $body:expr) => { + { + let $x = $pair.as_str(); + Ok($body) + } + }; + + ($pair:expr; $($args:tt)*) => { + { + let pair = $pair; + let result = match_iter_branching!(@noclone, match_pair; pair; $($args)*); + result.map_err(|e| match e { + IterMatchError::Other(e) => e, + _ => custom_parse_error(&pair, "No match found".to_owned()), + }) + } + }; +} + +macro_rules! make_pest_parse_function { + ($name:ident<$o:ty>; $submac:ident!( $($args:tt)* )) => ( + #[allow(unused_variables)] + #[allow(non_snake_case)] + fn $name<'a>(pair: Pair<'a, Rule>) -> ParseResult<$o> { + $submac!(pair; $($args)*) + } + ); +} + +macro_rules! named { + ($name:ident<$o:ty>; $($args:tt)*) => ( + make_pest_parse_function!($name<$o>; match_pair!( $($args)* )); + ); +} + +macro_rules! rule { + ($name:ident<$o:ty>; $($args:tt)*) => ( + make_pest_parse_function!($name<$o>; match_rule!( + Rule::$name => match_pair!( $($args)* ), + )); + ); +} + +macro_rules! rule_group { + ($name:ident<$o:ty>; $($ty:ident),*) => ( + make_pest_parse_function!($name<$o>; match_rule!( + $( + Rule::$ty => match_pair!(raw_pair!(p) => $ty(p)?), + )* + )); + ); +} + +macro_rules! match_rule { + ($pair:expr; $($pat:pat => $submac:ident!( $($args:tt)* ),)*) => { + { + #[allow(unreachable_patterns)] + match $pair.as_rule() { + $( + $pat => $submac!($pair; $($args)*), + )* + r => Err(custom_parse_error(&$pair, format!("Unexpected {:?}", r))), + } + } + }; +} + +rule!(EOI<()>; children!() => ()); + +named!(str<&'a str>; captured_str!(s) => s.trim()); + +named!(raw_str<&'a str>; captured_str!(s) => s); + +rule!(escaped_quote_pair<&'a str>; + children!() => "''" +); +rule!(escaped_interpolation<&'a str>; + children!() => "${" +); + +rule!(single_quote_continue<Vec<&'a str>>; + // TODO: handle interpolation + // children!(c: expression, rest: single_quote_continue) => { + // rest.push(c); rest + // }, + children!(c: escaped_quote_pair, rest: single_quote_continue) => { + rest.push(c); rest + }, + children!(c: escaped_interpolation, rest: single_quote_continue) => { + rest.push(c); rest + }, + // capture interpolation as string + children!(c: raw_str, rest: single_quote_continue) => { + rest.push(c); rest + }, + children!() => { + vec![] + }, +); + +rule!(let_binding<(&'a str, Option<BoxExpr<'a>>, BoxExpr<'a>)>; + children!(name: str, annot?: expression, expr: expression) => (name, annot, expr) +); + +named!(record_entry<(&'a str, BoxExpr<'a>)>; + children!(name: str, expr: expression) => (name, expr) +); + +named!(partial_record_entries<(BoxExpr<'a>, BTreeMap<&'a str, ParsedExpr<'a>>)>; + children!(expr: expression, entries*: record_entry) => { + let mut map: BTreeMap<&str, ParsedExpr> = BTreeMap::new(); + for (n, e) in entries { + map.insert(n, *e); + } + (expr, map) + } +); + +rule!(non_empty_record_literal<(BoxExpr<'a>, BTreeMap<&'a str, ParsedExpr<'a>>)>; + self!(x: partial_record_entries) => x +); + +rule!(non_empty_record_type<(BoxExpr<'a>, BTreeMap<&'a str, ParsedExpr<'a>>)>; + self!(x: partial_record_entries) => x +); + +rule!(union_type_entry<(&'a str, BoxExpr<'a>)>; + children!(name: str, expr: expression) => (name, expr) +); + +rule!(union_type_entries<BTreeMap<&'a str, ParsedExpr<'a>>>; + children!(entries*: union_type_entry) => { + let mut map: BTreeMap<&str, ParsedExpr> = BTreeMap::new(); + for (n, e) in entries { + map.insert(n, *e); + } + map + } +); + +rule!(non_empty_union_type_or_literal + <(Option<(&'a str, BoxExpr<'a>)>, BTreeMap<&'a str, ParsedExpr<'a>>)>; + children!(label: str, e: expression, entries: union_type_entries) => { + (Some((label, e)), entries) + }, + children!(l: str, e: expression, rest: non_empty_union_type_or_literal) => { + let (x, mut entries) = rest; + entries.insert(l, *e); + (x, entries) + }, + children!(l: str, e: expression) => { + let mut entries = BTreeMap::new(); + entries.insert(l, *e); + (None, entries) + }, +); + +rule!(empty_union_type<()>; children!() => ()); + +rule_group!(expression<BoxExpr<'a>>; + identifier_raw, + lambda_expression, + ifthenelse_expression, + let_expression, + forall_expression, + arrow_expression, + merge_expression, + empty_collection, + non_empty_optional, + + annotated_expression, + import_alt_expression, + or_expression, + plus_expression, + text_append_expression, + list_append_expression, + and_expression, + combine_expression, + prefer_expression, + combine_types_expression, + times_expression, + equal_expression, + not_equal_expression, + application_expression, + + selector_expression_raw, + literal_expression_raw, + empty_record_type, + empty_record_literal, + non_empty_record_type_or_literal, + union_type_or_literal, + non_empty_list_literal_raw, + final_expression +); + +// TODO: parse escapes and interpolation +rule!(double_quote_literal<String>; + children!(strs*: raw_str) => { + strs.collect() + } +); +rule!(single_quote_literal<String>; + children!(eol: raw_str, contents: single_quote_continue) => { + contents.push(eol); + contents.into_iter().rev().collect() + } +); + +rule!(double_literal_raw<core::Double>; + raw_pair!(pair) => { + pair.as_str().trim() + .parse() + .map_err(|e: std::num::ParseFloatError| custom_parse_error(&pair, format!("{}", e)))? + } +); + +rule!(minus_infinity_literal<()>; children!() => ()); +rule!(plus_infinity_literal<()>; children!() => ()); +rule!(NaN_raw<()>; children!() => ()); + +rule!(natural_literal_raw<core::Natural>; + raw_pair!(pair) => { + pair.as_str().trim() + .parse() + .map_err(|e: std::num::ParseIntError| custom_parse_error(&pair, format!("{}", e)))? + } +); + +rule!(integer_literal_raw<core::Integer>; + raw_pair!(pair) => { + pair.as_str().trim() + .parse() + .map_err(|e: std::num::ParseIntError| custom_parse_error(&pair, format!("{}", e)))? + } +); + +rule!(identifier_raw<BoxExpr<'a>>; + children!(name: str, idx?: natural_literal_raw) => { + match Builtin::parse(name) { + Some(b) => bx(Expr::Builtin(b)), + None => match name { + "True" => bx(Expr::BoolLit(true)), + "False" => bx(Expr::BoolLit(false)), + "Type" => bx(Expr::Const(Const::Type)), + "Kind" => bx(Expr::Const(Const::Kind)), + name => bx(Expr::Var(V(name, idx.unwrap_or(0)))), + } + } + } +); + +rule!(lambda_expression<BoxExpr<'a>>; + children!(label: str, typ: expression, body: expression) => { + bx(Expr::Lam(label, typ, body)) + } +); + +rule!(ifthenelse_expression<BoxExpr<'a>>; + children!(cond: expression, left: expression, right: expression) => { + bx(Expr::BoolIf(cond, left, right)) + } +); + +rule!(let_expression<BoxExpr<'a>>; + children!(bindings*: let_binding, final_expr: expression) => { + bindings.fold(final_expr, |acc, x| bx(Expr::Let(x.0, x.1, x.2, acc))) + } +); + +rule!(forall_expression<BoxExpr<'a>>; + children!(label: str, typ: expression, body: expression) => { + bx(Expr::Pi(label, typ, body)) + } +); + +rule!(arrow_expression<BoxExpr<'a>>; + children!(typ: expression, body: expression) => { + bx(Expr::Pi("_", typ, body)) + } +); + +rule!(merge_expression<BoxExpr<'a>>; + children!(x: expression, y: expression, z?: expression) => { + bx(Expr::Merge(x, y, z)) + } +); + +rule!(empty_collection<BoxExpr<'a>>; + children!(x: str, y: expression) => { + match x { + "Optional" => bx(Expr::OptionalLit(Some(y), vec![])), + "List" => bx(Expr::ListLit(Some(y), vec![])), + _ => unreachable!(), + } + } +); + +rule!(non_empty_optional<BoxExpr<'a>>; + children!(x: expression, _y: str, z: expression) => { + bx(Expr::OptionalLit(Some(z), vec![*x])) + } +); + +macro_rules! binop { + ($rule:ident, $op:ident) => { + rule!($rule<BoxExpr<'a>>; + children!(first: expression, rest*: expression) => { + rest.fold(first, |acc, e| bx(Expr::BinOp(BinOp::$op, acc, e))) + } + ); + }; +} + +rule!(annotated_expression<BoxExpr<'a>>; + children!(e: expression, annot: expression) => { + bx(Expr::Annot(e, annot)) + }, + children!(e: expression) => e, +); + +binop!(import_alt_expression, ImportAlt); +binop!(or_expression, BoolOr); +binop!(plus_expression, NaturalPlus); +binop!(text_append_expression, TextAppend); +binop!(list_append_expression, ListAppend); +binop!(and_expression, BoolAnd); +binop!(combine_expression, Combine); +binop!(prefer_expression, Prefer); +binop!(combine_types_expression, CombineTypes); +binop!(times_expression, NaturalTimes); +binop!(equal_expression, BoolEQ); +binop!(not_equal_expression, BoolNE); + +rule!(application_expression<BoxExpr<'a>>; + children!(first: expression, rest*: expression) => { + rest.fold(first, |acc, e| bx(Expr::App(acc, e))) + } +); + +rule!(selector_expression_raw<BoxExpr<'a>>; + children!(first: expression, rest*: str) => { + rest.fold(first, |acc, e| bx(Expr::Field(acc, e))) + } +); + +rule!(literal_expression_raw<BoxExpr<'a>>; + children!(n: double_literal_raw) => bx(Expr::DoubleLit(n)), + children!(n: minus_infinity_literal) => bx(Expr::DoubleLit(std::f64::NEG_INFINITY)), + children!(n: plus_infinity_literal) => bx(Expr::DoubleLit(std::f64::INFINITY)), + children!(n: NaN_raw) => bx(Expr::DoubleLit(std::f64::NAN)), + children!(n: natural_literal_raw) => bx(Expr::NaturalLit(n)), + children!(n: integer_literal_raw) => bx(Expr::IntegerLit(n)), + children!(s: double_quote_literal) => bx(Expr::TextLit(s)), + children!(s: single_quote_literal) => bx(Expr::TextLit(s)), + children!(e: expression) => e, +); + +rule!(empty_record_type<BoxExpr<'a>>; + children!() => bx(Expr::Record(BTreeMap::new())) +); +rule!(empty_record_literal<BoxExpr<'a>>; + children!() => bx(Expr::RecordLit(BTreeMap::new())) +); +rule!(non_empty_record_type_or_literal<BoxExpr<'a>>; + children!(first_label: str, rest: non_empty_record_type) => { + let (first_expr, mut map) = rest; + map.insert(first_label, *first_expr); + bx(Expr::Record(map)) + }, + children!(first_label: str, rest: non_empty_record_literal) => { + let (first_expr, mut map) = rest; + map.insert(first_label, *first_expr); + bx(Expr::RecordLit(map)) + }, +); + +rule!(union_type_or_literal<BoxExpr<'a>>; + children!(_e: empty_union_type) => { + bx(Expr::Union(BTreeMap::new())) + }, + children!(x: non_empty_union_type_or_literal) => { + match x { + (Some((l, e)), entries) => bx(Expr::UnionLit(l, e, entries)), + (None, entries) => bx(Expr::Union(entries)), + } + }, +); + +rule!(non_empty_list_literal_raw<BoxExpr<'a>>; + children!(items*: expression) => { + bx(Expr::ListLit(None, items.map(|x| *x).collect())) + } +); + +rule!(final_expression<BoxExpr<'a>>; + children!(e: expression, _eoi: EOI) => e +); + +pub fn parse_expr_pest(s: &str) -> ParseResult<BoxExpr> { + let pairs = DhallParser::parse(Rule::final_expression, s)?; + // Match the only item in the pairs iterator + match_iter!(@panic; pairs; (p) => expression(p)) + // Ok(bx(Expr::BoolLit(false))) +} + +#[test] +fn test_parse() { + use crate::core::BinOp::*; + use crate::core::Expr::*; + // let expr = r#"{ x = "foo", y = 4 }.x"#; + // let expr = r#"(1 + 2) * 3"#; + let expr = r#"if True then 1 + 3 * 5 else 2"#; + println!("{:?}", parse_expr_lalrpop(expr)); + use std::thread; + // I don't understand why it stack overflows even on tiny expressions... + thread::Builder::new() + .stack_size(3 * 1024 * 1024) + .spawn(move || match parse_expr_pest(expr) { + Err(e) => { + println!("{:?}", e); + println!("{}", e); + } + ok => println!("{:?}", ok), + }) + .unwrap() + .join() + .unwrap(); + // assert_eq!(parse_expr_pest(expr).unwrap(), parse_expr_lalrpop(expr).unwrap()); + // assert!(false); + + println!("test {:?}", parse_expr_lalrpop("3 + 5 * 10")); + assert!(parse_expr_lalrpop("22").is_ok()); + assert!(parse_expr_lalrpop("(22)").is_ok()); + assert_eq!( + parse_expr_lalrpop("3 + 5 * 10").ok(), + Some(Box::new(BinOp( + NaturalPlus, + Box::new(NaturalLit(3)), + Box::new(BinOp( + NaturalTimes, + Box::new(NaturalLit(5)), + Box::new(NaturalLit(10)) + )) + ))) + ); + // The original parser is apparently right-associative + assert_eq!( + parse_expr_lalrpop("2 * 3 * 4").ok(), + Some(Box::new(BinOp( + NaturalTimes, + Box::new(NaturalLit(2)), + Box::new(BinOp( + NaturalTimes, + Box::new(NaturalLit(3)), + Box::new(NaturalLit(4)) + )) + ))) + ); + assert!(parse_expr_lalrpop("((((22))))").is_ok()); + assert!(parse_expr_lalrpop("((22)").is_err()); + println!("{:?}", parse_expr_lalrpop("\\(b : Bool) -> b == False")); + assert!(parse_expr_lalrpop("\\(b : Bool) -> b == False").is_ok()); + println!("{:?}", parse_expr_lalrpop("foo.bar")); + assert!(parse_expr_lalrpop("foo.bar").is_ok()); + assert!(parse_expr_lalrpop("[] : List Bool").is_ok()); + + // println!("{:?}", parse_expr_lalrpop("< Left = True | Right : Natural >")); + // println!("{:?}", parse_expr_lalrpop(r#""bl${42}ah""#)); + // assert!(parse_expr_lalrpop("< Left = True | Right : Natural >").is_ok()); +} |