From 564a5f37b106c69d8ebe9aec2f665f5222b3dfda Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Wed, 6 Mar 2019 12:19:26 +0100 Subject: Split-off core into its own crate --- dhall_core/src/core.rs | 1205 ++++++++++++++++++++++++++++++++++++++++ dhall_core/src/grammar.lalrpop | 164 ++++++ dhall_core/src/grammar_util.rs | 7 + dhall_core/src/lexer.rs | 394 +++++++++++++ dhall_core/src/lib.rs | 10 + dhall_core/src/parser.rs | 827 +++++++++++++++++++++++++++ 6 files changed, 2607 insertions(+) create mode 100644 dhall_core/src/core.rs create mode 100644 dhall_core/src/grammar.lalrpop create mode 100644 dhall_core/src/grammar_util.rs create mode 100644 dhall_core/src/lexer.rs create mode 100644 dhall_core/src/lib.rs create mode 100644 dhall_core/src/parser.rs (limited to 'dhall_core/src') diff --git a/dhall_core/src/core.rs b/dhall_core/src/core.rs new file mode 100644 index 0000000..340cb04 --- /dev/null +++ b/dhall_core/src/core.rs @@ -0,0 +1,1205 @@ +#![allow(non_snake_case)] +use std::collections::BTreeMap; +use std::fmt::{self, Display}; +use std::path::PathBuf; + +/// Constants for a pure type system +/// +/// The only axiom is: +/// +/// ```c +/// ⊦ Type : Kind +/// ``` +/// +/// ... and the valid rule pairs are: +/// +/// ```c +/// ⊦ Type ↝ Type : Type -- Functions from terms to terms (ordinary functions) +/// ⊦ Kind ↝ Type : Type -- Functions from types to terms (polymorphic functions) +/// ⊦ Kind ↝ Kind : Kind -- Functions from types to types (type constructors) +/// ``` +/// +/// These are the same rule pairs as System Fω +/// +/// Note that Dhall does not support functions from terms to types and therefore +/// Dhall is not a dependently typed language +/// +#[derive(Debug, Copy, Clone, PartialEq, Eq)] // (Show, Bounded, Enum) +pub enum Const { + Type, + Kind, +} + +/// Path to an external resource +#[derive(Debug, Clone, PartialEq, Eq)] // (Eq, Ord, Show) +pub enum Path { + File(PathBuf), + URL(String), +} + +/// Label for a bound variable +/// +/// The `String` field is the variable's name (i.e. \"`x`\"). +/// +/// The `Int` field disambiguates variables with the same name if there are +/// multiple bound variables of the same name in scope. Zero refers to the +/// nearest bound variable and the index increases by one for each bound +/// variable of the same name going outward. The following diagram may help: +/// +/// ```c +/// +---refers to--+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@0 +/// +/// +------------------refers to-----------------+ +/// | | +/// v | +/// \(x : Type) -> \(y : Type) -> \(x : Type) -> x@1 +/// ``` +/// +/// This `Int` behaves like a De Bruijn index in the special case where all +/// variables have the same name. +/// +/// You can optionally omit the index if it is `0`: +/// +/// ```c +/// +refers to+ +/// | | +/// v | +/// \(x : *) -> \(y : *) -> \(x : *) -> x +/// ``` +/// +/// Zero indices are omitted when pretty-printing `Var`s and non-zero indices +/// appear as a numeric suffix. +/// +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub struct V<'i>(pub &'i str, pub usize); + +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum BinOp { + /// x && y` + BoolAnd, + /// x || y` + BoolOr, + /// x == y` + BoolEQ, + /// x != y` + BoolNE, + /// x + y` + NaturalPlus, + /// x * y` + NaturalTimes, + /// x ++ y` + TextAppend, + /// x ∧ y` + Combine, + /// x //\\ y + CombineTypes, + /// x ? y + ImportAlt, + /// x // y + Prefer, + /// x # y + ListAppend, +} + +/// Syntax tree for expressions +#[derive(Debug, Clone, PartialEq)] +pub enum Expr<'i, S, A> { + /// `Const c ~ c` + Const(Const), + /// `Var (V x 0) ~ x`
+ /// `Var (V x n) ~ x@n` + Var(V<'i>), + /// `Lam x A b ~ λ(x : A) -> b` + Lam(&'i str, Box>, Box>), + /// `Pi "_" A B ~ A -> B` + /// `Pi x A B ~ ∀(x : A) -> B` + Pi(&'i str, Box>, Box>), + /// `App f A ~ f A` + App(Box>, Box>), + /// `Let x Nothing r e ~ let x = r in e` + /// `Let x (Just t) r e ~ let x : t = r in e` + Let( + &'i str, + Option>>, + Box>, + Box>, + ), + /// `Annot x t ~ x : t` + Annot(Box>, Box>), + /// Built-in values + Builtin(Builtin), + // Binary operations + BinOp(BinOp, Box>, Box>), + /// `BoolLit b ~ b` + BoolLit(bool), + /// `BoolIf x y z ~ if x then y else z` + BoolIf( + Box>, + Box>, + Box>, + ), + /// `NaturalLit n ~ +n` + NaturalLit(Natural), + /// `IntegerLit n ~ n` + IntegerLit(Integer), + /// `DoubleLit n ~ n` + DoubleLit(Double), + /// `TextLit t ~ t` + TextLit(Builder), + /// `ListLit t [x, y, z] ~ [x, y, z] : List t` + ListLit(Option>>, Vec>), + /// `OptionalLit t [e] ~ [e] : Optional t` + /// `OptionalLit t [] ~ [] : Optional t` + OptionalLit(Option>>, Vec>), + /// `Record [(k1, t1), (k2, t2)] ~ { k1 : t1, k2 : t1 }` + Record(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `RecordLit [(k1, v1), (k2, v2)] ~ { k1 = v1, k2 = v2 }` + RecordLit(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `Union [(k1, t1), (k2, t2)] ~ < k1 : t1, k2 : t2 >` + Union(BTreeMap<&'i str, Expr<'i, S, A>>), + /// `UnionLit (k1, v1) [(k2, t2), (k3, t3)] ~ < k1 = t1, k2 : t2, k3 : t3 >` + UnionLit( + &'i str, + Box>, + BTreeMap<&'i str, Expr<'i, S, A>>, + ), + /// `Merge x y t ~ merge x y : t` + Merge( + Box>, + Box>, + Option>>, + ), + /// `Field e x ~ e.x` + Field(Box>, &'i str), + /// `Note S x ~ e` + Note(S, Box>), + /// `Embed path ~ path` + Embed(A), + + FailedParse(String, Vec>), +} + +/// Built-ins +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum Builtin { + Bool, + Natural, + Integer, + Double, + Text, + List, + Optional, + NaturalBuild, + NaturalFold, + NaturalIsZero, + NaturalEven, + NaturalOdd, + NaturalToInteger, + NaturalShow, + IntegerToDouble, + IntegerShow, + DoubleShow, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + OptionalFold, + OptionalBuild, + TextShow, +} + +impl<'i> From<&'i str> for V<'i> { + fn from(s: &'i str) -> Self { + V(s, 0) + } +} + +impl<'i, S, A> From<&'i str> for Expr<'i, S, A> { + fn from(s: &'i str) -> Self { + Expr::Var(s.into()) + } +} + +impl<'i, S, A> From for Expr<'i, S, A> { + fn from(t: Builtin) -> Self { + Expr::Builtin(t) + } +} + +impl<'i, S, A> Expr<'i, S, A> { + fn bool_lit(&self) -> Option { + match *self { + Expr::BoolLit(v) => Some(v), + _ => None, + } + } + + fn natural_lit(&self) -> Option { + match *self { + Expr::NaturalLit(v) => Some(v), + _ => None, + } + } + + fn text_lit(&self) -> Option { + match *self { + Expr::TextLit(ref t) => Some(t.clone()), // FIXME? + _ => None, + } + } +} + +// There is a one-to-one correspondence between the formatters in this section +// and the grammar in grammar.lalrpop. Each formatter is named after the +// corresponding grammar rule and the relationship between formatters exactly matches +// the relationship between grammar rules. This leads to the nice emergent property +// of automatically getting all the parentheses and precedences right. +// +// This approach has one major disadvantage: you can get an infinite loop if +// you add a new constructor to the syntax tree without adding a matching +// case the corresponding builder. + +impl<'i, S, A: Display> Display for Expr<'i, S, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + // buildExprA + use crate::Expr::*; + match self { + &Annot(ref a, ref b) => { + a.fmt_b(f)?; + write!(f, " : ")?; + b.fmt(f) + } + &Note(_, ref b) => b.fmt(f), + a => a.fmt_b(f), + } + } +} + +impl<'i, S, A: Display> Expr<'i, S, A> { + fn fmt_b(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Lam(a, ref b, ref c) => { + write!(f, "λ({} : ", a)?; + b.fmt(f)?; + write!(f, ") → ")?; + c.fmt_b(f) + } + &BoolIf(ref a, ref b, ref c) => { + write!(f, "if ")?; + a.fmt(f)?; + write!(f, " then ")?; + b.fmt_b(f)?; + write!(f, " else ")?; + c.fmt_c(f) + } + &Pi("_", ref b, ref c) => { + b.fmt_c(f)?; + write!(f, " → ")?; + c.fmt_b(f) + } + &Pi(a, ref b, ref c) => { + write!(f, "∀({} : ", a)?; + b.fmt(f)?; + write!(f, ") → ")?; + c.fmt_b(f) + } + &Let(a, None, ref c, ref d) => { + write!(f, "let {} = ", a)?; + c.fmt(f)?; + write!(f, ") → ")?; + d.fmt_b(f) + } + &Let(a, Some(ref b), ref c, ref d) => { + write!(f, "let {} : ", a)?; + b.fmt(f)?; + write!(f, " = ")?; + c.fmt(f)?; + write!(f, ") → ")?; + d.fmt_b(f) + } + &ListLit(ref t, ref es) => { + fmt_list("[", "]", es, f, |e, f| e.fmt(f))?; + match t { + Some(t) => { + write!(f, " : List ")?; + t.fmt_e(f)? + } + None => {} + } + Ok(()) + } + &OptionalLit(ref t, ref es) => { + fmt_list("[", "]", es, f, |e, f| e.fmt(f))?; + match t { + Some(t) => { + write!(f, " : Optional ")?; + t.fmt_e(f)? + } + None => {} + } + Ok(()) + } + &Merge(ref a, ref b, ref c) => { + write!(f, "merge ")?; + a.fmt_e(f)?; + write!(f, " ")?; + b.fmt_e(f)?; + match c { + Some(c) => { + write!(f, " : ")?; + c.fmt_d(f)? + } + None => {} + } + Ok(()) + } + &Note(_, ref b) => b.fmt_b(f), + a => a.fmt_c(f), + } + } + + fn fmt_c(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::BinOp::*; + use crate::Expr::*; + match self { + // FIXME precedence + &BinOp(BoolOr, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" || ")?; + b.fmt_c(f) + } + &BinOp(TextAppend, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ++ ")?; + b.fmt_c(f) + } + &BinOp(NaturalPlus, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" + ")?; + b.fmt_c(f) + } + &BinOp(BoolAnd, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" && ")?; + b.fmt_c(f) + } + &BinOp(Combine, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ^ ")?; + b.fmt_c(f) + } + &BinOp(NaturalTimes, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" * ")?; + b.fmt_c(f) + } + &BinOp(BoolEQ, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" == ")?; + b.fmt_c(f) + } + &BinOp(BoolNE, ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" != ")?; + b.fmt_c(f) + } + &Note(_, ref b) => b.fmt_c(f), + a => a.fmt_d(f), + } + } + + fn fmt_d(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &App(ref a, ref b) => { + a.fmt_d(f)?; + f.write_str(" ")?; + b.fmt_e(f) + } + &Note(_, ref b) => b.fmt_d(f), + a => a.fmt_e(f), + } + } + + fn fmt_e(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Field(ref a, b) => { + a.fmt_e(f)?; + write!(f, ".{}", b) + } + &Note(_, ref b) => b.fmt_e(f), + a => a.fmt_f(f), + } + } + + fn fmt_f(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Expr::*; + match self { + &Var(a) => a.fmt(f), + &Const(k) => k.fmt(f), + &Builtin(v) => v.fmt(f), + &BoolLit(true) => f.write_str("True"), + &BoolLit(false) => f.write_str("False"), + &IntegerLit(a) => a.fmt(f), + &NaturalLit(a) => { + f.write_str("+")?; + a.fmt(f) + } + &DoubleLit(a) => a.fmt(f), + &TextLit(ref a) => ::fmt(a, f), // FIXME Format with Haskell escapes + &Record(ref a) if a.is_empty() => f.write_str("{}"), + &Record(ref a) => fmt_list("{ ", " }", a, f, |(k, t), f| { + write!(f, "{} : {}", k, t) + }), + &RecordLit(ref a) if a.is_empty() => f.write_str("{=}"), + &RecordLit(ref a) => fmt_list("{ ", " }", a, f, |(k, v), f| { + write!(f, "{} = {}", k, v) + }), + &Union(ref _a) => f.write_str("Union"), + &UnionLit(_a, ref _b, ref _c) => f.write_str("UnionLit"), + &Embed(ref a) => a.fmt(f), + &Note(_, ref b) => b.fmt_f(f), + a => write!(f, "({})", a), + } + } +} + +fn fmt_list( + open: &str, + close: &str, + it: I, + f: &mut fmt::Formatter, + func: F, +) -> Result<(), fmt::Error> +where + I: IntoIterator, + F: Fn(T, &mut fmt::Formatter) -> Result<(), fmt::Error>, +{ + f.write_str(open)?; + for (i, x) in it.into_iter().enumerate() { + if i > 0 { + f.write_str(", ")?; + } + func(x, f)?; + } + f.write_str(close) +} + +impl Display for Const { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + ::fmt(self, f) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::Builtin::*; + f.write_str(match *self { + Bool => "Bool", + Natural => "Natural", + Integer => "Integer", + Double => "Double", + Text => "Text", + List => "List", + Optional => "Optional", + NaturalBuild => "Natural/build", + NaturalFold => "Natural/fold", + NaturalIsZero => "Natural/isZero", + NaturalEven => "Natural/even", + NaturalOdd => "Natural/odd", + NaturalToInteger => "Natural/toInteger", + NaturalShow => "Natural/show", + IntegerToDouble => "Integer/toDouble", + IntegerShow => "Integer/show", + DoubleShow => "Double/show", + ListBuild => "List/build", + ListFold => "List/fold", + ListLength => "List/length", + ListHead => "List/head", + ListLast => "List/last", + ListIndexed => "List/indexed", + ListReverse => "List/reverse", + OptionalFold => "Optional/fold", + OptionalBuild => "Optional/build", + TextShow => "Text/show", + }) + } +} + +impl Builtin { + pub fn parse(s: &str) -> Option { + use self::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +impl<'i> Display for V<'i> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let V(x, n) = *self; + f.write_str(x)?; + if n != 0 { + write!(f, "@{}", n)?; + } + Ok(()) + } +} + +pub fn pi<'i, S, A, Name, Et, Ev>( + var: Name, + ty: Et, + value: Ev, +) -> Expr<'i, S, A> +where + Name: Into<&'i str>, + Et: Into>, + Ev: Into>, +{ + Expr::Pi(var.into(), bx(ty.into()), bx(value.into())) +} + +pub fn app<'i, S, A, Ef, Ex>(f: Ef, x: Ex) -> Expr<'i, S, A> +where + Ef: Into>, + Ex: Into>, +{ + Expr::App(bx(f.into()), bx(x.into())) +} + +pub type Builder = String; +pub type Double = f64; +pub type Int = isize; +pub type Integer = isize; +pub type Natural = usize; + +/// A void type +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum X {} + +impl Display for X { + fn fmt(&self, _: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match *self {} + } +} + +pub fn bx(x: T) -> Box { + Box::new(x) +} + +fn add_ui(u: usize, i: isize) -> usize { + if i < 0 { + u.checked_sub(i.checked_neg().unwrap() as usize).unwrap() + } else { + u.checked_add(i as usize).unwrap() + } +} + +fn map_record_value<'a, I, K, V, U, F>(it: I, f: F) -> BTreeMap +where + I: IntoIterator, + K: Eq + Ord + Copy + 'a, + V: 'a, + F: Fn(&V) -> U, +{ + it.into_iter().map(|(&k, v)| (k, f(v))).collect() +} + +fn map_op2(f: F, g: G, a: T, b: T) -> V +where + F: FnOnce(U, U) -> V, + G: Fn(T) -> U, +{ + f(g(a), g(b)) +} + +/// `shift` is used by both normalization and type-checking to avoid variable +/// capture by shifting variable indices +/// +/// For example, suppose that you were to normalize the following expression: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → (λ(y : a) → λ(x : a) → y) x +/// ``` +/// +/// If you were to substitute `y` with `x` without shifting any variable +/// indices, then you would get the following incorrect result: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → λ(x : a) → x -- Incorrect normalized form +/// ``` +/// +/// In order to substitute `x` in place of `y` we need to `shift` `x` by `1` in +/// order to avoid being misinterpreted as the `x` bound by the innermost +/// lambda. If we perform that `shift` then we get the correct result: +/// +/// ```c +/// λ(a : Type) → λ(x : a) → λ(x : a) → x@1 +/// ``` +/// +/// As a more worked example, suppose that you were to normalize the following +/// expression: +/// +/// ```c +/// λ(a : Type) +/// → λ(f : a → a → a) +/// → λ(x : a) +/// → λ(x : a) +/// → (λ(x : a) → f x x@1) x@1 +/// ``` +/// +/// The correct normalized result would be: +/// +/// ```c +/// λ(a : Type) +/// → λ(f : a → a → a) +/// → λ(x : a) +/// → λ(x : a) +/// → f x@1 x +/// ``` +/// +/// The above example illustrates how we need to both increase and decrease +/// variable indices as part of substitution: +/// +/// * We need to increase the index of the outer `x\@1` to `x\@2` before we +/// substitute it into the body of the innermost lambda expression in order +/// to avoid variable capture. This substitution changes the body of the +/// lambda expression to `(f x\@2 x\@1)` +/// +/// * We then remove the innermost lambda and therefore decrease the indices of +/// both `x`s in `(f x\@2 x\@1)` to `(f x\@1 x)` in order to reflect that one +/// less `x` variable is now bound within that scope +/// +/// Formally, `(shift d (V x n) e)` modifies the expression `e` by adding `d` to +/// the indices of all variables named `x` whose indices are greater than +/// `(n + m)`, where `m` is the number of bound variables of the same name +/// within that scope +/// +/// In practice, `d` is always `1` or `-1` because we either: +/// +/// * increment variables by `1` to avoid variable capture during substitution +/// * decrement variables by `1` when deleting lambdas after substitution +/// +/// `n` starts off at `0` when substitution begins and increments every time we +/// descend into a lambda or let expression that binds a variable of the same +/// name in order to avoid shifting the bound variables by mistake. +/// +pub fn shift<'i, S, T, A: Clone>( + d: isize, + v: V, + e: &Expr<'i, S, A>, +) -> Expr<'i, T, A> { + use crate::Expr::*; + let V(x, n) = v; + match *e { + Const(a) => Const(a), + Var(V(x2, n2)) => { + let n3 = if x == x2 && n <= n2 { + add_ui(n2, d) + } else { + n2 + }; + Var(V(x2, n3)) + } + Lam(x2, ref tA, ref b) => { + let n2 = if x == x2 { n + 1 } else { n }; + let tA2 = shift(d, V(x, n), tA); + let b2 = shift(d, V(x, n2), b); + Lam(x2, bx(tA2), bx(b2)) + } + Pi(x2, ref tA, ref tB) => { + let n2 = if x == x2 { n + 1 } else { n }; + let tA2 = shift(d, V(x, n), tA); + let tB2 = shift(d, V(x, n2), tB); + pi(x2, tA2, tB2) + } + App(ref f, ref a) => app(shift(d, v, f), shift(d, v, a)), + Let(f, ref mt, ref r, ref e) => { + let n2 = if x == f { n + 1 } else { n }; + let e2 = shift(d, V(x, n2), e); + let mt2 = mt.as_ref().map(|t| bx(shift(d, V(x, n), t))); + let r2 = shift(d, V(x, n), r); + Let(f, mt2, bx(r2), bx(e2)) + } + Annot(ref a, ref b) => shift_op2(Annot, d, v, a, b), + Builtin(v) => Builtin(v), + BoolLit(a) => BoolLit(a), + BinOp(o, ref a, ref b) => shift_op2(|x, y| BinOp(o, x, y), d, v, a, b), + BoolIf(ref a, ref b, ref c) => { + BoolIf(bx(shift(d, v, a)), bx(shift(d, v, b)), bx(shift(d, v, c))) + } + NaturalLit(a) => NaturalLit(a), + IntegerLit(a) => IntegerLit(a), + DoubleLit(a) => DoubleLit(a), + TextLit(ref a) => TextLit(a.clone()), + ListLit(ref t, ref es) => ListLit( + t.as_ref().map(|t| bx(shift(d, v, t))), + es.iter().map(|e| shift(d, v, e)).collect(), + ), + OptionalLit(ref t, ref es) => OptionalLit( + t.as_ref().map(|t| bx(shift(d, v, t))), + es.iter().map(|e| shift(d, v, e)).collect(), + ), + Record(ref a) => Record(map_record_value(a, |val| shift(d, v, val))), + RecordLit(ref a) => { + RecordLit(map_record_value(a, |val| shift(d, v, val))) + } + Union(ref a) => Union(map_record_value(a, |val| shift(d, v, val))), + UnionLit(k, ref uv, ref a) => UnionLit( + k, + bx(shift(d, v, uv)), + map_record_value(a, |val| shift(d, v, val)), + ), + Merge(ref a, ref b, ref c) => Merge( + bx(shift(d, v, a)), + bx(shift(d, v, b)), + c.as_ref().map(|c| bx(shift(d, v, c))), + ), + Field(ref a, b) => Field(bx(shift(d, v, a)), b), + Note(_, ref b) => shift(d, v, b), + // The Dhall compiler enforces that all embedded values are closed expressions + // and `shift` does nothing to a closed expression + Embed(ref p) => Embed(p.clone()), + _ => panic!(), + } +} + +fn shift_op2<'i, S, T, A, F>( + f: F, + d: isize, + v: V, + a: &Expr<'i, S, A>, + b: &Expr<'i, S, A>, +) -> Expr<'i, T, A> +where + F: FnOnce(Box>, Box>) -> Expr<'i, T, A>, + A: Clone, +{ + map_op2(f, |x| bx(shift(d, v, x)), a, b) +} + +/// Substitute all occurrences of a variable with an expression +/// +/// ```c +/// subst x C B ~ B[x := C] +/// ``` +/// +pub fn subst<'i, S, T, A>( + v: V<'i>, + e: &Expr<'i, S, A>, + b: &Expr<'i, T, A>, +) -> Expr<'i, S, A> +where + S: Clone, + A: Clone, +{ + use crate::Expr::*; + let V(x, n) = v; + match *b { + Const(a) => Const(a), + Lam(y, ref tA, ref b) => { + let n2 = if x == y { n + 1 } else { n }; + let b2 = subst(V(x, n2), &shift(1, V(y, 0), e), b); + let tA2 = subst(V(x, n), e, tA); + Lam(y, bx(tA2), bx(b2)) + } + Pi(y, ref tA, ref tB) => { + let n2 = if x == y { n + 1 } else { n }; + let tB2 = subst(V(x, n2), &shift(1, V(y, 0), e), tB); + let tA2 = subst(V(x, n), e, tA); + pi(y, tA2, tB2) + } + App(ref f, ref a) => { + let f2 = subst(v, e, f); + let a2 = subst(v, e, a); + app(f2, a2) + } + Var(v2) => { + if v == v2 { + e.clone() + } else { + Var(v2) + } + } + Let(f, ref mt, ref r, ref b) => { + let n2 = if x == f { n + 1 } else { n }; + let b2 = subst(V(x, n2), &shift(1, V(f, 0), e), b); + let mt2 = mt.as_ref().map(|t| bx(subst(V(x, n), e, t))); + let r2 = subst(V(x, n), e, r); + Let(f, mt2, bx(r2), bx(b2)) + } + Annot(ref a, ref b) => subst_op2(Annot, v, e, a, b), + Builtin(v) => Builtin(v), + BoolLit(a) => BoolLit(a), + BinOp(o, ref a, ref b) => subst_op2(|x, y| BinOp(o, x, y), v, e, a, b), + BoolIf(ref a, ref b, ref c) => { + BoolIf(bx(subst(v, e, a)), bx(subst(v, e, b)), bx(subst(v, e, c))) + } + NaturalLit(a) => NaturalLit(a), + IntegerLit(a) => IntegerLit(a), + DoubleLit(a) => DoubleLit(a), + TextLit(ref a) => TextLit(a.clone()), + ListLit(ref a, ref b) => { + let a2 = a.as_ref().map(|a| bx(subst(v, e, a))); + let b2 = b.iter().map(|be| subst(v, e, be)).collect(); + ListLit(a2, b2) + } + OptionalLit(ref a, ref b) => { + let a2 = a.as_ref().map(|a| bx(subst(v, e, a))); + let b2 = b.iter().map(|be| subst(v, e, be)).collect(); + OptionalLit(a2, b2) + } + Record(ref kts) => Record(map_record_value(kts, |t| subst(v, e, t))), + RecordLit(ref kvs) => { + RecordLit(map_record_value(kvs, |val| subst(v, e, val))) + } + Union(ref kts) => Union(map_record_value(kts, |t| subst(v, e, t))), + UnionLit(k, ref uv, ref kvs) => UnionLit( + k, + bx(subst(v, e, uv)), + map_record_value(kvs, |val| subst(v, e, val)), + ), + Merge(ref a, ref b, ref c) => Merge( + bx(subst(v, e, a)), + bx(subst(v, e, b)), + c.as_ref().map(|c| bx(subst(v, e, c))), + ), + Field(ref a, b) => Field(bx(subst(v, e, a)), b), + Note(_, ref b) => subst(v, e, b), + Embed(ref p) => Embed(p.clone()), + _ => panic!(), + } +} + +fn subst_op2<'i, S, T, A, F>( + f: F, + v: V<'i>, + e: &Expr<'i, S, A>, + a: &Expr<'i, T, A>, + b: &Expr<'i, T, A>, +) -> Expr<'i, S, A> +where + F: FnOnce(Box>, Box>) -> Expr<'i, S, A>, + S: Clone, + A: Clone, +{ + map_op2(f, |x| bx(subst(v, e, x)), a, b) +} + +/// Reduce an expression to its normal form, performing beta reduction +/// +/// `normalize` does not type-check the expression. You may want to type-check +/// expressions before normalizing them since normalization can convert an +/// ill-typed expression into a well-typed expression. +/// +/// However, `normalize` will not fail if the expression is ill-typed and will +/// leave ill-typed sub-expressions unevaluated. +/// +pub fn normalize<'i, S, T, A>(e: &Expr<'i, S, A>) -> Expr<'i, T, A> +where + S: Clone + fmt::Debug, + T: Clone + fmt::Debug, + A: Clone + fmt::Debug, +{ + use crate::BinOp::*; + use crate::Builtin::*; + use crate::Expr::*; + match *e { + Const(k) => Const(k), + Var(v) => Var(v), + Lam(x, ref tA, ref b) => { + let tA2 = normalize(tA); + let b2 = normalize(b); + Lam(x, bx(tA2), bx(b2)) + } + Pi(x, ref tA, ref tB) => { + let tA2 = normalize(tA); + let tB2 = normalize(tB); + pi(x, tA2, tB2) + } + App(ref f, ref a) => match normalize::(f) { + Lam(x, _A, b) => { + // Beta reduce + let vx0 = V(x, 0); + let a2 = shift::(1, vx0, a); + let b2 = subst::(vx0, &a2, &b); + let b3 = shift::(-1, vx0, &b2); + normalize(&b3) + } + f2 => match (f2, normalize::(a)) { + // fold/build fusion for `List` + (App(box Builtin(ListBuild), _), App(box App(box Builtin(ListFold), _), box e2)) | + (App(box Builtin(ListFold), _), App(box App(box Builtin(ListBuild), _), box e2)) | + + // fold/build fusion for `Natural` + (Builtin(NaturalBuild), App(box Builtin(NaturalFold), box e2)) | + (Builtin(NaturalFold), App(box Builtin(NaturalBuild), box e2)) => normalize(&e2), + + /* + App (App (App (App NaturalFold (NaturalLit n0)) _) succ') zero -> + normalize (go n0) + where + go !0 = zero + go !n = App succ' (go (n - 1)) + App NaturalBuild k + | check -> NaturalLit n + | otherwise -> App f' a' + where + labeled = + normalize (App (App (App k Natural) "Succ") "Zero") + + n = go 0 labeled + where + go !m (App (Var "Succ") e') = go (m + 1) e' + go !m (Var "Zero") = m + go !_ _ = internalError text + check = go labeled + where + go (App (Var "Succ") e') = go e' + go (Var "Zero") = True + go _ = False + */ + (Builtin(NaturalIsZero), NaturalLit(n)) => BoolLit(n == 0), + (Builtin(NaturalEven), NaturalLit(n)) => BoolLit(n % 2 == 0), + (Builtin(NaturalOdd), NaturalLit(n)) => BoolLit(n % 2 != 0), + (Builtin(NaturalToInteger), NaturalLit(n)) => IntegerLit(n as isize), + (Builtin(NaturalShow), NaturalLit(n)) => TextLit(n.to_string()), + (App(f@box Builtin(ListBuild), box t), k) => { + let labeled = + normalize::<_, T, _>(&app(app(app(k.clone(), app( + Builtin(self::Builtin::List), t.clone())), "Cons"), "Nil")); + + fn list_to_vector<'i, S, A>(v: &mut Vec>, e: Expr<'i, S, A>) + where S: Clone, A: Clone + { + match e { + App(box App(box Var(V("Cons", _)), box x), box e2) => { + v.push(x); + list_to_vector(v, e2) + } + Var(V("Nil", _)) => {} + _ => panic!("internalError list_to_vector"), + } + } + fn check(e: &Expr) -> bool { + match *e { + App(box App(box Var(V("Cons", _)), _), ref e2) => check(e2), + Var(V("Nil", _)) => true, + _ => false, + } + } + + if check(&labeled) { + let mut v = vec![]; + list_to_vector(&mut v, labeled); + ListLit(Some(bx(t)), v) + } else { + app(App(f, bx(t)), k) + } + } + (App(box App(box App(box App(box Builtin(ListFold), _), box ListLit(_, xs)), _), cons), nil) => { + let e2: Expr<_, _> = xs.into_iter().rev().fold(nil, |y, ys| // foldr + App(bx(App(cons.clone(), bx(y))), bx(ys)) + ); + normalize(&e2) + } + (App(f, x_), ListLit(t, ys)) => match *f { + Builtin(ListLength) => + NaturalLit(ys.len()), + Builtin(ListHead) => + normalize(&OptionalLit(t, ys.into_iter().take(1).collect())), + Builtin(ListLast) => + normalize(&OptionalLit(t, ys.into_iter().last().into_iter().collect())), + Builtin(ListReverse) => { + let mut xs = ys; + xs.reverse(); + normalize(&ListLit(t, xs)) + } + _ => app(App(f, x_), ListLit(t, ys)), + }, + /* + App (App ListIndexed _) (ListLit t xs) -> + normalize (ListLit t' (fmap adapt (Data.Vector.indexed xs))) + where + t' = Record (Data.Map.fromList kts) + where + kts = [ ("index", Natural) + , ("value", t) + ] + adapt (n, x) = RecordLit (Data.Map.fromList kvs) + where + kvs = [ ("index", NaturalLit (fromIntegral n)) + , ("value", x) + ] + */ + (App(box App(box App(box App(box Builtin(OptionalFold), _), box OptionalLit(_, xs)), _), just), nothing) => { + let e2: Expr<_, _> = xs.into_iter().fold(nothing, |y, _| + App(just.clone(), bx(y)) + ); + normalize(&e2) + } + (App(box Builtin(OptionalBuild), _), App(box App(box Builtin(OptionalFold), _), b)) => { + normalize(&b) + } + (App(box Builtin(OptionalBuild), a0), g) => { + let e2: Expr<_, _> = app(app(app(g, + App(bx(Builtin(Optional)), a0.clone())), + Lam("x", a0.clone(), + bx(OptionalLit(Some(a0.clone()), vec![Var(V("x", 0))])))), + OptionalLit(Some(a0), vec![])); + normalize(&e2) + } + (f2, a2) => app(f2, a2), + }, + }, + Let(f, _, ref r, ref b) => { + let r2 = shift::<_, S, _>(1, V(f, 0), r); + let b2 = subst(V(f, 0), &r2, b); + let b3 = shift::<_, T, _>(-1, V(f, 0), &b2); + normalize(&b3) + } + Annot(ref x, _) => normalize(x), + Builtin(v) => Builtin(v), + BoolLit(b) => BoolLit(b), + BinOp(BoolAnd, ref x, ref y) => with_binop( + BoolAnd, + Expr::bool_lit, + |xn, yn| BoolLit(xn && yn), + normalize(x), + normalize(y), + ), + BinOp(BoolOr, ref x, ref y) => with_binop( + BoolOr, + Expr::bool_lit, + |xn, yn| BoolLit(xn || yn), + normalize(x), + normalize(y), + ), + BinOp(BoolEQ, ref x, ref y) => with_binop( + BoolEQ, + Expr::bool_lit, + |xn, yn| BoolLit(xn == yn), + normalize(x), + normalize(y), + ), + BinOp(BoolNE, ref x, ref y) => with_binop( + BoolNE, + Expr::bool_lit, + |xn, yn| BoolLit(xn != yn), + normalize(x), + normalize(y), + ), + BoolIf(ref b, ref t, ref f) => match normalize(b) { + BoolLit(true) => normalize(t), + BoolLit(false) => normalize(f), + b2 => BoolIf(bx(b2), bx(normalize(t)), bx(normalize(f))), + }, + NaturalLit(n) => NaturalLit(n), + BinOp(NaturalPlus, ref x, ref y) => with_binop( + NaturalPlus, + Expr::natural_lit, + |xn, yn| NaturalLit(xn + yn), + normalize(x), + normalize(y), + ), + BinOp(NaturalTimes, ref x, ref y) => with_binop( + NaturalTimes, + Expr::natural_lit, + |xn, yn| NaturalLit(xn * yn), + normalize(x), + normalize(y), + ), + IntegerLit(n) => IntegerLit(n), + DoubleLit(n) => DoubleLit(n), + TextLit(ref t) => TextLit(t.clone()), + BinOp(TextAppend, ref x, ref y) => with_binop( + TextAppend, + Expr::text_lit, + |xt, yt| TextLit(xt + &yt), + normalize(x), + normalize(y), + ), + ListLit(ref t, ref es) => { + let t2 = t.as_ref().map(|x| x.as_ref()).map(normalize).map(bx); + let es2 = es.iter().map(normalize).collect(); + ListLit(t2, es2) + } + OptionalLit(ref t, ref es) => { + let t2 = t.as_ref().map(|x| x.as_ref()).map(normalize).map(bx); + let es2 = es.iter().map(normalize).collect(); + OptionalLit(t2, es2) + } + Record(ref kts) => Record(map_record_value(kts, normalize)), + RecordLit(ref kvs) => RecordLit(map_record_value(kvs, normalize)), + Union(ref kts) => Union(map_record_value(kts, normalize)), + UnionLit(k, ref v, ref kvs) => { + UnionLit(k, bx(normalize(v)), map_record_value(kvs, normalize)) + } + Merge(ref _x, ref _y, ref _t) => unimplemented!(), + Field(ref r, x) => match normalize(r) { + RecordLit(kvs) => match kvs.get(x) { + Some(r2) => normalize(r2), + None => { + Field(bx(RecordLit(map_record_value(&kvs, normalize))), x) + } + }, + r2 => Field(bx(r2), x), + }, + Note(_, ref e) => normalize(e), + Embed(ref a) => Embed(a.clone()), + _ => unimplemented!(), + } +} + +fn with_binop<'a, S, A, U, Get, Set>( + op: BinOp, + get: Get, + set: Set, + x: Expr<'a, S, A>, + y: Expr<'a, S, A>, +) -> Expr<'a, S, A> +where + Get: Fn(&Expr<'a, S, A>) -> Option, + Set: FnOnce(U, U) -> Expr<'a, S, A>, +{ + if let (Some(xv), Some(yv)) = (get(&x), get(&y)) { + set(xv, yv) + } else { + Expr::BinOp(op, bx(x), bx(y)) + } +} diff --git a/dhall_core/src/grammar.lalrpop b/dhall_core/src/grammar.lalrpop new file mode 100644 index 0000000..1ffe2ff --- /dev/null +++ b/dhall_core/src/grammar.lalrpop @@ -0,0 +1,164 @@ +use std::collections::BTreeMap; +use std::iter; +use std::iter::FromIterator; + +use crate::core; +use crate::core::bx; +use crate::core::Expr::*; +use crate::core::Builtin; +use crate::core::Builtin::*; +use crate::core::BinOp::*; +use crate::grammar_util::*; +use crate::lexer::*; + +grammar<'input>; + +extern { + type Location = usize; + type Error = LexicalError; + + enum Tok<'input> { + Pi => Tok::Pi, + Lambda => Tok::Lambda, + Combine => Tok::Combine, + "->" => Tok::Arrow, + + Int => Tok::Integer(), + Nat => Tok::Natural(), + Text => Tok::Text(), + Bool => Tok::Bool(), + Label => Tok::Identifier(<&'input str>), + Const => Tok::Const(), + Let => Tok::Keyword(Keyword::Let), + In => Tok::Keyword(Keyword::In), + If => Tok::Keyword(Keyword::If), + Then => Tok::Keyword(Keyword::Then), + Else => Tok::Keyword(Keyword::Else), + List => Tok::ListLike(ListLike::List), + Optional => Tok::ListLike(ListLike::Optional), + Builtin => Tok::Builtin(), + + "{" => Tok::BraceL, + "}" => Tok::BraceR, + "[" => Tok::BracketL, + "]" => Tok::BracketR, + "(" => Tok::ParenL, + ")" => Tok::ParenR, + "&&" => Tok::BoolAnd, + "||" => Tok::BoolOr, + "==" => Tok::CompareEQ, + "!=" => Tok::CompareNE, + "++" => Tok::Append, + "*" => Tok::Times, + "+" => Tok::Plus, + "," => Tok::Comma, + "." => Tok::Dot, + ":" => Tok::Ascription, + "=" => Tok::Equals, + } +} + +pub Expr: BoxExpr<'input> = { // exprA + ExprB, +}; + +ExprB: BoxExpr<'input> = { + Lambda "("