diff options
Diffstat (limited to 'dhall/src/syntax')
-rw-r--r-- | dhall/src/syntax/binary/decode.rs | 437 | ||||
-rw-r--r-- | dhall/src/syntax/binary/encode.rs | 319 | ||||
-rw-r--r-- | dhall/src/syntax/binary/mod.rs | 4 | ||||
-rw-r--r-- | dhall/src/syntax/core/context.rs | 80 | ||||
-rw-r--r-- | dhall/src/syntax/core/expr.rs | 377 | ||||
-rw-r--r-- | dhall/src/syntax/core/import.rs | 130 | ||||
-rw-r--r-- | dhall/src/syntax/core/label.rs | 34 | ||||
-rw-r--r-- | dhall/src/syntax/core/map.rs | 394 | ||||
-rw-r--r-- | dhall/src/syntax/core/mod.rs | 13 | ||||
-rw-r--r-- | dhall/src/syntax/core/span.rs | 81 | ||||
-rw-r--r-- | dhall/src/syntax/core/text.rs | 181 | ||||
-rw-r--r-- | dhall/src/syntax/core/visitor.rs | 360 | ||||
-rw-r--r-- | dhall/src/syntax/mod.rs | 15 | ||||
-rw-r--r-- | dhall/src/syntax/text/mod.rs | 2 | ||||
-rw-r--r-- | dhall/src/syntax/text/parser.rs | 920 | ||||
-rw-r--r-- | dhall/src/syntax/text/printer.rs | 500 |
16 files changed, 3847 insertions, 0 deletions
diff --git a/dhall/src/syntax/binary/decode.rs b/dhall/src/syntax/binary/decode.rs new file mode 100644 index 0000000..46c9921 --- /dev/null +++ b/dhall/src/syntax/binary/decode.rs @@ -0,0 +1,437 @@ +use itertools::Itertools; +use serde_cbor::value::value as cbor; +use std::iter::FromIterator; + +use crate::semantics::error::DecodeError; +use crate::semantics::phase::DecodedExpr; +use crate::syntax; +use crate::syntax::{ + Expr, ExprF, FilePath, FilePrefix, Hash, ImportLocation, ImportMode, + Integer, InterpolatedText, Label, Natural, RawExpr, Scheme, Span, URL, V, +}; + +pub(crate) fn decode(data: &[u8]) -> Result<DecodedExpr, DecodeError> { + match serde_cbor::de::from_slice(data) { + Ok(v) => cbor_value_to_dhall(&v), + Err(e) => Err(DecodeError::CBORError(e)), + } +} + +// Should probably rename this +fn rc<E>(x: RawExpr<E>) -> Expr<E> { + Expr::new(x, Span::Decoded) +} + +fn cbor_value_to_dhall(data: &cbor::Value) -> Result<DecodedExpr, DecodeError> { + use cbor::Value::*; + use syntax::{BinOp, Builtin, Const}; + use ExprF::*; + Ok(rc(match data { + String(s) => match Builtin::parse(s) { + Some(b) => ExprF::Builtin(b), + None => match s.as_str() { + "True" => BoolLit(true), + "False" => BoolLit(false), + "Type" => Const(Const::Type), + "Kind" => Const(Const::Kind), + "Sort" => Const(Const::Sort), + _ => Err(DecodeError::WrongFormatError("builtin".to_owned()))?, + }, + }, + U64(n) => Var(V(Label::from("_"), *n as usize)), + F64(x) => DoubleLit((*x).into()), + Bool(b) => BoolLit(*b), + Array(vec) => match vec.as_slice() { + [String(l), U64(n)] => { + if l.as_str() == "_" { + Err(DecodeError::WrongFormatError( + "`_` variable was encoded incorrectly".to_owned(), + ))? + } + let l = Label::from(l.as_str()); + Var(V(l, *n as usize)) + } + [U64(0), f, args @ ..] => { + if args.is_empty() { + Err(DecodeError::WrongFormatError( + "Function application must have at least one argument" + .to_owned(), + ))? + } + let mut f = cbor_value_to_dhall(&f)?; + for a in args { + let a = cbor_value_to_dhall(&a)?; + f = rc(App(f, a)) + } + return Ok(f); + } + [U64(1), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + Lam(Label::from("_"), x, y) + } + [U64(1), String(l), x, y] => { + if l.as_str() == "_" { + Err(DecodeError::WrongFormatError( + "`_` variable was encoded incorrectly".to_owned(), + ))? + } + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + let l = Label::from(l.as_str()); + Lam(l, x, y) + } + [U64(2), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + Pi(Label::from("_"), x, y) + } + [U64(2), String(l), x, y] => { + if l.as_str() == "_" { + Err(DecodeError::WrongFormatError( + "`_` variable was encoded incorrectly".to_owned(), + ))? + } + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + let l = Label::from(l.as_str()); + Pi(l, x, y) + } + [U64(3), U64(n), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + use BinOp::*; + let op = match n { + 0 => BoolOr, + 1 => BoolAnd, + 2 => BoolEQ, + 3 => BoolNE, + 4 => NaturalPlus, + 5 => NaturalTimes, + 6 => TextAppend, + 7 => ListAppend, + 8 => RecursiveRecordMerge, + 9 => RightBiasedRecordMerge, + 10 => RecursiveRecordTypeMerge, + 11 => ImportAlt, + 12 => Equivalence, + _ => { + Err(DecodeError::WrongFormatError("binop".to_owned()))? + } + }; + BinOp(op, x, y) + } + [U64(4), t] => { + let t = cbor_value_to_dhall(&t)?; + EmptyListLit(rc(App(rc(ExprF::Builtin(Builtin::List)), t))) + } + [U64(4), Null, rest @ ..] => { + let rest = rest + .iter() + .map(cbor_value_to_dhall) + .collect::<Result<Vec<_>, _>>()?; + NEListLit(rest) + } + [U64(5), Null, x] => { + let x = cbor_value_to_dhall(&x)?; + SomeLit(x) + } + // Old-style optional literals + [U64(5), t] => { + let t = cbor_value_to_dhall(&t)?; + App(rc(ExprF::Builtin(Builtin::OptionalNone)), t) + } + [U64(5), t, x] => { + let x = cbor_value_to_dhall(&x)?; + let t = cbor_value_to_dhall(&t)?; + Annot( + rc(SomeLit(x)), + rc(App(rc(ExprF::Builtin(Builtin::Optional)), t)), + ) + } + [U64(6), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + Merge(x, y, None) + } + [U64(6), x, y, z] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + let z = cbor_value_to_dhall(&z)?; + Merge(x, y, Some(z)) + } + [U64(7), Object(map)] => { + let map = cbor_map_to_dhall_map(map)?; + RecordType(map) + } + [U64(8), Object(map)] => { + let map = cbor_map_to_dhall_map(map)?; + RecordLit(map) + } + [U64(9), x, String(l)] => { + let x = cbor_value_to_dhall(&x)?; + let l = Label::from(l.as_str()); + Field(x, l) + } + [U64(10), x, Array(arr)] => { + let x = cbor_value_to_dhall(&x)?; + if let [y] = arr.as_slice() { + let y = cbor_value_to_dhall(&y)?; + ProjectionByExpr(x, y) + } else { + Err(DecodeError::WrongFormatError( + "projection-by-expr".to_owned(), + ))? + } + } + [U64(10), x, rest @ ..] => { + let x = cbor_value_to_dhall(&x)?; + let labels = rest + .iter() + .map(|s| match s { + String(s) => Ok(Label::from(s.as_str())), + _ => Err(DecodeError::WrongFormatError( + "projection".to_owned(), + )), + }) + .collect::<Result<_, _>>()?; + Projection(x, labels) + } + [U64(11), Object(map)] => { + let map = cbor_map_to_dhall_opt_map(map)?; + UnionType(map) + } + [U64(12), ..] => Err(DecodeError::WrongFormatError( + "Union literals are not supported anymore".to_owned(), + ))?, + [U64(14), x, y, z] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + let z = cbor_value_to_dhall(&z)?; + BoolIf(x, y, z) + } + [U64(15), U64(x)] => NaturalLit(*x as Natural), + [U64(16), U64(x)] => IntegerLit(*x as Integer), + [U64(16), I64(x)] => IntegerLit(*x as Integer), + [U64(18), String(first), rest @ ..] => { + TextLit(InterpolatedText::from(( + first.clone(), + rest.iter() + .tuples() + .map(|(x, y)| { + let x = cbor_value_to_dhall(&x)?; + let y = match y { + String(s) => s.clone(), + _ => Err(DecodeError::WrongFormatError( + "text".to_owned(), + ))?, + }; + Ok((x, y)) + }) + .collect::<Result<_, _>>()?, + ))) + } + [U64(19), t] => { + let t = cbor_value_to_dhall(&t)?; + Assert(t) + } + [U64(24), hash, U64(mode), U64(scheme), rest @ ..] => { + let mode = match mode { + 0 => ImportMode::Code, + 1 => ImportMode::RawText, + 2 => ImportMode::Location, + _ => Err(DecodeError::WrongFormatError(format!( + "import/mode/unknown_mode: {:?}", + mode + )))?, + }; + let hash = match hash { + Null => None, + Bytes(bytes) => match bytes.as_slice() { + [18, 32, rest @ ..] => { + Some(Hash::SHA256(rest.to_vec())) + } + _ => Err(DecodeError::WrongFormatError(format!( + "import/hash/unknown_multihash: {:?}", + bytes + )))?, + }, + _ => Err(DecodeError::WrongFormatError( + "import/hash/should_be_bytes".to_owned(), + ))?, + }; + let mut rest = rest.iter(); + let location = match scheme { + 0 | 1 => { + let scheme = match scheme { + 0 => Scheme::HTTP, + _ => Scheme::HTTPS, + }; + let headers = match rest.next() { + Some(Null) => None, + Some(x) => { + let x = cbor_value_to_dhall(&x)?; + Some(x) + } + _ => Err(DecodeError::WrongFormatError( + "import/remote/headers".to_owned(), + ))?, + }; + let authority = match rest.next() { + Some(String(s)) => s.to_owned(), + _ => Err(DecodeError::WrongFormatError( + "import/remote/authority".to_owned(), + ))?, + }; + let query = match rest.next_back() { + Some(Null) => None, + Some(String(s)) => Some(s.to_owned()), + _ => Err(DecodeError::WrongFormatError( + "import/remote/query".to_owned(), + ))?, + }; + let file_path = rest + .map(|s| match s.as_string() { + Some(s) => Ok(s.clone()), + None => Err(DecodeError::WrongFormatError( + "import/remote/path".to_owned(), + )), + }) + .collect::<Result<_, _>>()?; + let path = FilePath { file_path }; + ImportLocation::Remote(URL { + scheme, + authority, + path, + query, + headers, + }) + } + 2 | 3 | 4 | 5 => { + let prefix = match scheme { + 2 => FilePrefix::Absolute, + 3 => FilePrefix::Here, + 4 => FilePrefix::Parent, + 5 => FilePrefix::Home, + _ => Err(DecodeError::WrongFormatError( + "import/local/prefix".to_owned(), + ))?, + }; + let file_path = rest + .map(|s| match s.as_string() { + Some(s) => Ok(s.clone()), + None => Err(DecodeError::WrongFormatError( + "import/local/path".to_owned(), + )), + }) + .collect::<Result<_, _>>()?; + let path = FilePath { file_path }; + ImportLocation::Local(prefix, path) + } + 6 => { + let env = match rest.next() { + Some(String(s)) => s.to_owned(), + _ => Err(DecodeError::WrongFormatError( + "import/env".to_owned(), + ))?, + }; + ImportLocation::Env(env) + } + 7 => ImportLocation::Missing, + _ => Err(DecodeError::WrongFormatError( + "import/type".to_owned(), + ))?, + }; + Import(syntax::Import { + mode, + hash, + location, + }) + } + [U64(25), bindings @ ..] => { + let mut tuples = bindings.iter().tuples(); + let bindings = (&mut tuples) + .map(|(x, t, v)| { + let x = x.as_string().ok_or_else(|| { + DecodeError::WrongFormatError( + "let/label".to_owned(), + ) + })?; + let x = Label::from(x.as_str()); + let t = match t { + Null => None, + t => Some(cbor_value_to_dhall(&t)?), + }; + let v = cbor_value_to_dhall(&v)?; + Ok((x, t, v)) + }) + .collect::<Result<Vec<_>, _>>()?; + let expr = tuples.into_buffer().next().ok_or_else(|| { + DecodeError::WrongFormatError("let/expr".to_owned()) + })?; + let expr = cbor_value_to_dhall(expr)?; + return Ok(bindings + .into_iter() + .rev() + .fold(expr, |acc, (x, t, v)| rc(Let(x, t, v, acc)))); + } + [U64(26), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + Annot(x, y) + } + [U64(27), x] => { + let x = cbor_value_to_dhall(&x)?; + ToMap(x, None) + } + [U64(27), x, y] => { + let x = cbor_value_to_dhall(&x)?; + let y = cbor_value_to_dhall(&y)?; + ToMap(x, Some(y)) + } + [U64(28), x] => { + let x = cbor_value_to_dhall(&x)?; + EmptyListLit(x) + } + _ => Err(DecodeError::WrongFormatError(format!("{:?}", data)))?, + }, + _ => Err(DecodeError::WrongFormatError(format!("{:?}", data)))?, + })) +} + +fn cbor_map_to_dhall_map<'a, T>( + map: impl IntoIterator<Item = (&'a cbor::ObjectKey, &'a cbor::Value)>, +) -> Result<T, DecodeError> +where + T: FromIterator<(Label, DecodedExpr)>, +{ + map.into_iter() + .map(|(k, v)| -> Result<(_, _), _> { + let k = k.as_string().ok_or_else(|| { + DecodeError::WrongFormatError("map/key".to_owned()) + })?; + let v = cbor_value_to_dhall(v)?; + Ok((Label::from(k.as_ref()), v)) + }) + .collect::<Result<_, _>>() +} + +fn cbor_map_to_dhall_opt_map<'a, T>( + map: impl IntoIterator<Item = (&'a cbor::ObjectKey, &'a cbor::Value)>, +) -> Result<T, DecodeError> +where + T: FromIterator<(Label, Option<DecodedExpr>)>, +{ + map.into_iter() + .map(|(k, v)| -> Result<(_, _), _> { + let k = k.as_string().ok_or_else(|| { + DecodeError::WrongFormatError("map/key".to_owned()) + })?; + let v = match v { + cbor::Value::Null => None, + _ => Some(cbor_value_to_dhall(v)?), + }; + Ok((Label::from(k.as_ref()), v)) + }) + .collect::<Result<_, _>>() +} diff --git a/dhall/src/syntax/binary/encode.rs b/dhall/src/syntax/binary/encode.rs new file mode 100644 index 0000000..8e13efd --- /dev/null +++ b/dhall/src/syntax/binary/encode.rs @@ -0,0 +1,319 @@ +use serde_cbor::value::value as cbor; +use std::vec; + +use crate::semantics::error::EncodeError; +use crate::syntax; +use crate::syntax::map::DupTreeMap; +use crate::syntax::{ + Expr, ExprF, FilePrefix, Hash, Import, ImportLocation, ImportMode, Label, + Scheme, V, +}; + +/// Warning: will fail if `expr` contains an `Embed` node. +pub(crate) fn encode<E>(expr: &Expr<E>) -> Result<Vec<u8>, EncodeError> { + serde_cbor::ser::to_vec(&Serialize::Expr(expr)) + .map_err(|e| EncodeError::CBORError(e)) +} + +enum Serialize<'a, E> { + Expr(&'a Expr<E>), + CBOR(cbor::Value), + RecordMap(&'a DupTreeMap<Label, Expr<E>>), + UnionMap(&'a DupTreeMap<Label, Option<Expr<E>>>), +} + +macro_rules! count { + (@replace_with $_t:tt $sub:expr) => { $sub }; + ($($tts:tt)*) => {0usize $(+ count!(@replace_with $tts 1usize))*}; +} + +macro_rules! ser_seq { + ($ser:expr; $($elt:expr),* $(,)?) => {{ + use serde::ser::SerializeSeq; + let count = count!($($elt)*); + let mut ser_seq = $ser.serialize_seq(Some(count))?; + $( + ser_seq.serialize_element(&$elt)?; + )* + ser_seq.end() + }}; +} + +fn serialize_subexpr<S, E>(ser: S, e: &Expr<E>) -> Result<S::Ok, S::Error> +where + S: serde::ser::Serializer, +{ + use cbor::Value::{String, I64, U64}; + use std::iter::once; + use syntax::Builtin; + use syntax::ExprF::*; + + use self::Serialize::{RecordMap, UnionMap}; + fn expr<E>(x: &Expr<E>) -> self::Serialize<'_, E> { + self::Serialize::Expr(x) + } + let cbor = + |v: cbor::Value| -> self::Serialize<'_, E> { self::Serialize::CBOR(v) }; + let tag = |x: u64| cbor(U64(x)); + let null = || cbor(cbor::Value::Null); + let label = |l: &Label| cbor(cbor::Value::String(l.into())); + + match e.as_ref() { + Const(c) => ser.serialize_str(&c.to_string()), + Builtin(b) => ser.serialize_str(&b.to_string()), + BoolLit(b) => ser.serialize_bool(*b), + NaturalLit(n) => ser_seq!(ser; tag(15), U64(*n as u64)), + IntegerLit(n) => ser_seq!(ser; tag(16), I64(*n as i64)), + DoubleLit(n) => { + let n: f64 = (*n).into(); + ser.serialize_f64(n) + } + BoolIf(x, y, z) => ser_seq!(ser; tag(14), expr(x), expr(y), expr(z)), + Var(V(l, n)) if l == &"_".into() => ser.serialize_u64(*n as u64), + Var(V(l, n)) => ser_seq!(ser; label(l), U64(*n as u64)), + Lam(l, x, y) if l == &"_".into() => { + ser_seq!(ser; tag(1), expr(x), expr(y)) + } + Lam(l, x, y) => ser_seq!(ser; tag(1), label(l), expr(x), expr(y)), + Pi(l, x, y) if l == &"_".into() => { + ser_seq!(ser; tag(2), expr(x), expr(y)) + } + Pi(l, x, y) => ser_seq!(ser; tag(2), label(l), expr(x), expr(y)), + Let(_, _, _, _) => { + let (bound_e, bindings) = collect_nested_lets(e); + let count = 1 + 3 * bindings.len() + 1; + + use serde::ser::SerializeSeq; + let mut ser_seq = ser.serialize_seq(Some(count))?; + ser_seq.serialize_element(&tag(25))?; + for (l, t, v) in bindings { + ser_seq.serialize_element(&label(l))?; + match t { + Some(t) => ser_seq.serialize_element(&expr(t))?, + None => ser_seq.serialize_element(&null())?, + } + ser_seq.serialize_element(&expr(v))?; + } + ser_seq.serialize_element(&expr(bound_e))?; + ser_seq.end() + } + App(_, _) => { + let (f, args) = collect_nested_applications(e); + ser.collect_seq( + once(tag(0)) + .chain(once(expr(f))) + .chain(args.into_iter().rev().map(expr)), + ) + } + Annot(x, y) => ser_seq!(ser; tag(26), expr(x), expr(y)), + Assert(x) => ser_seq!(ser; tag(19), expr(x)), + SomeLit(x) => ser_seq!(ser; tag(5), null(), expr(x)), + EmptyListLit(x) => match x.as_ref() { + App(f, a) => match f.as_ref() { + ExprF::Builtin(Builtin::List) => ser_seq!(ser; tag(4), expr(a)), + _ => ser_seq!(ser; tag(28), expr(x)), + }, + _ => ser_seq!(ser; tag(28), expr(x)), + }, + NEListLit(xs) => ser.collect_seq( + once(tag(4)).chain(once(null())).chain(xs.iter().map(expr)), + ), + TextLit(xs) => { + use syntax::InterpolatedTextContents::{Expr, Text}; + ser.collect_seq(once(tag(18)).chain(xs.iter().map(|x| match x { + Expr(x) => expr(x), + Text(x) => cbor(String(x.clone())), + }))) + } + RecordType(map) => ser_seq!(ser; tag(7), RecordMap(map)), + RecordLit(map) => ser_seq!(ser; tag(8), RecordMap(map)), + UnionType(map) => ser_seq!(ser; tag(11), UnionMap(map)), + Field(x, l) => ser_seq!(ser; tag(9), expr(x), label(l)), + BinOp(op, x, y) => { + use syntax::BinOp::*; + let op = match op { + BoolOr => 0, + BoolAnd => 1, + BoolEQ => 2, + BoolNE => 3, + NaturalPlus => 4, + NaturalTimes => 5, + TextAppend => 6, + ListAppend => 7, + RecursiveRecordMerge => 8, + RightBiasedRecordMerge => 9, + RecursiveRecordTypeMerge => 10, + ImportAlt => 11, + Equivalence => 12, + }; + ser_seq!(ser; tag(3), U64(op), expr(x), expr(y)) + } + Merge(x, y, None) => ser_seq!(ser; tag(6), expr(x), expr(y)), + Merge(x, y, Some(z)) => { + ser_seq!(ser; tag(6), expr(x), expr(y), expr(z)) + } + ToMap(x, None) => ser_seq!(ser; tag(27), expr(x)), + ToMap(x, Some(y)) => ser_seq!(ser; tag(27), expr(x), expr(y)), + Projection(x, ls) => ser.collect_seq( + once(tag(10)) + .chain(once(expr(x))) + .chain(ls.iter().map(label)), + ), + ProjectionByExpr(x, y) => { + ser_seq!(ser; tag(10), expr(x), vec![expr(y)]) + } + Import(import) => serialize_import(ser, import), + Embed(_) => unimplemented!( + "An expression with resolved imports cannot be binary-encoded" + ), + } +} + +fn serialize_import<S, E>( + ser: S, + import: &Import<Expr<E>>, +) -> Result<S::Ok, S::Error> +where + S: serde::ser::Serializer, +{ + use cbor::Value::{Bytes, Null, U64}; + use serde::ser::SerializeSeq; + + let count = 4 + match &import.location { + ImportLocation::Remote(url) => 3 + url.path.file_path.len(), + ImportLocation::Local(_, path) => path.file_path.len(), + ImportLocation::Env(_) => 1, + ImportLocation::Missing => 0, + }; + let mut ser_seq = ser.serialize_seq(Some(count))?; + + ser_seq.serialize_element(&U64(24))?; + + let hash = match &import.hash { + None => Null, + Some(Hash::SHA256(h)) => { + let mut bytes = vec![18, 32]; + bytes.extend_from_slice(h); + Bytes(bytes) + } + }; + ser_seq.serialize_element(&hash)?; + + let mode = match import.mode { + ImportMode::Code => 0, + ImportMode::RawText => 1, + ImportMode::Location => 2, + }; + ser_seq.serialize_element(&U64(mode))?; + + let scheme = match &import.location { + ImportLocation::Remote(url) => match url.scheme { + Scheme::HTTP => 0, + Scheme::HTTPS => 1, + }, + ImportLocation::Local(prefix, _) => match prefix { + FilePrefix::Absolute => 2, + FilePrefix::Here => 3, + FilePrefix::Parent => 4, + FilePrefix::Home => 5, + }, + ImportLocation::Env(_) => 6, + ImportLocation::Missing => 7, + }; + ser_seq.serialize_element(&U64(scheme))?; + + match &import.location { + ImportLocation::Remote(url) => { + match &url.headers { + None => ser_seq.serialize_element(&Null)?, + Some(e) => { + ser_seq.serialize_element(&self::Serialize::Expr(e))? + } + }; + ser_seq.serialize_element(&url.authority)?; + for p in url.path.file_path.iter() { + ser_seq.serialize_element(&p)?; + } + match &url.query { + None => ser_seq.serialize_element(&Null)?, + Some(x) => ser_seq.serialize_element(x)?, + }; + } + ImportLocation::Local(_, path) => { + for p in path.file_path.iter() { + ser_seq.serialize_element(&p)?; + } + } + ImportLocation::Env(env) => { + ser_seq.serialize_element(env)?; + } + ImportLocation::Missing => {} + } + + ser_seq.end() +} + +impl<'a, E> serde::ser::Serialize for Serialize<'a, E> { + fn serialize<S>(&self, ser: S) -> Result<S::Ok, S::Error> + where + S: serde::ser::Serializer, + { + match self { + Serialize::Expr(e) => serialize_subexpr(ser, e), + Serialize::CBOR(v) => v.serialize(ser), + Serialize::RecordMap(map) => { + ser.collect_map(map.iter().map(|(k, v)| { + (cbor::Value::String(k.into()), Serialize::Expr(v)) + })) + } + Serialize::UnionMap(map) => { + ser.collect_map(map.iter().map(|(k, v)| { + let v = match v { + Some(x) => Serialize::Expr(x), + None => Serialize::CBOR(cbor::Value::Null), + }; + (cbor::Value::String(k.into()), v) + })) + } + } + } +} + +fn collect_nested_applications<'a, E>( + e: &'a Expr<E>, +) -> (&'a Expr<E>, Vec<&'a Expr<E>>) { + fn go<'a, E>(e: &'a Expr<E>, vec: &mut Vec<&'a Expr<E>>) -> &'a Expr<E> { + match e.as_ref() { + ExprF::App(f, a) => { + vec.push(a); + go(f, vec) + } + _ => e, + } + } + let mut vec = vec![]; + let e = go(e, &mut vec); + (e, vec) +} + +type LetBinding<'a, E> = (&'a Label, &'a Option<Expr<E>>, &'a Expr<E>); + +fn collect_nested_lets<'a, E>( + e: &'a Expr<E>, +) -> (&'a Expr<E>, Vec<LetBinding<'a, E>>) { + fn go<'a, E>( + e: &'a Expr<E>, + vec: &mut Vec<LetBinding<'a, E>>, + ) -> &'a Expr<E> { + match e.as_ref() { + ExprF::Let(l, t, v, e) => { + vec.push((l, t, v)); + go(e, vec) + } + _ => e, + } + } + let mut vec = vec![]; + let e = go(e, &mut vec); + (e, vec) +} diff --git a/dhall/src/syntax/binary/mod.rs b/dhall/src/syntax/binary/mod.rs new file mode 100644 index 0000000..7ed1f6e --- /dev/null +++ b/dhall/src/syntax/binary/mod.rs @@ -0,0 +1,4 @@ +mod decode; +mod encode; +pub(crate) use decode::decode; +pub(crate) use encode::encode; diff --git a/dhall/src/syntax/core/context.rs b/dhall/src/syntax/core/context.rs new file mode 100644 index 0000000..6844baa --- /dev/null +++ b/dhall/src/syntax/core/context.rs @@ -0,0 +1,80 @@ +use std::cmp::Eq; +use std::collections::HashMap; +use std::hash::Hash; + +/// A `(Context a)` associates `Text` labels with values of type `a` +/// +/// The `Context` is used for type-checking when `(a = Expr)` +/// +/// * You create a `Context` using `empty` and `insert` +/// * You transform a `Context` using `fmap` +/// * You consume a `Context` using `lookup` and `toList` +/// +/// The difference between a `Context` and a `Map` is that a `Context` lets you +/// have multiple ordered occurrences of the same key and you can query for the +/// `n`th occurrence of a given key. +/// +#[derive(Debug, Clone)] +pub struct Context<K: Eq + Hash, T>(HashMap<K, Vec<T>>); + +impl<K: Hash + Eq + Clone, T> Context<K, T> { + /// An empty context with no key-value pairs + pub fn new() -> Self { + Context(HashMap::new()) + } + + /// Look up a key by name and index + /// + /// ```c + /// lookup _ _ empty = Nothing + /// lookup k 0 (insert k v c) = Just v + /// lookup k n (insert k v c) = lookup k (n - 1) c -- 1 <= n + /// lookup k n (insert j v c) = lookup k n c -- k /= j + /// ``` + pub fn lookup<'a>(&'a self, k: &K, n: usize) -> Option<&'a T> { + self.0.get(k).and_then(|v| { + if n < v.len() { + v.get(v.len() - 1 - n) + } else { + None + } + }) + } + + pub fn map<U, F: Fn(&K, &T) -> U>(&self, f: F) -> Context<K, U> { + Context( + self.0 + .iter() + .map(|(k, vs)| { + ((*k).clone(), vs.iter().map(|v| f(k, v)).collect()) + }) + .collect(), + ) + } + + pub fn lookup_all<'a>(&'a self, k: &K) -> impl Iterator<Item = &T> { + self.0.get(k).into_iter().flat_map(|v| v.iter()) + } + + pub fn iter(&self) -> impl Iterator<Item = (&K, &T)> { + self.0 + .iter() + .flat_map(|(k, vs)| vs.iter().map(move |v| (k, v))) + } + + pub fn iter_keys(&self) -> impl Iterator<Item = (&K, &Vec<T>)> { + self.0.iter() + } +} + +impl<K: Hash + Eq + Clone, T: Clone> Context<K, T> { + /// Add a key-value pair to the `Context` + pub fn insert(&self, k: K, v: T) -> Self { + let mut ctx = (*self).clone(); + { + let m = ctx.0.entry(k).or_insert_with(Vec::new); + m.push(v); + } + ctx + } +} diff --git a/dhall/src/syntax/core/expr.rs b/dhall/src/syntax/core/expr.rs new file mode 100644 index 0000000..5b9f401 --- /dev/null +++ b/dhall/src/syntax/core/expr.rs @@ -0,0 +1,377 @@ +use crate::syntax::map::{DupTreeMap, DupTreeSet}; +use crate::syntax::visitor::{self, ExprFMutVisitor, ExprFVisitor}; +use crate::syntax::*; + +pub type Integer = isize; +pub type Natural = usize; +pub type Double = NaiveDouble; + +pub fn trivial_result<T>(x: Result<T, !>) -> T { + match x { + Ok(x) => x, + Err(e) => e, + } +} + +/// Double with bitwise equality +#[derive(Debug, Copy, Clone)] +pub struct NaiveDouble(f64); + +impl PartialEq for NaiveDouble { + fn eq(&self, other: &Self) -> bool { + self.0.to_bits() == other.0.to_bits() + } +} + +impl Eq for NaiveDouble {} + +impl std::hash::Hash for NaiveDouble { + fn hash<H>(&self, state: &mut H) + where + H: std::hash::Hasher, + { + self.0.to_bits().hash(state) + } +} + +impl From<f64> for NaiveDouble { + fn from(x: f64) -> Self { + NaiveDouble(x) + } +} + +impl From<NaiveDouble> for f64 { + fn from(x: NaiveDouble) -> f64 { + x.0 + } +} + +/// Constants for a pure type system +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum Const { + Type, + Kind, + Sort, +} + +/// Bound variable +/// +/// The `Label` field is the variable's name (i.e. \"`x`\"). +/// The `Int` field is a DeBruijn index. +/// See dhall-lang/standard/semantics.md for details +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct V<Label>(pub Label, pub usize); + +// This is only for the specific `Label` type, not generic +impl From<Label> for V<Label> { + fn from(x: Label) -> V<Label> { + V(x, 0) + } +} +impl<'a> From<&'a Label> for V<Label> { + fn from(x: &'a Label) -> V<Label> { + V(x.clone(), 0) + } +} + +// Definition order must match precedence order for +// pretty-printing to work correctly +#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub enum BinOp { + /// `x ? y` + ImportAlt, + /// `x || y` + BoolOr, + /// `x + y` + NaturalPlus, + /// `x ++ y` + TextAppend, + /// `x # y` + ListAppend, + /// `x && y` + BoolAnd, + /// `x ∧ y` + RecursiveRecordMerge, + /// `x ⫽ y` + RightBiasedRecordMerge, + /// `x ⩓ y` + RecursiveRecordTypeMerge, + /// `x * y` + NaturalTimes, + /// `x == y` + BoolEQ, + /// `x != y` + BoolNE, + /// x === y + Equivalence, +} + +/// Built-ins +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Builtin { + Bool, + Natural, + Integer, + Double, + Text, + List, + Optional, + OptionalNone, + NaturalBuild, + NaturalFold, + NaturalIsZero, + NaturalEven, + NaturalOdd, + NaturalToInteger, + NaturalShow, + NaturalSubtract, + IntegerToDouble, + IntegerShow, + DoubleShow, + ListBuild, + ListFold, + ListLength, + ListHead, + ListLast, + ListIndexed, + ListReverse, + OptionalFold, + OptionalBuild, + TextShow, +} + +// Each node carries an annotation. +#[derive(Debug, Clone)] +pub struct Expr<Embed>(Box<(RawExpr<Embed>, Span)>); + +pub type RawExpr<Embed> = ExprF<Expr<Embed>, Embed>; + +impl<Embed: PartialEq> std::cmp::PartialEq for Expr<Embed> { + fn eq(&self, other: &Self) -> bool { + self.0.as_ref().0 == other.0.as_ref().0 + } +} + +impl<Embed: Eq> std::cmp::Eq for Expr<Embed> {} + +impl<Embed: std::hash::Hash> std::hash::Hash for Expr<Embed> { + fn hash<H>(&self, state: &mut H) + where + H: std::hash::Hasher, + { + (self.0).0.hash(state) + } +} + +/// Syntax tree for expressions +// Having the recursion out of the enum definition enables writing +// much more generic code and improves pattern-matching behind +// smart pointers. +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ExprF<SubExpr, Embed> { + Const(Const), + /// `x` + /// `x@n` + Var(V<Label>), + /// `λ(x : A) -> b` + Lam(Label, SubExpr, SubExpr), + /// `A -> B` + /// `∀(x : A) -> B` + Pi(Label, SubExpr, SubExpr), + /// `f a` + App(SubExpr, SubExpr), + /// `let x = r in e` + /// `let x : t = r in e` + Let(Label, Option<SubExpr>, SubExpr, SubExpr), + /// `x : t` + Annot(SubExpr, SubExpr), + /// `assert : t` + Assert(SubExpr), + /// Built-in values + Builtin(Builtin), + // Binary operations + BinOp(BinOp, SubExpr, SubExpr), + /// `True` + BoolLit(bool), + /// `if x then y else z` + BoolIf(SubExpr, SubExpr, SubExpr), + /// `1` + NaturalLit(Natural), + /// `+2` + IntegerLit(Integer), + /// `3.24` + DoubleLit(Double), + /// `"Some ${interpolated} text"` + TextLit(InterpolatedText<SubExpr>), + /// `[] : t` + EmptyListLit(SubExpr), + /// `[x, y, z]` + NEListLit(Vec<SubExpr>), + /// `Some e` + SomeLit(SubExpr), + /// `{ k1 : t1, k2 : t1 }` + RecordType(DupTreeMap<Label, SubExpr>), + /// `{ k1 = v1, k2 = v2 }` + RecordLit(DupTreeMap<Label, SubExpr>), + /// `< k1 : t1, k2 >` + UnionType(DupTreeMap<Label, Option<SubExpr>>), + /// `merge x y : t` + Merge(SubExpr, SubExpr, Option<SubExpr>), + /// `toMap x : t` + ToMap(SubExpr, Option<SubExpr>), + /// `e.x` + Field(SubExpr, Label), + /// `e.{ x, y, z }` + Projection(SubExpr, DupTreeSet<Label>), + /// `e.(t)` + ProjectionByExpr(SubExpr, SubExpr), + /// `./some/path` + Import(Import<SubExpr>), + /// Embeds the result of resolving an import + Embed(Embed), +} + +impl<SE, E> ExprF<SE, E> { + pub fn traverse_ref_with_special_handling_of_binders<'a, SE2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + visit_under_binder: impl FnOnce(&'a Label, &'a SE) -> Result<SE2, Err>, + ) -> Result<ExprF<SE2, E>, Err> + where + E: Clone, + { + visitor::TraverseRefWithBindersVisitor { + visit_subexpr, + visit_under_binder, + } + .visit(self) + } + + fn traverse_ref<'a, SE2, Err>( + &'a self, + visit_subexpr: impl FnMut(&'a SE) -> Result<SE2, Err>, + ) -> Result<ExprF<SE2, E>, Err> + where + E: Clone, + { + visitor::TraverseRefVisitor { visit_subexpr }.visit(self) + } + + fn traverse_mut<'a, Err>( + &'a mut self, + visit_subexpr: impl FnMut(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + visitor::TraverseMutVisitor { visit_subexpr }.visit(self) + } + + pub fn map_ref_with_special_handling_of_binders<'a, SE2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + mut map_under_binder: impl FnMut(&'a Label, &'a SE) -> SE2, + ) -> ExprF<SE2, E> + where + E: Clone, + { + trivial_result(self.traverse_ref_with_special_handling_of_binders( + |x| Ok(map_subexpr(x)), + |l, x| Ok(map_under_binder(l, x)), + )) + } + + pub fn map_ref<'a, SE2>( + &'a self, + mut map_subexpr: impl FnMut(&'a SE) -> SE2, + ) -> ExprF<SE2, E> + where + E: Clone, + { + trivial_result(self.traverse_ref(|x| Ok(map_subexpr(x)))) + } + + pub fn map_mut<'a>(&'a mut self, mut map_subexpr: impl FnMut(&'a mut SE)) { + trivial_result(self.traverse_mut(|x| Ok(map_subexpr(x)))) + } +} + +impl<E> Expr<E> { + pub fn as_ref(&self) -> &RawExpr<E> { + &self.0.as_ref().0 + } + pub fn as_mut(&mut self) -> &mut RawExpr<E> { + &mut self.0.as_mut().0 + } + pub fn span(&self) -> Span { + self.0.as_ref().1.clone() + } + + pub fn new(x: RawExpr<E>, n: Span) -> Self { + Expr(Box::new((x, n))) + } + + pub fn rewrap<E2>(&self, x: RawExpr<E2>) -> Expr<E2> { + Expr(Box::new((x, (self.0).1.clone()))) + } + + pub fn traverse_resolve_mut<Err, F1>( + &mut self, + f: &mut F1, + ) -> Result<(), Err> + where + E: Clone, + F1: FnMut(Import<Expr<E>>) -> Result<E, Err>, + { + match self.as_mut() { + ExprF::BinOp(BinOp::ImportAlt, l, r) => { + let garbage_expr = ExprF::BoolLit(false); + let new_self = if l.traverse_resolve_mut(f).is_ok() { + l + } else { + r.traverse_resolve_mut(f)?; + r + }; + *self.as_mut() = + std::mem::replace(new_self.as_mut(), garbage_expr); + } + _ => { + self.as_mut().traverse_mut(|e| e.traverse_resolve_mut(f))?; + if let ExprF::Import(import) = self.as_mut() { + let garbage_import = Import { + mode: ImportMode::Code, + location: ImportLocation::Missing, + hash: None, + }; + // Move out of &mut import + let import = std::mem::replace(import, garbage_import); + *self.as_mut() = ExprF::Embed(f(import)?); + } + } + } + Ok(()) + } +} + +/// Add an isize to an usize +/// Returns `None` on over/underflow +fn add_ui(u: usize, i: isize) -> Option<usize> { + Some(if i < 0 { + u.checked_sub(i.checked_neg()? as usize)? + } else { + u.checked_add(i as usize)? + }) +} + +impl<Label: PartialEq + Clone> V<Label> { + pub fn shift(&self, delta: isize, var: &V<Label>) -> Option<Self> { + let V(x, n) = var; + let V(y, m) = self; + Some(if x == y && n <= m { + V(y.clone(), add_ui(*m, delta)?) + } else { + V(y.clone(), *m) + }) + } + + pub fn over_binder(&self, x: &Label) -> Option<Self> { + self.shift(-1, &V(x.clone(), 0)) + } +} diff --git a/dhall/src/syntax/core/import.rs b/dhall/src/syntax/core/import.rs new file mode 100644 index 0000000..da3e99b --- /dev/null +++ b/dhall/src/syntax/core/import.rs @@ -0,0 +1,130 @@ +/// The beginning of a file path which anchors subsequent path components +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum FilePrefix { + /// Absolute path + Absolute, + /// Path relative to . + Here, + /// Path relative to .. + Parent, + /// Path relative to ~ + Home, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct FilePath { + pub file_path: Vec<String>, +} + +/// The location of import (i.e. local vs. remote vs. environment) +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ImportLocation<SubExpr> { + Local(FilePrefix, FilePath), + Remote(URL<SubExpr>), + Env(String), + Missing, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct URL<SubExpr> { + pub scheme: Scheme, + pub authority: String, + pub path: FilePath, + pub query: Option<String>, + pub headers: Option<SubExpr>, +} + +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum Scheme { + HTTP, + HTTPS, +} + +/// How to interpret the import's contents (i.e. as Dhall code or raw text) +#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)] +pub enum ImportMode { + Code, + RawText, + Location, +} + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum Hash { + SHA256(Vec<u8>), +} + +/// Reference to an external resource +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct Import<SubExpr> { + pub mode: ImportMode, + pub location: ImportLocation<SubExpr>, + pub hash: Option<Hash>, +} + +impl<SE> URL<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<URL<SE2>, Err> { + let headers = self.headers.as_ref().map(f).transpose()?; + Ok(URL { + scheme: self.scheme, + authority: self.authority.clone(), + path: self.path.clone(), + query: self.query.clone(), + headers, + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + if let Some(header) = &mut self.headers { + f(header)?; + } + Ok(()) + } +} + +impl<SE> ImportLocation<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<ImportLocation<SE2>, Err> { + use ImportLocation::*; + Ok(match self { + Local(prefix, path) => Local(*prefix, path.clone()), + Remote(url) => Remote(url.traverse_ref(f)?), + Env(env) => Env(env.clone()), + Missing => Missing, + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + if let ImportLocation::Remote(url) = self { + url.traverse_mut(f)?; + } + Ok(()) + } +} + +impl<SE> Import<SE> { + pub fn traverse_ref<'a, Err, SE2>( + &'a self, + f: impl FnOnce(&'a SE) -> Result<SE2, Err>, + ) -> Result<Import<SE2>, Err> { + Ok(Import { + mode: self.mode, + location: self.location.traverse_ref(f)?, + hash: self.hash.clone(), + }) + } + pub fn traverse_mut<'a, Err>( + &'a mut self, + f: impl FnOnce(&'a mut SE) -> Result<(), Err>, + ) -> Result<(), Err> { + self.location.traverse_mut(f) + } +} diff --git a/dhall/src/syntax/core/label.rs b/dhall/src/syntax/core/label.rs new file mode 100644 index 0000000..43c3f53 --- /dev/null +++ b/dhall/src/syntax/core/label.rs @@ -0,0 +1,34 @@ +use std::rc::Rc; + +// The type for labels throughout the AST +// It owns the data because otherwise lifetimes would make recursive imports impossible +#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord)] +pub struct Label(Rc<str>); + +impl From<String> for Label { + fn from(s: String) -> Self { + let s: &str = &s; + Label(s.into()) + } +} + +impl<'a> From<&'a str> for Label { + fn from(s: &'a str) -> Self { + Label(Rc::from(s)) + } +} + +impl From<&Label> for String { + fn from(x: &Label) -> String { + x.0.as_ref().to_owned() + } +} + +impl Label { + pub fn from_str(s: &str) -> Label { + Label(s.into()) + } + pub fn as_ref(&self) -> &str { + self.0.as_ref() + } +} diff --git a/dhall/src/syntax/core/map.rs b/dhall/src/syntax/core/map.rs new file mode 100644 index 0000000..c4c6126 --- /dev/null +++ b/dhall/src/syntax/core/map.rs @@ -0,0 +1,394 @@ +/// A sorted map that allows multiple values for each key. +pub use dup_tree_map::DupTreeMap; +pub use dup_tree_set::DupTreeSet; + +mod one_or_more { + use either::Either; + use std::{iter, slice, vec}; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub enum OneOrMore<T> { + One(T), + More(Vec<T>), + } + + pub type Iter<'a, T> = Either<slice::Iter<'a, T>, iter::Once<&'a T>>; + pub type IterMut<'a, T> = + Either<slice::IterMut<'a, T>, iter::Once<&'a mut T>>; + pub type IntoIter<T> = Either<vec::IntoIter<T>, iter::Once<T>>; + + impl<T> OneOrMore<T> { + pub fn new(x: T) -> Self { + OneOrMore::One(x) + } + + pub fn push(&mut self, x: T) { + take_mut::take(self, |sef| match sef { + OneOrMore::More(mut vec) => { + vec.push(x); + OneOrMore::More(vec) + } + OneOrMore::One(one) => OneOrMore::More(vec![one, x]), + }) + } + + pub fn iter(&self) -> Iter<'_, T> { + match self { + OneOrMore::More(vec) => Either::Left(vec.iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + + pub fn iter_mut(&mut self) -> IterMut<'_, T> { + match self { + OneOrMore::More(vec) => Either::Left(vec.iter_mut()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } + + impl<T> IntoIterator for OneOrMore<T> { + type Item = T; + type IntoIter = IntoIter<T>; + + fn into_iter(self) -> Self::IntoIter { + match self { + OneOrMore::More(vec) => Either::Left(vec.into_iter()), + OneOrMore::One(x) => Either::Right(iter::once(x)), + } + } + } +} + +mod dup_tree_map { + use super::one_or_more; + use super::one_or_more::OneOrMore; + use std::collections::{btree_map, BTreeMap}; + use std::iter; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct DupTreeMap<K, V> { + map: BTreeMap<K, OneOrMore<V>>, + size: usize, + } + + pub type IterInternalIntermediate<'a, K, V> = + iter::Zip<iter::Repeat<&'a K>, one_or_more::Iter<'a, V>>; + pub type IterInternal<'a, K, V> = iter::FlatMap< + btree_map::Iter<'a, K, OneOrMore<V>>, + IterInternalIntermediate<'a, K, V>, + for<'b> fn( + (&'b K, &'b OneOrMore<V>), + ) -> IterInternalIntermediate<'b, K, V>, + >; + pub struct Iter<'a, K, V> { + iter: IterInternal<'a, K, V>, + size: usize, + } + pub type IterMutInternalIntermediate<'a, K, V> = + iter::Zip<iter::Repeat<&'a K>, one_or_more::IterMut<'a, V>>; + pub type IterMutInternal<'a, K, V> = iter::FlatMap< + btree_map::IterMut<'a, K, OneOrMore<V>>, + IterMutInternalIntermediate<'a, K, V>, + for<'b> fn( + (&'b K, &'b mut OneOrMore<V>), + ) -> IterMutInternalIntermediate<'b, K, V>, + >; + pub struct IterMut<'a, K, V> { + iter: IterMutInternal<'a, K, V>, + size: usize, + } + pub type IntoIterInternalIntermediate<K, V> = + iter::Zip<iter::Repeat<K>, one_or_more::IntoIter<V>>; + pub type IntoIterInternal<K, V> = iter::FlatMap< + btree_map::IntoIter<K, OneOrMore<V>>, + IntoIterInternalIntermediate<K, V>, + fn((K, OneOrMore<V>)) -> IntoIterInternalIntermediate<K, V>, + >; + pub struct IntoIter<K: Clone, V> { + iter: IntoIterInternal<K, V>, + size: usize, + } + + impl<K, V> DupTreeMap<K, V> { + pub fn new() -> Self + where + K: Ord, + { + DupTreeMap { + map: BTreeMap::new(), + size: 0, + } + } + + pub fn insert(&mut self, key: K, value: V) + where + K: Ord, + { + use std::collections::btree_map::Entry; + match self.map.entry(key) { + Entry::Vacant(e) => { + e.insert(OneOrMore::new(value)); + } + Entry::Occupied(mut e) => e.get_mut().push(value), + } + self.size += 1; + } + + pub fn len(&self) -> usize { + self.size + } + pub fn is_empty(&self) -> bool { + self.size == 0 + } + + pub fn iter(&self) -> Iter<'_, K, V> + where + K: Ord, + { + fn foo<'a, K, V>( + (k, oom): (&'a K, &'a OneOrMore<V>), + ) -> IterInternalIntermediate<'a, K, V> { + iter::repeat(k).zip(oom.iter()) + } + Iter { + iter: self.map.iter().flat_map(foo), + size: self.size, + } + } + + pub fn iter_mut(&mut self) -> IterMut<'_, K, V> + where + K: Ord, + { + fn foo<'a, K, V>( + (k, oom): (&'a K, &'a mut OneOrMore<V>), + ) -> IterMutInternalIntermediate<'a, K, V> { + iter::repeat(k).zip(oom.iter_mut()) + } + IterMut { + iter: self.map.iter_mut().flat_map(foo), + size: self.size, + } + } + } + + impl<K, V> Default for DupTreeMap<K, V> + where + K: Ord, + { + fn default() -> Self { + Self::new() + } + } + + impl<K, V> IntoIterator for DupTreeMap<K, V> + where + K: Ord + Clone, + { + type Item = (K, V); + type IntoIter = IntoIter<K, V>; + + fn into_iter(self) -> Self::IntoIter { + fn foo<K, V>( + (k, oom): (K, OneOrMore<V>), + ) -> IntoIterInternalIntermediate<K, V> + where + K: Clone, + { + iter::repeat(k).zip(oom.into_iter()) + } + IntoIter { + iter: self.map.into_iter().flat_map(foo), + size: self.size, + } + } + } + + impl<'a, K, V> IntoIterator for &'a DupTreeMap<K, V> + where + K: Ord, + { + type Item = (&'a K, &'a V); + type IntoIter = Iter<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + impl<'a, K, V> IntoIterator for &'a mut DupTreeMap<K, V> + where + K: Ord, + { + type Item = (&'a K, &'a mut V); + type IntoIter = IterMut<'a, K, V>; + + fn into_iter(self) -> Self::IntoIter { + self.iter_mut() + } + } + + impl<K, V> iter::FromIterator<(K, V)> for DupTreeMap<K, V> + where + K: Ord, + { + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = (K, V)>, + { + let mut map = DupTreeMap::new(); + for (k, v) in iter { + map.insert(k, v); + } + map + } + } + + impl<'a, K, V> Iterator for Iter<'a, K, V> { + type Item = (&'a K, &'a V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + impl<'a, K, V> Iterator for IterMut<'a, K, V> { + type Item = (&'a K, &'a mut V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + impl<K, V> Iterator for IntoIter<K, V> + where + K: Clone, + { + type Item = (K, V); + + fn next(&mut self) -> Option<Self::Item> { + let next = self.iter.next(); + if next.is_some() { + self.size -= 1; + } + next + } + + fn size_hint(&self) -> (usize, Option<usize>) { + (self.size, Some(self.size)) + } + } + + // unsafe impl<K, V> iter::TrustedLen for IntoIter<K, V> {} +} + +mod dup_tree_set { + use super::DupTreeMap; + use std::iter; + + #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] + pub struct DupTreeSet<K> { + map: DupTreeMap<K, ()>, + } + + pub type Iter<'a, K> = iter::Map< + super::dup_tree_map::Iter<'a, K, ()>, + for<'b> fn((&'b K, &'b ())) -> &'b K, + >; + pub type IntoIter<K> = + iter::Map<super::dup_tree_map::IntoIter<K, ()>, fn((K, ())) -> K>; + + impl<K> DupTreeSet<K> { + pub fn new() -> Self + where + K: Ord, + { + DupTreeSet { + map: DupTreeMap::new(), + } + } + + pub fn len(&self) -> usize { + self.map.len() + } + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + pub fn iter(&self) -> Iter<'_, K> + where + K: Ord, + { + fn foo<'a, K>((k, ()): (&'a K, &'a ())) -> &'a K { + k + } + self.map.iter().map(foo) + } + } + + impl<K> Default for DupTreeSet<K> + where + K: Ord, + { + fn default() -> Self { + Self::new() + } + } + + impl<K> IntoIterator for DupTreeSet<K> + where + K: Ord + Clone, + { + type Item = K; + type IntoIter = IntoIter<K>; + + fn into_iter(self) -> Self::IntoIter { + fn foo<K>((k, ()): (K, ())) -> K { + k + } + self.map.into_iter().map(foo) + } + } + + impl<'a, K> IntoIterator for &'a DupTreeSet<K> + where + K: Ord, + { + type Item = &'a K; + type IntoIter = Iter<'a, K>; + + fn into_iter(self) -> Self::IntoIter { + self.iter() + } + } + + impl<K> iter::FromIterator<K> for DupTreeSet<K> + where + K: Ord, + { + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = K>, + { + let map = iter.into_iter().map(|k| (k, ())).collect(); + DupTreeSet { map } + } + } +} diff --git a/dhall/src/syntax/core/mod.rs b/dhall/src/syntax/core/mod.rs new file mode 100644 index 0000000..66bf229 --- /dev/null +++ b/dhall/src/syntax/core/mod.rs @@ -0,0 +1,13 @@ +mod expr; +pub use expr::*; +mod import; +pub use import::*; +mod label; +pub use label::*; +mod span; +pub use span::*; +mod text; +pub use text::*; +pub mod context; +pub mod map; +pub mod visitor; diff --git a/dhall/src/syntax/core/span.rs b/dhall/src/syntax/core/span.rs new file mode 100644 index 0000000..f9c7008 --- /dev/null +++ b/dhall/src/syntax/core/span.rs @@ -0,0 +1,81 @@ +use std::rc::Rc; + +/// A location in the source text +#[derive(Debug, Clone)] +pub struct ParsedSpan { + input: Rc<str>, + /// # Safety + /// + /// Must be a valid character boundary index into `input`. + start: usize, + /// # Safety + /// + /// Must be a valid character boundary index into `input`. + end: usize, +} + +#[derive(Debug, Clone)] +pub enum Span { + /// A location in the source text + Parsed(ParsedSpan), + /// For expressions obtained from decoding binary + Decoded, + /// For expressions constructed during normalization/typecheck + Artificial, +} + +impl Span { + pub(crate) fn make(input: Rc<str>, sp: pest::Span) -> Self { + Span::Parsed(ParsedSpan { + input, + start: sp.start(), + end: sp.end(), + }) + } + + /// Takes the union of the two spans, i.e. the range of input covered by the two spans plus any + /// input between them. Assumes that the spans come from the same input. Fails if one of the + /// spans does not point to an input location. + pub fn union(&self, other: &Span) -> Self { + use std::cmp::{max, min}; + use Span::*; + match (self, other) { + (Parsed(x), Parsed(y)) if Rc::ptr_eq(&x.input, &y.input) => { + Parsed(ParsedSpan { + input: x.input.clone(), + start: min(x.start, y.start), + end: max(x.end, y.end), + }) + } + _ => panic!( + "Tried to union incompatible spans: {:?} and {:?}", + self, other + ), + } + } + + /// Merges two spans assumed to point to a similar thing. If only one of them points to an + /// input location, use that one. + pub fn merge(&self, other: &Span) -> Self { + use Span::*; + match (self, other) { + (Parsed(x), _) | (_, Parsed(x)) => Parsed(x.clone()), + (Artificial, _) | (_, Artificial) => Artificial, + (Decoded, Decoded) => Decoded, + } + } + + pub fn error(&self, message: impl Into<String>) -> String { + use pest::error::{Error, ErrorVariant}; + use pest::Span; + let message: String = message.into(); + let span = match self { + self::Span::Parsed(span) => span, + _ => return format!("[unknown location] {}", message), + }; + let span = Span::new(&*span.input, span.start, span.end).unwrap(); + let err: ErrorVariant<!> = ErrorVariant::CustomError { message }; + let err = Error::new_from_span(err, span); + format!("{}", err) + } +} diff --git a/dhall/src/syntax/core/text.rs b/dhall/src/syntax/core/text.rs new file mode 100644 index 0000000..fb390ee --- /dev/null +++ b/dhall/src/syntax/core/text.rs @@ -0,0 +1,181 @@ +use std::iter::FromIterator; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub struct InterpolatedText<SubExpr> { + head: String, + tail: Vec<(SubExpr, String)>, +} + +impl<SubExpr> From<(String, Vec<(SubExpr, String)>)> + for InterpolatedText<SubExpr> +{ + fn from(x: (String, Vec<(SubExpr, String)>)) -> Self { + InterpolatedText { + head: x.0, + tail: x.1, + } + } +} + +impl<SubExpr> From<String> for InterpolatedText<SubExpr> { + fn from(s: String) -> Self { + InterpolatedText { + head: s, + tail: vec![], + } + } +} + +#[derive(Debug, Clone, PartialEq, Eq)] +pub enum InterpolatedTextContents<SubExpr> { + Text(String), + Expr(SubExpr), +} + +impl<SubExpr> InterpolatedTextContents<SubExpr> { + pub fn is_empty(&self) -> bool { + use InterpolatedTextContents::{Expr, Text}; + match self { + Expr(_) => false, + Text(s) => s.is_empty(), + } + } + + pub fn traverse_ref<'a, SubExpr2, E, F>( + &'a self, + mut f: F, + ) -> Result<InterpolatedTextContents<SubExpr2>, E> + where + F: FnMut(&'a SubExpr) -> Result<SubExpr2, E>, + { + use InterpolatedTextContents::{Expr, Text}; + Ok(match self { + Expr(e) => Expr(f(e)?), + Text(s) => Text(s.clone()), + }) + } + pub fn traverse_mut<'a, E, F>(&'a mut self, mut f: F) -> Result<(), E> + where + F: FnMut(&'a mut SubExpr) -> Result<(), E>, + { + use InterpolatedTextContents::Expr; + if let Expr(e) = self { + f(e)?; + } + Ok(()) + } + pub fn map_ref<'a, SubExpr2, F>( + &'a self, + mut f: F, + ) -> InterpolatedTextContents<SubExpr2> + where + F: FnMut(&'a SubExpr) -> SubExpr2, + { + use InterpolatedTextContents::{Expr, Text}; + match self { + Expr(e) => Expr(f(e)), + Text(s) => Text(s.clone()), + } + } + pub fn map_mut<'a, F>(&'a mut self, mut f: F) + where + F: FnMut(&'a mut SubExpr), + { + use InterpolatedTextContents::Expr; + if let Expr(e) = self { + f(e); + } + } +} + +impl<SubExpr> InterpolatedText<SubExpr> { + pub fn len(&self) -> usize { + 1 + 2 * self.tail.len() + } + + pub fn head(&self) -> &str { + &self.head + } + + pub fn head_mut(&mut self) -> &mut String { + &mut self.head + } + + pub fn is_empty(&self) -> bool { + self.head.is_empty() && self.tail.is_empty() + } + + pub fn traverse_ref<'a, SubExpr2, E, F>( + &'a self, + mut f: F, + ) -> Result<InterpolatedText<SubExpr2>, E> + where + F: FnMut(&'a SubExpr) -> Result<SubExpr2, E>, + { + Ok(InterpolatedText { + head: self.head.clone(), + tail: self + .tail + .iter() + .map(|(e, s)| Ok((f(e)?, s.clone()))) + .collect::<Result<_, _>>()?, + }) + } + + pub fn traverse_mut<'a, E, F>(&'a mut self, mut f: F) -> Result<(), E> + where + F: FnMut(&'a mut SubExpr) -> Result<(), E>, + { + for (e, _) in &mut self.tail { + f(e)? + } + Ok(()) + } + + pub fn iter<'a>( + &'a self, + ) -> impl Iterator<Item = InterpolatedTextContents<&'a SubExpr>> + 'a { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + let exprs = self.tail.iter().map(|(e, _)| Expr(e)); + let texts = self.tail.iter().map(|(_, s)| Text(s.clone())); + once(Text(self.head.clone())).chain(itertools::interleave(exprs, texts)) + } + + pub fn into_iter( + self, + ) -> impl Iterator<Item = InterpolatedTextContents<SubExpr>> { + use std::iter::once; + use InterpolatedTextContents::{Expr, Text}; + once(Text(self.head)).chain( + self.tail + .into_iter() + .flat_map(|(e, s)| once(Expr(e)).chain(once(Text(s)))), + ) + } +} + +impl<SubExpr> FromIterator<InterpolatedTextContents<SubExpr>> + for InterpolatedText<SubExpr> +{ + fn from_iter<T>(iter: T) -> Self + where + T: IntoIterator<Item = InterpolatedTextContents<SubExpr>>, + { + let mut res = InterpolatedText { + head: String::new(), + tail: Vec::new(), + }; + let mut crnt_str = &mut res.head; + for x in iter.into_iter() { + match x { + InterpolatedTextContents::Text(s) => crnt_str.push_str(&s), + InterpolatedTextContents::Expr(e) => { + res.tail.push((e, String::new())); + crnt_str = &mut res.tail.last_mut().unwrap().1; + } + } + } + res + } +} diff --git a/dhall/src/syntax/core/visitor.rs b/dhall/src/syntax/core/visitor.rs new file mode 100644 index 0000000..b76d037 --- /dev/null +++ b/dhall/src/syntax/core/visitor.rs @@ -0,0 +1,360 @@ +use crate::syntax::*; +use std::iter::FromIterator; + +/// A visitor trait that can be used to traverse `ExprF`s. We need this pattern so that Rust lets +/// us have as much mutability as we can. +/// For example, `traverse_ref_with_special_handling_of_binders` cannot be made using only +/// `traverse_ref`, because `traverse_ref` takes a `FnMut` so we would need to pass multiple +/// mutable reverences to this argument to `traverse_ref`. But Rust's ownership system is all about +/// preventing exactly this ! So we have to be more clever. The visitor pattern allows us to have +/// only one mutable thing the whole time: the visitor itself. The visitor can then carry around +/// multiple closures or just one, and Rust is ok with either. See for example TraverseRefVisitor. +pub trait ExprFVisitor<'a, SE1, SE2, E1, E2>: Sized { + type Error; + + fn visit_subexpr(&mut self, subexpr: &'a SE1) -> Result<SE2, Self::Error>; + fn visit_embed(self, embed: &'a E1) -> Result<E2, Self::Error>; + + fn visit_subexpr_under_binder( + mut self, + _label: &'a Label, + subexpr: &'a SE1, + ) -> Result<SE2, Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit( + self, + input: &'a ExprF<SE1, E1>, + ) -> Result<ExprF<SE2, E2>, Self::Error> { + visit_ref(self, input) + } +} + +/// Like `ExprFVisitor`, but by mutable reference +pub trait ExprFMutVisitor<'a, SE, E>: Sized { + type Error; + + fn visit_subexpr(&mut self, subexpr: &'a mut SE) + -> Result<(), Self::Error>; + fn visit_embed(self, _embed: &'a mut E) -> Result<(), Self::Error> { + Ok(()) + } + + fn visit_subexpr_under_binder( + mut self, + _label: &'a mut Label, + subexpr: &'a mut SE, + ) -> Result<(), Self::Error> { + self.visit_subexpr(subexpr) + } + + fn visit(self, input: &'a mut ExprF<SE, E>) -> Result<(), Self::Error> { + visit_mut(self, input) + } +} + +fn visit_ref<'a, V, SE1, SE2, E1, E2>( + mut v: V, + input: &'a ExprF<SE1, E1>, +) -> Result<ExprF<SE2, E2>, V::Error> +where + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, +{ + fn vec<'a, T, U, Err, F: FnMut(&'a T) -> Result<U, Err>>( + x: &'a [T], + f: F, + ) -> Result<Vec<U>, Err> { + x.iter().map(f).collect() + } + fn opt<'a, T, U, Err, F: FnOnce(&'a T) -> Result<U, Err>>( + x: &'a Option<T>, + f: F, + ) -> Result<Option<U>, Err> { + Ok(match x { + Some(x) => Some(f(x)?), + None => None, + }) + } + fn dupmap<'a, V, SE1, SE2, E1, E2, T>( + x: impl IntoIterator<Item = (&'a Label, &'a SE1)>, + mut v: V, + ) -> Result<T, V::Error> + where + SE1: 'a, + T: FromIterator<(Label, SE2)>, + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, + { + x.into_iter() + .map(|(k, x)| Ok((k.clone(), v.visit_subexpr(x)?))) + .collect() + } + fn optdupmap<'a, V, SE1, SE2, E1, E2, T>( + x: impl IntoIterator<Item = (&'a Label, &'a Option<SE1>)>, + mut v: V, + ) -> Result<T, V::Error> + where + SE1: 'a, + T: FromIterator<(Label, Option<SE2>)>, + V: ExprFVisitor<'a, SE1, SE2, E1, E2>, + { + x.into_iter() + .map(|(k, x)| { + Ok(( + k.clone(), + match x { + Some(x) => Some(v.visit_subexpr(x)?), + None => None, + }, + )) + }) + .collect() + } + + use crate::syntax::ExprF::*; + Ok(match input { + Var(v) => Var(v.clone()), + Lam(l, t, e) => { + let t = v.visit_subexpr(t)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Lam(l.clone(), t, e) + } + Pi(l, t, e) => { + let t = v.visit_subexpr(t)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Pi(l.clone(), t, e) + } + Let(l, t, a, e) => { + let t = opt(t, &mut |e| v.visit_subexpr(e))?; + let a = v.visit_subexpr(a)?; + let e = v.visit_subexpr_under_binder(l, e)?; + Let(l.clone(), t, a, e) + } + App(f, a) => App(v.visit_subexpr(f)?, v.visit_subexpr(a)?), + Annot(x, t) => Annot(v.visit_subexpr(x)?, v.visit_subexpr(t)?), + Const(k) => Const(*k), + Builtin(v) => Builtin(*v), + BoolLit(b) => BoolLit(*b), + NaturalLit(n) => NaturalLit(*n), + IntegerLit(n) => IntegerLit(*n), + DoubleLit(n) => DoubleLit(*n), + TextLit(t) => TextLit(t.traverse_ref(|e| v.visit_subexpr(e))?), + BinOp(o, x, y) => BinOp(*o, v.visit_subexpr(x)?, v.visit_subexpr(y)?), + BoolIf(b, t, f) => BoolIf( + v.visit_subexpr(b)?, + v.visit_subexpr(t)?, + v.visit_subexpr(f)?, + ), + EmptyListLit(t) => EmptyListLit(v.visit_subexpr(t)?), + NEListLit(es) => NEListLit(vec(es, |e| v.visit_subexpr(e))?), + SomeLit(e) => SomeLit(v.visit_subexpr(e)?), + RecordType(kts) => RecordType(dupmap(kts, v)?), + RecordLit(kvs) => RecordLit(dupmap(kvs, v)?), + UnionType(kts) => UnionType(optdupmap(kts, v)?), + Merge(x, y, t) => Merge( + v.visit_subexpr(x)?, + v.visit_subexpr(y)?, + opt(t, |e| v.visit_subexpr(e))?, + ), + ToMap(x, t) => { + ToMap(v.visit_subexpr(x)?, opt(t, |e| v.visit_subexpr(e))?) + } + Field(e, l) => Field(v.visit_subexpr(e)?, l.clone()), + Projection(e, ls) => Projection(v.visit_subexpr(e)?, ls.clone()), + ProjectionByExpr(e, x) => { + ProjectionByExpr(v.visit_subexpr(e)?, v.visit_subexpr(x)?) + } + Assert(e) => Assert(v.visit_subexpr(e)?), + Import(i) => Import(i.traverse_ref(|e| v.visit_subexpr(e))?), + Embed(a) => Embed(v.visit_embed(a)?), + }) +} + +fn visit_mut<'a, V, SE, E>( + mut v: V, + input: &'a mut ExprF<SE, E>, +) -> Result<(), V::Error> +where + V: ExprFMutVisitor<'a, SE, E>, +{ + fn vec<'a, V, SE, E>(v: &mut V, x: &'a mut Vec<SE>) -> Result<(), V::Error> + where + V: ExprFMutVisitor<'a, SE, E>, + { + for x in x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn opt<'a, V, SE, E>( + v: &mut V, + x: &'a mut Option<SE>, + ) -> Result<(), V::Error> + where + V: ExprFMutVisitor<'a, SE, E>, + { + if let Some(x) = x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn dupmap<'a, V, SE, E>( + mut v: V, + x: impl IntoIterator<Item = (&'a Label, &'a mut SE)>, + ) -> Result<(), V::Error> + where + SE: 'a, + V: ExprFMutVisitor<'a, SE, E>, + { + for (_, x) in x { + v.visit_subexpr(x)?; + } + Ok(()) + } + fn optdupmap<'a, V, SE, E>( + mut v: V, + x: impl IntoIterator<Item = (&'a Label, &'a mut Option<SE>)>, + ) -> Result<(), V::Error> + where + SE: 'a, + V: ExprFMutVisitor<'a, SE, E>, + { + for (_, x) in x { + opt(&mut v, x)?; + } + Ok(()) + } + + use crate::syntax::ExprF::*; + match input { + Var(_) | Const(_) | Builtin(_) | BoolLit(_) | NaturalLit(_) + | IntegerLit(_) | DoubleLit(_) => {} + Lam(l, t, e) => { + v.visit_subexpr(t)?; + v.visit_subexpr_under_binder(l, e)?; + } + Pi(l, t, e) => { + v.visit_subexpr(t)?; + v.visit_subexpr_under_binder(l, e)?; + } + Let(l, t, a, e) => { + opt(&mut v, t)?; + v.visit_subexpr(a)?; + v.visit_subexpr_under_binder(l, e)?; + } + App(f, a) => { + v.visit_subexpr(f)?; + v.visit_subexpr(a)?; + } + Annot(x, t) => { + v.visit_subexpr(x)?; + v.visit_subexpr(t)?; + } + TextLit(t) => t.traverse_mut(|e| v.visit_subexpr(e))?, + BinOp(_, x, y) => { + v.visit_subexpr(x)?; + v.visit_subexpr(y)?; + } + BoolIf(b, t, f) => { + v.visit_subexpr(b)?; + v.visit_subexpr(t)?; + v.visit_subexpr(f)?; + } + EmptyListLit(t) => v.visit_subexpr(t)?, + NEListLit(es) => vec(&mut v, es)?, + SomeLit(e) => v.visit_subexpr(e)?, + RecordType(kts) => dupmap(v, kts)?, + RecordLit(kvs) => dupmap(v, kvs)?, + UnionType(kts) => optdupmap(v, kts)?, + Merge(x, y, t) => { + v.visit_subexpr(x)?; + v.visit_subexpr(y)?; + opt(&mut v, t)?; + } + ToMap(x, t) => { + v.visit_subexpr(x)?; + opt(&mut v, t)?; + } + Field(e, _) => v.visit_subexpr(e)?, + Projection(e, _) => v.visit_subexpr(e)?, + ProjectionByExpr(e, x) => { + v.visit_subexpr(e)?; + v.visit_subexpr(x)?; + } + Assert(e) => v.visit_subexpr(e)?, + Import(i) => i.traverse_mut(|e| v.visit_subexpr(e))?, + Embed(a) => v.visit_embed(a)?, + } + Ok(()) +} + +pub struct TraverseRefWithBindersVisitor<F1, F2> { + pub visit_subexpr: F1, + pub visit_under_binder: F2, +} + +impl<'a, SE, E, SE2, Err, F1, F2> ExprFVisitor<'a, SE, SE2, E, E> + for TraverseRefWithBindersVisitor<F1, F2> +where + SE: 'a, + E: 'a + Clone, + F1: FnMut(&'a SE) -> Result<SE2, Err>, + F2: FnOnce(&'a Label, &'a SE) -> Result<SE2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_subexpr_under_binder( + self, + label: &'a Label, + subexpr: &'a SE, + ) -> Result<SE2, Self::Error> { + (self.visit_under_binder)(label, subexpr) + } + fn visit_embed(self, embed: &'a E) -> Result<E, Self::Error> { + Ok(embed.clone()) + } +} + +pub struct TraverseRefVisitor<F1> { + pub visit_subexpr: F1, +} + +impl<'a, SE, E, SE2, Err, F1> ExprFVisitor<'a, SE, SE2, E, E> + for TraverseRefVisitor<F1> +where + SE: 'a, + E: 'a + Clone, + F1: FnMut(&'a SE) -> Result<SE2, Err>, +{ + type Error = Err; + + fn visit_subexpr(&mut self, subexpr: &'a SE) -> Result<SE2, Self::Error> { + (self.visit_subexpr)(subexpr) + } + fn visit_embed(self, embed: &'a E) -> Result<E, Self::Error> { + Ok(embed.clone()) + } +} + +pub struct TraverseMutVisitor<F1> { + pub visit_subexpr: F1, +} + +impl<'a, SE, E, Err, F1> ExprFMutVisitor<'a, SE, E> for TraverseMutVisitor<F1> +where + SE: 'a, + E: 'a, + F1: FnMut(&'a mut SE) -> Result<(), Err>, +{ + type Error = Err; + + fn visit_subexpr( + &mut self, + subexpr: &'a mut SE, + ) -> Result<(), Self::Error> { + (self.visit_subexpr)(subexpr) + } +} diff --git a/dhall/src/syntax/mod.rs b/dhall/src/syntax/mod.rs new file mode 100644 index 0000000..a82e827 --- /dev/null +++ b/dhall/src/syntax/mod.rs @@ -0,0 +1,15 @@ +#![allow( + clippy::many_single_char_names, + clippy::should_implement_trait, + clippy::new_without_default, + clippy::type_complexity +)] + +mod core; +pub use crate::syntax::core::context; +pub use crate::syntax::core::visitor; +pub use crate::syntax::core::*; +pub use crate::syntax::text::parser::*; +pub use crate::syntax::text::printer::*; +pub mod binary; +pub mod text; diff --git a/dhall/src/syntax/text/mod.rs b/dhall/src/syntax/text/mod.rs new file mode 100644 index 0000000..c868288 --- /dev/null +++ b/dhall/src/syntax/text/mod.rs @@ -0,0 +1,2 @@ +pub mod parser; +pub mod printer; diff --git a/dhall/src/syntax/text/parser.rs b/dhall/src/syntax/text/parser.rs new file mode 100644 index 0000000..f6b6577 --- /dev/null +++ b/dhall/src/syntax/text/parser.rs @@ -0,0 +1,920 @@ +use itertools::Itertools; +use pest::prec_climber as pcl; +use pest::prec_climber::PrecClimber; +use std::rc::Rc; + +use pest_consume::{match_nodes, Parser}; + +use crate::semantics::phase::Normalized; +use crate::syntax; +use crate::syntax::core; +use crate::syntax::map::{DupTreeMap, DupTreeSet}; +use crate::syntax::ExprF::*; +use crate::syntax::{ + FilePath, FilePrefix, Hash, ImportLocation, ImportMode, InterpolatedText, + InterpolatedTextContents, Label, NaiveDouble, RawExpr, Scheme, Span, URL, + V, +}; + +// This file consumes the parse tree generated by pest and turns it into +// our own AST. All those custom macros should eventually moved into +// their own crate because they are quite general and useful. For now they +// are here and hopefully you can figure out how they work. + +type Expr = syntax::Expr<Normalized>; +type ParsedText = InterpolatedText<Expr>; +type ParsedTextContents = InterpolatedTextContents<Expr>; +type ParseInput<'input> = pest_consume::Node<'input, Rule, Rc<str>>; + +pub type ParseError = pest::error::Error<Rule>; +pub type ParseResult<T> = Result<T, ParseError>; + +#[derive(Debug)] +enum Selector { + Field(Label), + Projection(DupTreeSet<Label>), + ProjectionByExpr(Expr), +} + +impl crate::syntax::Builtin { + pub fn parse(s: &str) -> Option<Self> { + use crate::syntax::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "None" => Some(OptionalNone), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Natural/subtract" => Some(NaturalSubtract), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +fn input_to_span(input: ParseInput) -> Span { + Span::make(input.user_data().clone(), input.as_pair().as_span()) +} +fn spanned(input: ParseInput, x: RawExpr<Normalized>) -> Expr { + Expr::new(x, input_to_span(input)) +} +fn spanned_union(span1: Span, span2: Span, x: RawExpr<Normalized>) -> Expr { + Expr::new(x, span1.union(&span2)) +} + +// Trim the shared indent off of a vec of lines, as defined by the Dhall semantics of multiline +// literals. +fn trim_indent(lines: &mut Vec<ParsedText>) { + let is_indent = |c: char| c == ' ' || c == '\t'; + + // There is at least one line so this is safe + let last_line_head = lines.last().unwrap().head(); + let indent_chars = last_line_head + .char_indices() + .take_while(|(_, c)| is_indent(*c)); + let mut min_indent_idx = match indent_chars.last() { + Some((i, _)) => i, + // If there is no indent char, then no indent needs to be stripped + None => return, + }; + + for line in lines.iter() { + // Ignore empty lines + if line.is_empty() { + continue; + } + // Take chars from line while they match the current minimum indent. + let indent_chars = last_line_head[0..=min_indent_idx] + .char_indices() + .zip(line.head().chars()) + .take_while(|((_, c1), c2)| c1 == c2); + match indent_chars.last() { + Some(((i, _), _)) => min_indent_idx = i, + // If there is no indent char, then no indent needs to be stripped + None => return, + }; + } + + // Remove the shared indent from non-empty lines + for line in lines.iter_mut() { + if !line.is_empty() { + line.head_mut().replace_range(0..=min_indent_idx, ""); + } + } +} + +lazy_static::lazy_static! { + static ref PRECCLIMBER: PrecClimber<Rule> = { + use Rule::*; + // In order of precedence + let operators = vec![ + import_alt, + bool_or, + natural_plus, + text_append, + list_append, + bool_and, + combine, + prefer, + combine_types, + natural_times, + bool_eq, + bool_ne, + equivalent, + ]; + PrecClimber::new( + operators + .into_iter() + .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) + .collect(), + ) + }; +} + +#[derive(Parser)] +#[grammar = "dhall.pest"] +struct DhallParser; + +#[pest_consume::parser(parser = DhallParser, rule = Rule)] +impl DhallParser { + fn EOI(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(label)] + fn simple_label(input: ParseInput) -> ParseResult<Label> { + Ok(Label::from(input.as_str())) + } + #[alias(label)] + fn quoted_label(input: ParseInput) -> ParseResult<Label> { + Ok(Label::from(input.as_str())) + } + + fn double_quote_literal(input: ParseInput) -> ParseResult<ParsedText> { + Ok(match_nodes!(input.into_children(); + [double_quote_chunk(chunks)..] => { + chunks.collect() + } + )) + } + + fn double_quote_chunk( + input: ParseInput, + ) -> ParseResult<ParsedTextContents> { + Ok(match_nodes!(input.into_children(); + [expression(e)] => { + InterpolatedTextContents::Expr(e) + }, + [double_quote_char(s)] => { + InterpolatedTextContents::Text(s) + }, + )) + } + #[alias(double_quote_char)] + fn double_quote_escaped(input: ParseInput) -> ParseResult<String> { + Ok(match input.as_str() { + "\"" => "\"".to_owned(), + "$" => "$".to_owned(), + "\\" => "\\".to_owned(), + "/" => "/".to_owned(), + "b" => "\u{0008}".to_owned(), + "f" => "\u{000C}".to_owned(), + "n" => "\n".to_owned(), + "r" => "\r".to_owned(), + "t" => "\t".to_owned(), + // "uXXXX" or "u{XXXXX}" + s => { + use std::convert::{TryFrom, TryInto}; + + let s = &s[1..]; + let s = if &s[0..1] == "{" { + &s[1..s.len() - 1] + } else { + &s[0..s.len()] + }; + + if s.len() > 8 { + Err(input.error(format!( + "Escape sequences can't have more than 8 chars: \"{}\"", + s + )))? + } + + // pad with zeroes + let s: String = std::iter::repeat('0') + .take(8 - s.len()) + .chain(s.chars()) + .collect(); + + // `s` has length 8, so `bytes` has length 4 + let bytes: &[u8] = &hex::decode(s).unwrap(); + let i = u32::from_be_bytes(bytes.try_into().unwrap()); + let c = char::try_from(i).unwrap(); + match i { + 0xD800..=0xDFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain surrogate pairs: \"{}\"", c_ecapsed)))? + } + 0x0FFFE..=0x0FFFF + | 0x1FFFE..=0x1FFFF + | 0x2FFFE..=0x2FFFF + | 0x3FFFE..=0x3FFFF + | 0x4FFFE..=0x4FFFF + | 0x5FFFE..=0x5FFFF + | 0x6FFFE..=0x6FFFF + | 0x7FFFE..=0x7FFFF + | 0x8FFFE..=0x8FFFF + | 0x9FFFE..=0x9FFFF + | 0xAFFFE..=0xAFFFF + | 0xBFFFE..=0xBFFFF + | 0xCFFFE..=0xCFFFF + | 0xDFFFE..=0xDFFFF + | 0xEFFFE..=0xEFFFF + | 0xFFFFE..=0xFFFFF + | 0x10_FFFE..=0x10_FFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain non-characters: \"{}\"", c_ecapsed)))? + } + _ => {} + } + std::iter::once(c).collect() + } + }) + } + fn double_quote_char(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn single_quote_literal(input: ParseInput) -> ParseResult<ParsedText> { + Ok(match_nodes!(input.into_children(); + [single_quote_continue(lines)] => { + let newline: ParsedText = "\n".to_string().into(); + + // Reverse lines and chars in each line + let mut lines: Vec<ParsedText> = lines + .into_iter() + .rev() + .map(|l| l.into_iter().rev().collect::<ParsedText>()) + .collect(); + + trim_indent(&mut lines); + + lines + .into_iter() + .intersperse(newline) + .flat_map(InterpolatedText::into_iter) + .collect::<ParsedText>() + } + )) + } + fn single_quote_char(input: ParseInput) -> ParseResult<&str> { + Ok(input.as_str()) + } + #[alias(single_quote_char)] + fn escaped_quote_pair(_input: ParseInput) -> ParseResult<&str> { + Ok("''") + } + #[alias(single_quote_char)] + fn escaped_interpolation(_input: ParseInput) -> ParseResult<&str> { + Ok("${") + } + + // Returns a vec of lines in reversed order, where each line is also in reversed order. + fn single_quote_continue( + input: ParseInput, + ) -> ParseResult<Vec<Vec<ParsedTextContents>>> { + Ok(match_nodes!(input.into_children(); + [expression(e), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Expr(e); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [single_quote_char(c), single_quote_continue(lines)] => { + let mut lines = lines; + if c == "\n" || c == "\r\n" { + lines.push(vec![]); + } else { + // TODO: don't allocate for every char + let c = InterpolatedTextContents::Text(c.to_owned()); + lines.last_mut().unwrap().push(c); + } + lines + }, + [] => { + vec![vec![]] + }, + )) + } + + #[alias(expression)] + fn builtin(input: ParseInput) -> ParseResult<Expr> { + let s = input.as_str(); + let e = match crate::syntax::Builtin::parse(s) { + Some(b) => Builtin(b), + None => match s { + "True" => BoolLit(true), + "False" => BoolLit(false), + "Type" => Const(crate::syntax::Const::Type), + "Kind" => Const(crate::syntax::Const::Kind), + "Sort" => Const(crate::syntax::Const::Sort), + _ => { + Err(input.error(format!("Unrecognized builtin: '{}'", s)))? + } + }, + }; + Ok(spanned(input, e)) + } + + #[alias(double_literal)] + fn NaN(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::NAN.into()) + } + #[alias(double_literal)] + fn minus_infinity_literal(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::NEG_INFINITY.into()) + } + #[alias(double_literal)] + fn plus_infinity_literal(_input: ParseInput) -> ParseResult<core::Double> { + Ok(std::f64::INFINITY.into()) + } + + #[alias(double_literal)] + fn numeric_double_literal(input: ParseInput) -> ParseResult<core::Double> { + let s = input.as_str().trim(); + match s.parse::<f64>() { + Ok(x) if x.is_infinite() => Err(input.error(format!( + "Overflow while parsing double literal '{}'", + s + ))), + Ok(x) => Ok(NaiveDouble::from(x)), + Err(e) => Err(input.error(format!("{}", e))), + } + } + + fn natural_literal(input: ParseInput) -> ParseResult<core::Natural> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + fn integer_literal(input: ParseInput) -> ParseResult<core::Integer> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + #[alias(expression, shortcut = true)] + fn identifier(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [variable(v)] => spanned(input, Var(v)), + [expression(e)] => e, + )) + } + + fn variable(input: ParseInput) -> ParseResult<V<Label>> { + Ok(match_nodes!(input.into_children(); + [label(l), natural_literal(idx)] => V(l, idx), + [label(l)] => V(l, 0), + )) + } + + #[alias(path_component)] + fn unquoted_path_component(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_string()) + } + #[alias(path_component)] + fn quoted_path_component(input: ParseInput) -> ParseResult<String> { + #[rustfmt::skip] + const RESERVED: &percent_encoding::AsciiSet = + &percent_encoding::CONTROLS + .add(b'=').add(b':').add(b'/').add(b'?') + .add(b'#').add(b'[').add(b']').add(b'@') + .add(b'!').add(b'$').add(b'&').add(b'\'') + .add(b'(').add(b')').add(b'*').add(b'+') + .add(b',').add(b';'); + Ok(input + .as_str() + .chars() + .map(|c| { + // Percent-encode ascii chars + if c.is_ascii() { + percent_encoding::utf8_percent_encode( + &c.to_string(), + RESERVED, + ) + .to_string() + } else { + c.to_string() + } + }) + .collect()) + } + fn path(input: ParseInput) -> ParseResult<FilePath> { + Ok(match_nodes!(input.into_children(); + [path_component(components)..] => { + FilePath { file_path: components.collect() } + } + )) + } + + #[alias(import_type)] + fn local(input: ParseInput) -> ParseResult<ImportLocation<Expr>> { + Ok(match_nodes!(input.into_children(); + [local_path((prefix, p))] => ImportLocation::Local(prefix, p), + )) + } + + #[alias(local_path)] + fn parent_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Parent, p) + )) + } + #[alias(local_path)] + fn here_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Here, p) + )) + } + #[alias(local_path)] + fn home_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Home, p) + )) + } + #[alias(local_path)] + fn absolute_path(input: ParseInput) -> ParseResult<(FilePrefix, FilePath)> { + Ok(match_nodes!(input.into_children(); + [path(p)] => (FilePrefix::Absolute, p) + )) + } + + fn scheme(input: ParseInput) -> ParseResult<Scheme> { + Ok(match input.as_str() { + "http" => Scheme::HTTP, + "https" => Scheme::HTTPS, + _ => unreachable!(), + }) + } + + fn http_raw(input: ParseInput) -> ParseResult<URL<Expr>> { + Ok(match_nodes!(input.into_children(); + [scheme(sch), authority(auth), path(p)] => URL { + scheme: sch, + authority: auth, + path: p, + query: None, + headers: None, + }, + [scheme(sch), authority(auth), path(p), query(q)] => URL { + scheme: sch, + authority: auth, + path: p, + query: Some(q), + headers: None, + }, + )) + } + + fn authority(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn query(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + #[alias(import_type)] + fn http(input: ParseInput) -> ParseResult<ImportLocation<Expr>> { + Ok(ImportLocation::Remote(match_nodes!(input.into_children(); + [http_raw(url)] => url, + [http_raw(url), expression(e)] => URL { headers: Some(e), ..url }, + ))) + } + + #[alias(import_type)] + fn env(input: ParseInput) -> ParseResult<ImportLocation<Expr>> { + Ok(match_nodes!(input.into_children(); + [environment_variable(v)] => ImportLocation::Env(v), + )) + } + #[alias(environment_variable)] + fn bash_environment_variable(input: ParseInput) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + #[alias(environment_variable)] + fn posix_environment_variable(input: ParseInput) -> ParseResult<String> { + Ok(match_nodes!(input.into_children(); + [posix_environment_variable_character(chars)..] => { + chars.collect() + }, + )) + } + fn posix_environment_variable_character( + input: ParseInput, + ) -> ParseResult<&str> { + Ok(match input.as_str() { + "\\\"" => "\"", + "\\\\" => "\\", + "\\a" => "\u{0007}", + "\\b" => "\u{0008}", + "\\f" => "\u{000C}", + "\\n" => "\n", + "\\r" => "\r", + "\\t" => "\t", + "\\v" => "\u{000B}", + s => s, + }) + } + + #[alias(import_type)] + fn missing(_input: ParseInput) -> ParseResult<ImportLocation<Expr>> { + Ok(ImportLocation::Missing) + } + + fn hash(input: ParseInput) -> ParseResult<Hash> { + let s = input.as_str().trim(); + let protocol = &s[..6]; + let hash = &s[7..]; + if protocol != "sha256" { + Err(input.error(format!("Unknown hashing protocol '{}'", protocol)))? + } + Ok(Hash::SHA256(hex::decode(hash).unwrap())) + } + + fn import_hashed( + input: ParseInput, + ) -> ParseResult<crate::syntax::Import<Expr>> { + use crate::syntax::Import; + let mode = ImportMode::Code; + Ok(match_nodes!(input.into_children(); + [import_type(location)] => Import { mode, location, hash: None }, + [import_type(location), hash(h)] => Import { mode, location, hash: Some(h) }, + )) + } + + #[alias(import_mode)] + fn Text(_input: ParseInput) -> ParseResult<ImportMode> { + Ok(ImportMode::RawText) + } + #[alias(import_mode)] + fn Location(_input: ParseInput) -> ParseResult<ImportMode> { + Ok(ImportMode::Location) + } + + #[alias(expression)] + fn import(input: ParseInput) -> ParseResult<Expr> { + use crate::syntax::Import; + let import = match_nodes!(input.children(); + [import_hashed(imp)] => { + Import { mode: ImportMode::Code, ..imp } + }, + [import_hashed(imp), import_mode(mode)] => { + Import { mode, ..imp } + }, + ); + Ok(spanned(input, Import(import))) + } + + fn lambda(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn forall(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn arrow(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn merge(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn assert(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn if_(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + fn toMap(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(expression)] + fn empty_list_literal(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [expression(e)] => spanned(input, EmptyListLit(e)), + )) + } + + fn expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [lambda(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(input, Lam(l, typ, body)) + }, + [if_(()), expression(cond), expression(left), + expression(right)] => { + spanned(input, BoolIf(cond, left, right)) + }, + [let_binding(bindings).., expression(final_expr)] => { + bindings.rev().fold( + final_expr, + |acc, x| { + spanned_union( + acc.span(), + x.3, + Let(x.0, x.1, x.2, acc) + ) + } + ) + }, + [forall(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(input, Pi(l, typ, body)) + }, + [expression(typ), arrow(()), expression(body)] => { + spanned(input, Pi("_".into(), typ, body)) + }, + [merge(()), expression(x), expression(y), expression(z)] => { + spanned(input, Merge(x, y, Some(z))) + }, + [assert(()), expression(x)] => { + spanned(input, Assert(x)) + }, + [toMap(()), expression(x), expression(y)] => { + spanned(input, ToMap(x, Some(y))) + }, + [expression(e), expression(annot)] => { + spanned(input, Annot(e, annot)) + }, + [expression(e)] => e, + )) + } + + fn let_binding( + input: ParseInput, + ) -> ParseResult<(Label, Option<Expr>, Expr, Span)> { + Ok(match_nodes!(input.children(); + [label(name), expression(annot), expression(expr)] => + (name, Some(annot), expr, input_to_span(input)), + [label(name), expression(expr)] => + (name, None, expr, input_to_span(input)), + )) + } + + #[alias(expression, shortcut = true)] + #[prec_climb(expression, PRECCLIMBER)] + fn operator_expression( + l: Expr, + op: ParseInput, + r: Expr, + ) -> ParseResult<Expr> { + use crate::syntax::BinOp::*; + use Rule::*; + let op = match op.as_rule() { + import_alt => ImportAlt, + bool_or => BoolOr, + natural_plus => NaturalPlus, + text_append => TextAppend, + list_append => ListAppend, + bool_and => BoolAnd, + combine => RecursiveRecordMerge, + prefer => RightBiasedRecordMerge, + combine_types => RecursiveRecordTypeMerge, + natural_times => NaturalTimes, + bool_eq => BoolEQ, + bool_ne => BoolNE, + equivalent => Equivalence, + r => Err(op.error(format!("Rule {:?} isn't an operator", r)))?, + }; + + Ok(spanned_union(l.span(), r.span(), BinOp(op, l, r))) + } + + fn Some_(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + #[alias(expression, shortcut = true)] + fn application_expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [expression(e)] => e, + [expression(first), expression(rest)..] => { + rest.fold( + first, + |acc, e| { + spanned_union( + acc.span(), + e.span(), + App(acc, e) + ) + } + ) + }, + )) + } + + #[alias(expression, shortcut = true)] + fn first_application_expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [Some_(()), expression(e)] => { + spanned(input, SomeLit(e)) + }, + [merge(()), expression(x), expression(y)] => { + spanned(input, Merge(x, y, None)) + }, + [toMap(()), expression(x)] => { + spanned(input, ToMap(x, None)) + }, + [expression(e)] => e, + )) + } + + #[alias(expression, shortcut = true)] + fn selector_expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [expression(e)] => e, + [expression(first), selector(rest)..] => { + rest.fold( + first, + |acc, e| { + spanned_union( + acc.span(), + e.1, + match e.0 { + Selector::Field(l) => Field(acc, l), + Selector::Projection(ls) => Projection(acc, ls), + Selector::ProjectionByExpr(e) => ProjectionByExpr(acc, e) + } + ) + } + ) + }, + )) + } + + fn selector(input: ParseInput) -> ParseResult<(Selector, Span)> { + let stor = match_nodes!(input.children(); + [label(l)] => Selector::Field(l), + [labels(ls)] => Selector::Projection(ls), + [expression(e)] => Selector::ProjectionByExpr(e), + ); + Ok((stor, input_to_span(input))) + } + + fn labels(input: ParseInput) -> ParseResult<DupTreeSet<Label>> { + Ok(match_nodes!(input.into_children(); + [label(ls)..] => ls.collect(), + )) + } + + #[alias(expression, shortcut = true)] + fn primitive_expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [double_literal(n)] => spanned(input, DoubleLit(n)), + [natural_literal(n)] => spanned(input, NaturalLit(n)), + [integer_literal(n)] => spanned(input, IntegerLit(n)), + [double_quote_literal(s)] => spanned(input, TextLit(s)), + [single_quote_literal(s)] => spanned(input, TextLit(s)), + [expression(e)] => e, + )) + } + + #[alias(expression)] + fn empty_record_literal(input: ParseInput) -> ParseResult<Expr> { + Ok(spanned(input, RecordLit(Default::default()))) + } + + #[alias(expression)] + fn empty_record_type(input: ParseInput) -> ParseResult<Expr> { + Ok(spanned(input, RecordType(Default::default()))) + } + + #[alias(expression)] + fn non_empty_record_type_or_literal( + input: ParseInput, + ) -> ParseResult<Expr> { + let e = match_nodes!(input.children(); + [label(first_label), non_empty_record_type(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + RecordType(map) + }, + [label(first_label), non_empty_record_literal(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + RecordLit(map) + }, + ); + Ok(spanned(input, e)) + } + + fn non_empty_record_type( + input: ParseInput, + ) -> ParseResult<(Expr, DupTreeMap<Label, Expr>)> { + Ok(match_nodes!(input.into_children(); + [expression(expr), record_type_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_type_entry(input: ParseInput) -> ParseResult<(Label, Expr)> { + Ok(match_nodes!(input.into_children(); + [label(name), expression(expr)] => (name, expr) + )) + } + + fn non_empty_record_literal( + input: ParseInput, + ) -> ParseResult<(Expr, DupTreeMap<Label, Expr>)> { + Ok(match_nodes!(input.into_children(); + [expression(expr), record_literal_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_literal_entry(input: ParseInput) -> ParseResult<(Label, Expr)> { + Ok(match_nodes!(input.into_children(); + [label(name), expression(expr)] => (name, expr) + )) + } + + #[alias(expression)] + fn union_type(input: ParseInput) -> ParseResult<Expr> { + let map = match_nodes!(input.children(); + [empty_union_type(_)] => Default::default(), + [union_type_entry(entries)..] => entries.collect(), + ); + Ok(spanned(input, UnionType(map))) + } + + fn empty_union_type(_input: ParseInput) -> ParseResult<()> { + Ok(()) + } + + fn union_type_entry( + input: ParseInput, + ) -> ParseResult<(Label, Option<Expr>)> { + Ok(match_nodes!(input.children(); + [label(name), expression(expr)] => (name, Some(expr)), + [label(name)] => (name, None), + )) + } + + #[alias(expression)] + fn non_empty_list_literal(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.children(); + [expression(items)..] => spanned( + input, + NEListLit(items.collect()) + ) + )) + } + + #[alias(expression)] + fn final_expression(input: ParseInput) -> ParseResult<Expr> { + Ok(match_nodes!(input.into_children(); + [expression(e), EOI(_)] => e + )) + } +} + +pub fn parse_expr(input_str: &str) -> ParseResult<Expr> { + let rc_input_str = input_str.to_string().into(); + let inputs = DhallParser::parse_with_userdata( + Rule::final_expression, + input_str, + rc_input_str, + )?; + Ok(match_nodes!(<DhallParser>; inputs; + [expression(e)] => e, + )) +} diff --git a/dhall/src/syntax/text/printer.rs b/dhall/src/syntax/text/printer.rs new file mode 100644 index 0000000..8df456b --- /dev/null +++ b/dhall/src/syntax/text/printer.rs @@ -0,0 +1,500 @@ +use crate::syntax::*; +use itertools::Itertools; +use std::fmt::{self, Display}; + +/// Generic instance that delegates to subexpressions +impl<SE: Display + Clone, E: Display> Display for ExprF<SE, E> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::ExprF::*; + match self { + Lam(a, b, c) => { + write!(f, "λ({} : {}) → {}", a, b, c)?; + } + BoolIf(a, b, c) => { + write!(f, "if {} then {} else {}", a, b, c)?; + } + Pi(a, b, c) if &String::from(a) == "_" => { + write!(f, "{} → {}", b, c)?; + } + Pi(a, b, c) => { + write!(f, "∀({} : {}) → {}", a, b, c)?; + } + Let(a, b, c, d) => { + write!(f, "let {}", a)?; + if let Some(b) = b { + write!(f, " : {}", b)?; + } + write!(f, " = {} in {}", c, d)?; + } + EmptyListLit(t) => { + write!(f, "[] : {}", t)?; + } + NEListLit(es) => { + fmt_list("[", ", ", "]", es, f, Display::fmt)?; + } + SomeLit(e) => { + write!(f, "Some {}", e)?; + } + Merge(a, b, c) => { + write!(f, "merge {} {}", a, b)?; + if let Some(c) = c { + write!(f, " : {}", c)?; + } + } + ToMap(a, b) => { + write!(f, "toMap {}", a)?; + if let Some(b) = b { + write!(f, " : {}", b)?; + } + } + Annot(a, b) => { + write!(f, "{} : {}", a, b)?; + } + Assert(a) => { + write!(f, "assert : {}", a)?; + } + ExprF::BinOp(op, a, b) => { + write!(f, "{} {} {}", a, op, b)?; + } + ExprF::App(a, b) => { + write!(f, "{} {}", a, b)?; + } + Field(a, b) => { + write!(f, "{}.{}", a, b)?; + } + Projection(e, ls) => { + write!(f, "{}.", e)?; + fmt_list("{ ", ", ", " }", ls, f, Display::fmt)?; + } + ProjectionByExpr(a, b) => { + write!(f, "{}.({})", a, b)?; + } + Var(a) => a.fmt(f)?, + Const(k) => k.fmt(f)?, + Builtin(v) => v.fmt(f)?, + BoolLit(true) => f.write_str("True")?, + BoolLit(false) => f.write_str("False")?, + NaturalLit(a) => a.fmt(f)?, + IntegerLit(a) if *a >= 0 => { + f.write_str("+")?; + a.fmt(f)?; + } + IntegerLit(a) => a.fmt(f)?, + DoubleLit(a) => a.fmt(f)?, + TextLit(a) => a.fmt(f)?, + RecordType(a) if a.is_empty() => f.write_str("{}")?, + RecordType(a) => fmt_list("{ ", ", ", " }", a, f, |(k, t), f| { + write!(f, "{} : {}", k, t) + })?, + RecordLit(a) if a.is_empty() => f.write_str("{=}")?, + RecordLit(a) => fmt_list("{ ", ", ", " }", a, f, |(k, v), f| { + write!(f, "{} = {}", k, v) + })?, + UnionType(a) => fmt_list("< ", " | ", " >", a, f, |(k, v), f| { + write!(f, "{}", k)?; + if let Some(v) = v { + write!(f, ": {}", v)?; + } + Ok(()) + })?, + Import(a) => a.fmt(f)?, + Embed(a) => a.fmt(f)?, + } + Ok(()) + } +} + +// There is a one-to-one correspondence between the formatter and the grammar. Each phase is +// named after a corresponding grammar group, and the structure of the formatter reflects +// the relationship between the corresponding grammar rules. This leads to the nice property +// of automatically getting all the parentheses and precedences right. +#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq)] +enum PrintPhase { + Base, + Operator, + BinOp(core::BinOp), + App, + Import, + Primitive, +} + +// Wraps an Expr with a phase, so that phase selsction can be done +// separate from the actual printing +#[derive(Clone)] +struct PhasedExpr<'a, A>(&'a Expr<A>, PrintPhase); + +impl<'a, A: Display + Clone> Display for PhasedExpr<'a, A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.0.as_ref().fmt_phase(f, self.1) + } +} + +impl<'a, A: Display + Clone> PhasedExpr<'a, A> { + fn phase(self, phase: PrintPhase) -> PhasedExpr<'a, A> { + PhasedExpr(self.0, phase) + } +} + +impl<A: Display + Clone> RawExpr<A> { + fn fmt_phase( + &self, + f: &mut fmt::Formatter, + phase: PrintPhase, + ) -> Result<(), fmt::Error> { + use crate::syntax::ExprF::*; + use PrintPhase::*; + + let needs_paren = match self { + Lam(_, _, _) + | BoolIf(_, _, _) + | Pi(_, _, _) + | Let(_, _, _, _) + | EmptyListLit(_) + | NEListLit(_) + | SomeLit(_) + | Merge(_, _, _) + | ToMap(_, _) + | Annot(_, _) + if phase > Base => + { + true + } + // Precedence is magically handled by the ordering of BinOps. + ExprF::BinOp(op, _, _) if phase > PrintPhase::BinOp(*op) => true, + ExprF::App(_, _) if phase > PrintPhase::App => true, + Field(_, _) | Projection(_, _) | ProjectionByExpr(_, _) + if phase > PrintPhase::Import => + { + true + } + _ => false, + }; + + // Annotate subexpressions with the appropriate phase, defaulting to Base + let phased_self = match self.map_ref(|e| PhasedExpr(e, Base)) { + Pi(a, b, c) => { + if &String::from(&a) == "_" { + Pi(a, b.phase(Operator), c) + } else { + Pi(a, b, c) + } + } + Merge(a, b, c) => Merge( + a.phase(PrintPhase::Import), + b.phase(PrintPhase::Import), + c.map(|x| x.phase(PrintPhase::App)), + ), + ToMap(a, b) => ToMap( + a.phase(PrintPhase::Import), + b.map(|x| x.phase(PrintPhase::App)), + ), + Annot(a, b) => Annot(a.phase(Operator), b), + ExprF::BinOp(op, a, b) => ExprF::BinOp( + op, + a.phase(PrintPhase::BinOp(op)), + b.phase(PrintPhase::BinOp(op)), + ), + SomeLit(e) => SomeLit(e.phase(PrintPhase::Import)), + ExprF::App(f, a) => ExprF::App( + f.phase(PrintPhase::Import), + a.phase(PrintPhase::Import), + ), + Field(a, b) => Field(a.phase(Primitive), b), + Projection(e, ls) => Projection(e.phase(Primitive), ls), + ProjectionByExpr(a, b) => ProjectionByExpr(a.phase(Primitive), b), + e => e, + }; + + if needs_paren { + f.write_str("(")?; + } + + // Uses the ExprF<PhasedExpr<_>, _> instance + phased_self.fmt(f)?; + + if needs_paren { + f.write_str(")")?; + } + Ok(()) + } +} + +impl<A: Display + Clone> Display for Expr<A> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + self.as_ref().fmt_phase(f, PrintPhase::Base) + } +} + +fn fmt_list<T, I, F>( + open: &str, + sep: &str, + close: &str, + it: I, + f: &mut fmt::Formatter, + func: F, +) -> Result<(), fmt::Error> +where + I: IntoIterator<Item = T>, + F: Fn(T, &mut fmt::Formatter) -> Result<(), fmt::Error>, +{ + f.write_str(open)?; + for (i, x) in it.into_iter().enumerate() { + if i > 0 { + f.write_str(sep)?; + } + func(x, f)?; + } + f.write_str(close) +} + +impl<SubExpr: Display> Display for InterpolatedText<SubExpr> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + f.write_str("\"")?; + for x in self.iter() { + match x { + InterpolatedTextContents::Text(a) => { + for c in a.chars() { + match c { + '\\' => f.write_str("\\\\"), + '"' => f.write_str("\\\""), + '$' => f.write_str("\\u0024"), + '\u{0008}' => f.write_str("\\b"), + '\u{000C}' => f.write_str("\\f"), + '\n' => f.write_str("\\n"), + '\r' => f.write_str("\\r"), + '\t' => f.write_str("\\t"), + '\u{0000}'..='\u{001F}' => { + // Escape to an explicit "\u{XXXX}" form + let escaped: String = + c.escape_default().collect(); + // Print as "\uXXXX" + write!( + f, + "\\u{:0>4}", + &escaped[3..escaped.len() - 1] + ) + } + c => write!(f, "{}", c), + }?; + } + } + InterpolatedTextContents::Expr(e) => { + f.write_str("${ ")?; + e.fmt(f)?; + f.write_str(" }")?; + } + } + } + f.write_str("\"")?; + Ok(()) + } +} + +impl Display for Const { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + <Self as fmt::Debug>::fmt(self, f) + } +} + +impl Display for BinOp { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::BinOp::*; + f.write_str(match self { + BoolOr => "||", + TextAppend => "++", + NaturalPlus => "+", + BoolAnd => "&&", + RecursiveRecordMerge => "∧", + NaturalTimes => "*", + BoolEQ => "==", + BoolNE => "!=", + RecursiveRecordTypeMerge => "⩓", + ImportAlt => "?", + RightBiasedRecordMerge => "⫽", + ListAppend => "#", + Equivalence => "≡", + }) + } +} + +impl Display for NaiveDouble { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let v = f64::from(*self); + if v == std::f64::INFINITY { + f.write_str("Infinity") + } else if v == std::f64::NEG_INFINITY { + f.write_str("-Infinity") + } else if v.is_nan() { + f.write_str("NaN") + } else { + let s = format!("{}", v); + if s.contains('e') || s.contains('.') { + f.write_str(&s) + } else { + write!(f, "{}.0", s) + } + } + } +} + +impl Display for Label { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + // TODO: distinguish between reserved and nonreserved locations for quoting builtins + let s = String::from(self); + let is_reserved = match s.as_str() { + "let" | "in" | "if" | "then" | "else" | "Type" | "Kind" + | "Sort" | "True" | "False" => true, + _ => crate::syntax::Builtin::parse(&s).is_some(), + }; + if !is_reserved && s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s) + } else { + write!(f, "`{}`", s) + } + } +} + +impl Display for Hash { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + match self { + Hash::SHA256(hash) => write!(f, "sha256:{}", hex::encode(hash)), + } + } +} +impl<SubExpr: Display> Display for Import<SubExpr> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use FilePrefix::*; + use ImportLocation::*; + use ImportMode::*; + let quote_if_needed = |s: &str| -> String { + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + s.to_string() + } else { + format!("\"{}\"", s) + } + }; + + match &self.location { + Local(prefix, path) => { + let prefix = match prefix { + Here => ".", + Parent => "..", + Home => "~", + Absolute => "", + }; + write!(f, "{}/", prefix)?; + let path: String = path + .file_path + .iter() + .map(|c| quote_if_needed(&*c)) + .join("/"); + f.write_str(&path)?; + } + Remote(url) => { + write!(f, "{}://{}/", url.scheme, url.authority,)?; + let path: String = url.path.file_path.iter().join("/"); + f.write_str(&path)?; + if let Some(q) = &url.query { + write!(f, "?{}", q)? + } + if let Some(h) = &url.headers { + write!(f, " using ({})", h)? + } + } + Env(s) => { + write!(f, "env:")?; + if s.chars().all(|c| c.is_ascii_alphanumeric()) { + write!(f, "{}", s)?; + } else { + write!(f, "\"")?; + for c in s.chars() { + match c { + '"' => f.write_str("\\\"")?, + '\\' => f.write_str("\\\\")?, + '\u{0007}' => f.write_str("\\a")?, + '\u{0008}' => f.write_str("\\b")?, + '\u{000C}' => f.write_str("\\f")?, + '\n' => f.write_str("\\n")?, + '\r' => f.write_str("\\r")?, + '\t' => f.write_str("\\t")?, + '\u{000B}' => f.write_str("\\v")?, + _ => write!(f, "{}", c)?, + } + } + write!(f, "\"")?; + } + } + Missing => { + write!(f, "missing")?; + } + } + if let Some(hash) = &self.hash { + write!(f, " ")?; + hash.fmt(f)?; + } + match self.mode { + Code => {} + RawText => write!(f, " as Text")?, + Location => write!(f, " as Location")?, + } + Ok(()) + } +} + +impl Display for Builtin { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::Builtin::*; + f.write_str(match *self { + Bool => "Bool", + Natural => "Natural", + Integer => "Integer", + Double => "Double", + Text => "Text", + List => "List", + Optional => "Optional", + OptionalNone => "None", + NaturalBuild => "Natural/build", + NaturalFold => "Natural/fold", + NaturalIsZero => "Natural/isZero", + NaturalEven => "Natural/even", + NaturalOdd => "Natural/odd", + NaturalToInteger => "Natural/toInteger", + NaturalShow => "Natural/show", + NaturalSubtract => "Natural/subtract", + IntegerToDouble => "Integer/toDouble", + IntegerShow => "Integer/show", + DoubleShow => "Double/show", + ListBuild => "List/build", + ListFold => "List/fold", + ListLength => "List/length", + ListHead => "List/head", + ListLast => "List/last", + ListIndexed => "List/indexed", + ListReverse => "List/reverse", + OptionalFold => "Optional/fold", + OptionalBuild => "Optional/build", + TextShow => "Text/show", + }) + } +} + +impl Display for Scheme { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + use crate::syntax::Scheme::*; + f.write_str(match *self { + HTTP => "http", + HTTPS => "https", + }) + } +} + +impl<Label: Display> Display for V<Label> { + fn fmt(&self, f: &mut fmt::Formatter) -> Result<(), fmt::Error> { + let V(x, n) = self; + x.fmt(f)?; + if *n != 0 { + write!(f, "@{}", n)?; + } + Ok(()) + } +} |