From 45be2ff1f5bb3d6e0faa098402adf985b3d5e7ca Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 4 May 2019 12:38:36 +0200 Subject: Rename dhall_core to dhall_syntax --- dhall_syntax/src/parser.rs | 984 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 984 insertions(+) create mode 100644 dhall_syntax/src/parser.rs (limited to 'dhall_syntax/src/parser.rs') diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs new file mode 100644 index 0000000..12383d4 --- /dev/null +++ b/dhall_syntax/src/parser.rs @@ -0,0 +1,984 @@ +use itertools::Itertools; +use pest::iterators::Pair; +use pest::Parser; +pub use pest::Span; +use std::borrow::Cow; +use std::collections::BTreeMap; +use std::path::PathBuf; + +use dhall_generated_parser::{DhallParser, Rule}; + +use crate::*; + +// This file consumes the parse tree generated by pest and turns it into +// our own AST. All those custom macros should eventually moved into +// their own crate because they are quite general and useful. For now they +// are here and hopefully you can figure out how they work. + +use crate::ExprF::*; + +type ParsedExpr<'a> = Expr, Import>; +type ParsedSubExpr<'a> = SubExpr, Import>; +type ParsedText<'a> = InterpolatedText, Import>>; +type ParsedTextContents<'a> = + InterpolatedTextContents, Import>>; + +pub type ParseError = pest::error::Error; + +pub type ParseResult = Result; + +fn rc(x: ParsedExpr<'_>) -> ParsedSubExpr<'_> { + crate::rc(x) +} + +fn spanned<'a>(_span: Span<'a>, x: ParsedExpr<'a>) -> ParsedExpr<'a> { + x + // This breaks equality testing; I need to fix that first + // Note(span, rc(x)) +} + +#[derive(Debug)] +enum Either { + Left(A), + Right(B), +} + +impl crate::Builtin { + pub fn parse(s: &str) -> Option { + use crate::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "None" => Some(OptionalNone), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } +} + +pub fn custom_parse_error(pair: &Pair, msg: String) -> ParseError { + let msg = + format!("{} while matching on:\n{}", msg, debug_pair(pair.clone())); + let e = pest::error::ErrorVariant::CustomError { message: msg }; + pest::error::Error::new_from_span(e, pair.as_span()) +} + +fn debug_pair(pair: Pair) -> String { + use std::fmt::Write; + let mut s = String::new(); + fn aux(s: &mut String, indent: usize, prefix: String, pair: Pair) { + let indent_str = "| ".repeat(indent); + let rule = pair.as_rule(); + let contents = pair.as_str(); + let mut inner = pair.into_inner(); + let mut first = true; + while let Some(p) = inner.next() { + if first { + first = false; + let last = inner.peek().is_none(); + if last && p.as_str() == contents { + let prefix = format!("{}{:?} > ", prefix, rule); + aux(s, indent, prefix, p); + continue; + } else { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(s, indent + 1, "".into(), p); + } + if first { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } + } + aux(&mut s, 0, "".into(), pair); + s +} + +macro_rules! make_parser { + (@pattern, rule, $name:ident) => (Rule::$name); + (@pattern, token_rule, $name:ident) => (Rule::$name); + (@pattern, rule_group, $name:ident) => (_); + (@filter, rule) => (true); + (@filter, token_rule) => (true); + (@filter, rule_group) => (false); + + (@body, + $pair:expr, + $children:expr, + rule!( $name:ident<$o:ty>; $($args:tt)* ) + ) => ( + make_parser!(@body, + $pair, + $children, + rule!( $name<$o> as $name; $($args)* ) + ) + ); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + captured_str!($x:pat) => $body:expr + ) + ) => ({ + let $x = $pair.as_str(); + let res: $o = $body; + Ok(ParsedValue::$group(res)) + }); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + children!( $( [$($args:tt)*] => $body:expr ),* $(,)* ) + ) + ) => ({ + #[allow(unused_imports)] + use ParsedValue::*; + #[allow(unreachable_code)] + let res: $o = improved_slice_patterns::match_vec!($children; + $( [$($args)*] => $body, )* + [x..] => Err( + format!("Unexpected children: {:?}", x.collect::>()) + )?, + ).map_err(|_| -> String { unreachable!() })?; + Ok(ParsedValue::$group(res)) + }); + (@body, + $pair:expr, + $children:expr, + rule!( + $name:ident<$o:ty> + as $group:ident; + $span:ident; + $($args:tt)* + ) + ) => ({ + let $span = $pair.as_span(); + make_parser!(@body, + $pair, + $children, + rule!( + $name<$o> + as $group; + $($args)* + ) + ) + }); + (@body, + $pair:expr, + $children:expr, + token_rule!($name:ident<$o:ty>) + ) => ({ + Ok(ParsedValue::$name(())) + }); + (@body, $pair:expr, $children:expr, rule_group!( $name:ident<$o:ty> )) => ( + unreachable!() + ); + + ($( $submac:ident!( $name:ident<$o:ty> $($args:tt)* ); )*) => ( + #[allow(non_camel_case_types, dead_code, clippy::large_enum_variant)] + #[derive(Debug)] + enum ParsedValue<'a> { + $( $name($o), )* + } + + fn parse_any<'a>(pair: Pair<'a, Rule>, children: Vec>) + -> Result, String> { + match pair.as_rule() { + $( + make_parser!(@pattern, $submac, $name) + if make_parser!(@filter, $submac) + => make_parser!(@body, pair, children, + $submac!( $name<$o> $($args)* )) + , + )* + r => Err(format!("Unexpected {:?}", r)), + } + } + ); +} + +// Non-recursive implementation to avoid stack overflows +fn do_parse<'a>(initial_pair: Pair<'a, Rule>) -> ParseResult> { + enum StackFrame<'a> { + Unprocessed(Pair<'a, Rule>), + Processed(Pair<'a, Rule>, usize), + } + use StackFrame::*; + let mut pairs_stack: Vec = + vec![Unprocessed(initial_pair.clone())]; + let mut values_stack: Vec = vec![]; + while let Some(p) = pairs_stack.pop() { + match p { + Unprocessed(mut pair) => loop { + let mut pairs: Vec<_> = pair.clone().into_inner().collect(); + let n_children = pairs.len(); + if n_children == 1 && can_be_shortcutted(pair.as_rule()) { + pair = pairs.pop().unwrap(); + continue; + } else { + pairs_stack.push(Processed(pair, n_children)); + pairs_stack + .extend(pairs.into_iter().map(StackFrame::Unprocessed)); + break; + } + }, + Processed(pair, n) => { + let mut children: Vec<_> = + values_stack.split_off(values_stack.len() - n); + children.reverse(); + let val = match parse_any(pair.clone(), children) { + Ok(v) => v, + Err(msg) => Err(custom_parse_error(&pair, msg))?, + }; + values_stack.push(val); + } + } + } + Ok(values_stack.pop().unwrap()) +} + +// List of rules that can be shortcutted if they have a single child +fn can_be_shortcutted(rule: Rule) -> bool { + use Rule::*; + match rule { + expression + | import_alt_expression + | or_expression + | plus_expression + | text_append_expression + | list_append_expression + | and_expression + | combine_expression + | prefer_expression + | combine_types_expression + | times_expression + | equal_expression + | not_equal_expression + | application_expression + | first_application_expression + | selector_expression + | annotated_expression => true, + _ => false, + } +} + +make_parser! { + token_rule!(EOI<()>); + + rule!(simple_label