use pest::iterators::Pair; use pest::Parser; use std::collections::BTreeMap; use std::path::PathBuf; use std::rc::Rc; use dhall_parser::{DhallParser, Rule}; use crate::core; use crate::core::*; // This file consumes the parse tree generated by pest and turns it into // our own AST. All those custom macros should eventually moved into // their own crate because they are quite general and useful. For now they // are here and hopefully you can figure out how they work. pub type ParsedExpr = Expr; pub type ParsedText = InterpolatedText; pub type ParsedTextContents<'a> = InterpolatedTextContents<'a, X, Import>; pub type RcExpr = Rc; pub type ParseError = pest::error::Error; pub type ParseResult = Result; pub fn custom_parse_error(pair: &Pair, msg: String) -> ParseError { let msg = format!("{} while matching on:\n{}", msg, debug_pair(pair.clone())); let e = pest::error::ErrorVariant::CustomError { message: msg }; pest::error::Error::new_from_span(e, pair.as_span()) } fn debug_pair(pair: Pair) -> String { use std::fmt::Write; let mut s = String::new(); fn aux(s: &mut String, indent: usize, prefix: String, pair: Pair) { let indent_str = "| ".repeat(indent); let rule = pair.as_rule(); let contents = pair.as_str(); let mut inner = pair.into_inner(); let mut first = true; while let Some(p) = inner.next() { if first { first = false; let last = inner.peek().is_none(); if last && p.as_str() == contents { let prefix = format!("{}{:?} > ", prefix, rule); aux(s, indent, prefix, p); continue; } else { writeln!( s, r#"{}{}{:?}: "{}""#, indent_str, prefix, rule, contents ) .unwrap(); } } aux(s, indent + 1, "".into(), p); } if first { writeln!( s, r#"{}{}{:?}: "{}""#, indent_str, prefix, rule, contents ) .unwrap(); } } aux(&mut s, 0, "".into(), pair); s } #[derive(Debug)] enum IterMatchError { NoMatchFound, Other(T), // Allow other macros to inject their own errors } macro_rules! match_iter_typed { // Collect untyped arguments to pass to match_iter! 
(@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident : $ty:ident, $($rest:tt)*)) => { match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x), ($($rest)*)) }; (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident.. : $ty:ident, $($rest:tt)*)) => { match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x..), ($($rest)*)) }; // Catch extra comma if exists (@collect, ($($vars:tt)*), ($($args:tt)*), (,$($acc:tt)*), ($(,)*)) => { match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*), ()) }; (@collect, ($iter:expr, $body:expr, $callback:ident, $error:ident), ($($args:tt)*), ($($acc:tt)*), ($(,)*)) => { { let res = iter_patterns::destructure_iter!($iter; [$($acc)*] => { match_iter_typed!(@callback, $callback, $iter, $($args)*); $body }); res.ok_or(IterMatchError::NoMatchFound) } }; // Pass the matches through the callback (@callback, $callback:ident, $iter:expr, $x:ident : $ty:ident $($rest:tt)*) => { let $x = $callback!(@type_callback, $ty, $x); #[allow(unused_mut)] let mut $x = match $x { Ok(x) => x, Err(e) => break Err(IterMatchError::Other(e)), }; match_iter_typed!(@callback, $callback, $iter $($rest)*); }; (@callback, $callback: ident, $iter:expr, $x:ident.. : $ty:ident $($rest:tt)*) => { let $x = $x.map(|x| $callback!(@type_callback, $ty, x)).collect(); let $x: Vec<_> = match $x { Ok(x) => x, Err(e) => break Err(IterMatchError::Other(e)), }; #[allow(unused_mut)] let mut $x = $x.into_iter(); match_iter_typed!(@callback, $callback, $iter $($rest)*); }; (@callback, $callback:ident, $iter:expr $(,)*) => {}; ($callback:ident; $iter:expr; ($($args:tt)*) => $body:expr) => { { #[allow(unused_mut)] let mut iter = $iter; let res: Result<_, IterMatchError<_>> = loop { break match_iter_typed!(@collect, (iter, $body, $callback, last_error), ($($args)*), (), ($($args)*,) ) }; res } }; } macro_rules! 
match_iter_branching { (@noclone, $callback:ident; $arg:expr; $( $submac:ident!($($args:tt)*) => $body:expr ),* $(,)*) => { { #[allow(unused_assignments)] let mut last_error = IterMatchError::NoMatchFound; // Not a real loop; used for error handling // Would use loop labels but they create warnings #[allow(unreachable_code)] loop { $( let matched: Result<_, IterMatchError<_>> = $callback!(@branch_callback, $submac, $arg; ($($args)*) => $body); #[allow(unused_assignments)] match matched { Ok(v) => break Ok(v), Err(e) => last_error = e, }; )* break Err(last_error); } } }; ($callback:ident; $iter:expr; $($args:tt)*) => { { #[allow(unused_mut)] let mut iter = $iter; match_iter_branching!(@noclone, $callback; iter.clone(); $($args)*) } }; } macro_rules! match_pair { (@type_callback, $ty:ident, $x:expr) => { ParseUnwrapped::$ty($x) // ParseWrapped::$ty($x).map(|x| x.$ty()) }; (@branch_callback, children, $pair:expr; $($args:tt)*) => { { #[allow(unused_mut)] let mut pairs = $pair.clone().into_inner(); match_iter_typed!(match_pair; pairs; $($args)*) } }; (@branch_callback, self, $pair:expr; ($x:ident : $ty:ident) => $body:expr) => { { let $x = match_pair!(@type_callback, $ty, $pair.clone()); match $x { Ok($x) => Ok($body), Err(e) => Err(IterMatchError::Other(e)), } } }; (@branch_callback, raw_pair, $pair:expr; ($x:ident) => $body:expr) => { { let $x = $pair.clone(); Ok($body) } }; (@branch_callback, captured_str, $pair:expr; ($x:ident) => $body:expr) => { { let $x = $pair.as_str(); Ok($body) } }; ($pair:expr; $( children!($($x:ident : $ty:ident),*) => $body:expr ),* $(,)*) => { { let pair = $pair; let rule = pair.as_rule(); let err = custom_parse_error(&pair, "No match found".to_owned()); let parsed: Vec<_> = pair.into_inner().map(ParseWrapped::parse_any_fast).collect::>()?; #[allow(unreachable_code)] iter_patterns::match_vec!(parsed; $( [$(ParsedValue::$ty($x)),*] => { $body }, )* [x..] 
=> panic!("Unexpected children while parsing rule '{:?}': {:?}", rule, x.collect::>()), ).ok_or(err) } }; ($pair:expr; $($args:tt)*) => { { let pair = $pair; let result = match_iter_branching!(@noclone, match_pair; pair; $($args)*); result.map_err(|e| match e { IterMatchError::Other(e) => e, _ => custom_parse_error(&pair, "No match found".to_owned()), }) } }; } macro_rules! make_parser { (@branch_rules, $pair:expr, ($($acc:tt)*), rule!( $name:ident<$o:ty>; $($args:tt)* ); $($rest:tt)*) => ( make_parser!(@branch_rules, $pair, ($($acc)* Rule::$name => ParseWrapped::$name($pair),), $($rest)*) ); (@branch_rules, $pair:expr, ($($acc:tt)*), rule_group!( $name:ident<$o:ty>; $($ty:ident),* ); $($rest:tt)*) => ( make_parser!(@branch_rules, $pair, ($($acc)* $( Rule::$ty => ParseUnwrapped::$ty($pair).map(ParsedValue::$name),)* ), $($rest)*) ); (@branch_rules, $pair:expr, ($($acc:tt)*), $submac:ident!( $name:ident<$o:ty>; $($args:tt)* ); $($rest:tt)*) => ( make_parser!(@branch_rules, $pair, ($($acc)*), $($rest)*) ); (@branch_rules, $pair:expr, ($($acc:tt)*),) => ( #[allow(unreachable_patterns)] match $pair.as_rule() { $($acc)* r => Err(custom_parse_error(&$pair, format!("parse_any_fast: Unexpected {:?}", r))), // [x..] 
=> panic!("{:?}", x.collect::>()), } ); ($( $submac:ident!( $name:ident<$o:ty>; $($args:tt)* ); )*) => ( // #[allow(non_camel_case_types, dead_code)] // enum ParsedType { // $( $name, )* // } // impl ParsedType { // #[allow(dead_code)] // fn parse(self, pair: Pair) -> ParseResult { // match self { // $( ParsedType::$name => { // let ret = $name(pair)?; // Ok(ParsedValue::$name(ret)) // }, )* // } // } // // fn parse(self, pair: Pair) -> ParseResult { // // match self { // // $( ParsedType::$name => $name(pair), )* // // } // // } // } struct ParseWrapped; struct ParseUnwrapped; #[allow(non_camel_case_types, dead_code)] #[derive(Debug)] enum ParsedValue<'a> { $( $name($o), )* parse_any(Box>), } impl<'a> ParsedValue<'a> { $( #[allow(non_snake_case, dead_code)] fn $name(self) -> $o { match self { ParsedValue::$name(x) => x, _ => unreachable!(), } } )* #[allow(non_snake_case, dead_code)] fn parse_any(self) -> Box> { match self { ParsedValue::parse_any(x) => x, x => Box::new(x), } } #[allow(non_snake_case, dead_code)] fn parse_any_fast(self) -> Box> { self.parse_any() } } named!(parse_any>>; // self!(x: parse_any_fast) => x, $( self!(x: $name) => Box::new(ParsedValue::$name(x)), )* ); impl ParseWrapped { #[allow(non_snake_case, dead_code)] fn parse_any_fast(pair: Pair) -> ParseResult { make_parser!(@branch_rules, pair, (), $( $submac!( $name<$o>; $($args)* ); )*) } } // fn do_the_parse(s: &str, r: Rule, ty: ParsedType) -> ParseResult { // let pairs = DhallParser::parse(r, s)?; // match_iter!(pairs; (e) => ty.parse(e)) // } $( $submac!($name<$o>; $($args)*); )* ); } macro_rules! 
make_pest_parse_function {
    // Defines the two entry points for `$name`:
    // * `ParseWrapped::$name` runs `$submac!(pair; args)` and wraps the
    //   output in `ParsedValue::$name`;
    // * `ParseUnwrapped::$name` calls the wrapped version and unwraps the
    //   variant again, returning the plain `$o`.
    ($name:ident<$o:ty>; $submac:ident!( $($args:tt)* )) => (
        impl ParseUnwrapped {
            #[allow(unused_variables, non_snake_case, dead_code, clippy::all)]
            fn $name<'a>(pair: Pair<'a, Rule>) -> ParseResult<$o> {
                ParseWrapped::$name(pair).map(|x| x.$name())
            }
        }
        impl ParseWrapped {
            #[allow(unused_variables, non_snake_case, dead_code, clippy::all)]
            fn $name<'a>(pair: Pair<'a, Rule>) -> ParseResult<ParsedValue<'a>> {
                let res: ParseResult<$o> = $submac!(pair; $($args)*);
                Ok(ParsedValue::$name(res?))
            }
        }
    );
}

/// Declares a parse function `$name` whose body is a plain `match_pair!`,
/// without checking which rule the pair belongs to.
macro_rules! named {
    ($name:ident<$o:ty>; $($args:tt)*) => (
        make_pest_parse_function!($name<$o>; match_pair!( $($args)* ));
    );
}

/// Declares a parse function `$name` that only accepts pairs whose rule is
/// `Rule::$name`, and then applies `match_pair!` to them.
macro_rules! rule {
    ($name:ident<$o:ty>; $($args:tt)*) => (
        make_pest_parse_function!($name<$o>; match_rule!(
            Rule::$name => match_pair!( $($args)* ),
        ));
    );
}

/// Declares a parse function `$name` that accepts any of the listed rules and
/// forwards the pair to the matching `ParseUnwrapped::$ty` function.
macro_rules! rule_group {
    ($name:ident<$o:ty>; $($ty:ident),*) => (
        make_pest_parse_function!($name<$o>; match_rule!(
            $(
                Rule::$ty => match_pair!(raw_pair!(p) => ParseUnwrapped::$ty(p)?),
            )*
        ));
    );
}

/// Dispatches on `$pair.as_rule()`: the first matching pattern runs its
/// sub-macro on the pair; any other rule yields an `"Unexpected <rule>"`
/// parse error.
macro_rules! match_rule {
    ($pair:expr; $($pat:pat => $submac:ident!( $($args:tt)* ),)*) => {
        {
            #[allow(unreachable_patterns)]
            match $pair.as_rule() {
                $(
                    $pat => $submac!($pair; $($args)*),
                )*
                r => Err(custom_parse_error(&$pair, format!("Unexpected {:?}", r))),
            }
        }
    };
}

// List of rules that can be shortcutted as implemented in binop!()
/// Returns `true` for the expression rules that `binop!` is allowed to skip
/// over when they wrap a single sub-expression.
fn can_be_shortcutted(rule: Rule) -> bool {
    use Rule::*;
    match rule {
        import_alt_expression
        | or_expression
        | plus_expression
        | text_append_expression
        | list_append_expression
        | and_expression
        | combine_expression
        | prefer_expression
        | combine_types_expression
        | times_expression
        | equal_expression
        | not_equal_expression
        | application_expression
        | selector_expression_raw
        | annotated_expression => true,
        _ => false,
    }
}

macro_rules!
binop {
    // Declares the parser for the binary-operator rule `$rule`, building
    // left-nested `Expr::BinOp(BinOp::$op, ..)` nodes from its children.
    ($rule:ident<$ty:ty>; $op:ident) => {
        rule!($rule<$ty>;
            raw_pair!(pair) => {
                // This all could be a trivial fold, but to avoid stack explosion
                // we try to cut down on the recursion level here, by consuming
                // chains of blah_expression > ... > blih_expression in one go.
                let mut pair = pair;
                let mut pairs = pair.into_inner();
                // NOTE(review): presumably the grammar guarantees at least one
                // child for these rules; otherwise this `unwrap` panics — confirm.
                let first = pairs.next().unwrap();
                let rest: Vec<_> = pairs.map(ParseUnwrapped::expression).collect::<Result<_, _>>()?;
                if !rest.is_empty() {
                    // If there is more than one subexpression, handle it normally
                    let first = ParseUnwrapped::expression(first)?;
                    rest.into_iter().fold(first, |acc, e| bx(Expr::BinOp(BinOp::$op, acc, e)))
                } else {
                    // Otherwise, consume short-cuttable rules as long as they contain only one subexpression.
                    // println!("short-cutting {}", debug_pair(pair.clone()));
                    pair = first;
                    while can_be_shortcutted(pair.as_rule()) {
                        let mut pairs = pair.clone().into_inner();
                        let first = pairs.next().unwrap();
                        // Stop as soon as the node has a second child; no need
                        // to collect the remaining siblings to find that out.
                        if pairs.next().is_some() {
                            break;
                        }
                        pair = first;
                    }
                    // println!("short-cutted {}", debug_pair(pair.clone()));
                    // println!();
                    ParseUnwrapped::expression(pair)?
                }
            }
            // children!(first: expression, rest..: expression) => {
            //     rest.fold(first, |acc, e| bx(Expr::BinOp(BinOp::$op, acc, e)))
            // }
        );
    };
}

make_parser!{
rule!(EOI<()>; children!() => ());

named!(str<&'a str>; captured_str!(s) => s.trim());

named!(raw_str<&'a str>; captured_str!(s) => s);

named!(label