From 186eda565954f4b5a155374b9ba00c96cede439f Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Wed, 20 Mar 2019 19:32:27 +0100 Subject: Restore parser performance --- dhall_core/src/parser.rs | 392 ++++++++++------------------------------------- 1 file changed, 82 insertions(+), 310 deletions(-) diff --git a/dhall_core/src/parser.rs b/dhall_core/src/parser.rs index 6718c00..30e3367 100644 --- a/dhall_core/src/parser.rs +++ b/dhall_core/src/parser.rs @@ -71,137 +71,7 @@ fn debug_pair(pair: Pair) -> String { s } -// #[derive(Debug)] -// enum IterMatchError { -// NoMatchFound, -// Other(T), // Allow other macros to inject their own errors -// } - -// macro_rules! match_iter_typed { -// // Collect untyped arguments to pass to match_iter! -// (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident : $ty:ident, $($rest:tt)*)) => { -// match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x), ($($rest)*)) -// }; -// (@collect, ($($vars:tt)*), ($($args:tt)*), ($($acc:tt)*), ($x:ident.. : $ty:ident, $($rest:tt)*)) => { -// match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*, $x..), ($($rest)*)) -// }; -// // Catch extra comma if exists -// (@collect, ($($vars:tt)*), ($($args:tt)*), (,$($acc:tt)*), ($(,)*)) => { -// match_iter_typed!(@collect, ($($vars)*), ($($args)*), ($($acc)*), ()) -// }; -// (@collect, ($iter:expr, $body:expr, $callback:ident, $error:ident), ($($args:tt)*), ($($acc:tt)*), ($(,)*)) => { -// { -// let res = iter_patterns::destructure_iter!($iter; [$($acc)*] => { -// match_iter_typed!(@callback, $callback, $iter, $($args)*); -// $body -// }); -// res.ok_or(IterMatchError::NoMatchFound) -// } -// }; - -// // Pass the matches through the callback -// (@callback, $callback:ident, $iter:expr, $x:ident : $ty:ident $($rest:tt)*) => { -// let $x = $callback!(@type_callback, $ty, $x); -// #[allow(unused_mut)] -// let mut $x = match $x { -// Ok(x) => x, -// Err(e) => break Err(IterMatchError::Other(e)), -// }; -// match_iter_typed!(@callback, $callback, $iter $($rest)*); -// }; -// (@callback, $callback: ident, $iter:expr, $x:ident.. : $ty:ident $($rest:tt)*) => { -// let $x = $x.map(|x| $callback!(@type_callback, $ty, x)).collect(); -// let $x: Vec<_> = match $x { -// Ok(x) => x, -// Err(e) => break Err(IterMatchError::Other(e)), -// }; -// #[allow(unused_mut)] -// let mut $x = $x.into_iter(); -// match_iter_typed!(@callback, $callback, $iter $($rest)*); -// }; -// (@callback, $callback:ident, $iter:expr $(,)*) => {}; - -// ($callback:ident; $iter:expr; ($($args:tt)*) => $body:expr) => { -// { -// #[allow(unused_mut)] -// let mut iter = $iter; -// let res: Result<_, IterMatchError<_>> = loop { -// break match_iter_typed!(@collect, -// (iter, $body, $callback, last_error), -// ($($args)*), (), ($($args)*,) -// ) -// }; -// res -// } -// }; -// } - -// macro_rules! match_iter_branching { -// (@noclone, $callback:ident; $arg:expr; $( $submac:ident!($($args:tt)*) => $body:expr ),* $(,)*) => { -// { -// #[allow(unused_assignments)] -// let mut last_error = IterMatchError::NoMatchFound; -// // Not a real loop; used for error handling -// // Would use loop labels but they create warnings -// #[allow(unreachable_code)] -// loop { -// $( -// let matched: Result<_, IterMatchError<_>> = -// $callback!(@branch_callback, $submac, $arg; ($($args)*) => $body); -// #[allow(unused_assignments)] -// match matched { -// Ok(v) => break Ok(v), -// Err(e) => last_error = e, -// }; -// )* -// break Err(last_error); -// } -// } -// }; -// ($callback:ident; $iter:expr; $($args:tt)*) => { -// { -// #[allow(unused_mut)] -// let mut iter = $iter; -// match_iter_branching!(@noclone, $callback; iter.clone(); $($args)*) -// } -// }; -// } - macro_rules! match_pair { - // // (@type_callback, parse_any_fast, $x:expr) => { - // // ParseWrapped::parse_any_fast($x).map(Box::new) - // // }; - // (@type_callback, $ty:ident, $x:expr) => { - // ParseUnwrapped::$ty($x) - // // ParseWrapped::$ty($x).map(|x| x.$ty()) - // // match ParseWrapped::parse_any($x) { - // // match parse_any_fast($x.clone()) { - // // // match ParseWrapped::$ty($x.clone()) { - // // Ok(ParsedValue::$ty(x)) => Ok(x), - // // // _ => ParseUnwrapped::$ty($x), - // // _ => Err(custom_parse_error(&$x, format!("Total failure"))), - // // } - // }; - // (@branch_callback, children, $pair:expr; $($args:tt)*) => { - // { - // #[allow(unused_mut)] - // let mut pairs = $pair.clone().into_inner(); - // match_iter_typed!(match_pair; pairs; $($args)*) - // } - // }; - // (@branch_callback, raw_pair, $pair:expr; ($x:ident) => $body:expr) => { - // { - // let $x = $pair.clone(); - // Ok($body) - // } - // }; - // (@branch_callback, captured_str, $pair:expr; ($x:ident) => $body:expr) => { - // { - // let $x = $pair.as_str(); - // Ok($body) - // } - // }; - (@make_child_match, $pair:expr, ($($outer_acc:tt)*), ($($acc:tt)*), ($(,)* $x:ident : $ty:ident $($rest_of_match:tt)*) => $body:expr, $($rest:tt)*) => { match_pair!(@make_child_match, $pair, ($($outer_acc)*), ($($acc)*, ParsedValue::$ty($x)), ($($rest_of_match)*) => $body, $($rest)*) }; @@ -220,9 +90,6 @@ macro_rules! match_pair { (@make_matches, $pair:expr, ($($acc:tt)*), children!($($args:tt)*) => $body:expr, $($rest:tt)*) => { match_pair!(@make_child_match, $pair, ($($acc)*), (), ($($args)*) => $body, $($rest)*) - // (@make_matches, $pair:expr, ($($acc:tt)*), children!($($x:ident : $ty:ident),*) => $body:expr, $($rest:tt)*) => { - // match_pair!(@make_child_match, $pair, ($($acc)*), ($(, ParsedValue::$ty($x))*), () => $body, $($rest)*) - // match_pair!(@make_matches, $pair, ([$(ParsedValue::$ty($x)),*] => { $body }, $($acc)*), $($rest)*) }; (@make_matches, $pair:expr, ($($acc:tt)*), raw_pair!($x:ident) => $body:expr, $($rest:tt)*) => { match_pair!(@make_matches, $pair, ([..] => { @@ -249,27 +116,8 @@ macro_rules! match_pair { } }; - // ($pair:expr; $( children!($($x:ident : $ty:ident),*) => $body:expr ),* $(,)*) => { - // match_pair!(@make_matches, $pair, (), $( children!($($x : $ty),*) => $body ),* ,) - // ($pair:expr; $( children!($($args:tt)*) => $body:expr ),* $(,)*) => { - // match_pair!(@make_matches, $pair, (), $( children!($($args)*) => $body ),* ,) ($pair:expr; $( $submac:ident!($($args:tt)*) => $body:expr ),* $(,)*) => { match_pair!(@make_matches, $pair, (), $( $submac!($($args)*) => $body ),* ,) - // ($pair:expr; $($args:tt)*) => { - // match_pair!(@make_matches, $pair, (), $($args)*) - // { - // let pair = $pair; - // let rule = pair.as_rule(); - // let err = custom_parse_error(&pair, "No match found".to_owned()); - // let parsed: Vec<_> = pair.into_inner().map(ParseWrapped::parse_any_fast).collect::>()?; - // #[allow(unreachable_code)] - // iter_patterns::match_vec!(parsed; - // $( - // [$(ParsedValue::$ty($x)),*] => { $body }, - // )* - // [x..] => panic!("Unexpected children while parsing rule '{:?}': {:?}", rule, x.collect::>()), - // ).ok_or(err) - // } }; ($pair:expr; $($args:tt)*) => { { @@ -285,11 +133,23 @@ macro_rules! match_pair { macro_rules! make_parser { (@branch_rules, $pair:expr, ($($acc:tt)*), rule!( $name:ident<$o:ty>; $($args:tt)* ); $($rest:tt)*) => ( - make_parser!(@branch_rules, $pair, ($($acc)* Rule::$name => ParseWrapped::$name($pair),), $($rest)*) + make_parser!(@branch_rules, $pair, ($($acc)* Rule::$name => { + ParseWrapped::$name($pair) + },), $($rest)*) ); - (@branch_rules, $pair:expr, ($($acc:tt)*), rule_group!( $name:ident<$o:ty>; $($ty:ident),* ); $($rest:tt)*) => ( - make_parser!(@branch_rules, $pair, ($($acc)* $( Rule::$ty => ParseUnwrapped::$ty($pair).map(ParsedValue::$name),)* ), $($rest)*) + (@branch_rules, $pair:expr, ($($acc:tt)*), rule_in_group!( $name:ident<$o:ty>; $group:ident; $($args:tt)* ); $($rest:tt)*) => ( + make_parser!(@branch_rules, $pair, ($($acc)* Rule::$name => { + ParseWrapped::$name($pair).map(|x| x.$name()).map(ParsedValue::$group) + },), $($rest)*) ); + (@branch_rules, $pair:expr, ($($acc:tt)*), binop!( $name:ident<$o:ty>; $op:ident ); $($rest:tt)*) => ( + make_parser!(@branch_rules, $pair, ($($acc)* Rule::$name => { + parse_binop($pair, BinOp::$op).map(ParsedValue::expression) + },), $($rest)*) + ); + // (@branch_rules, $pair:expr, ($($acc:tt)*), rule_group!( $name:ident<$o:ty>; $($ty:ident),* ); $($rest:tt)*) => ( + // make_parser!(@branch_rules, $pair, ($($acc)* $( Rule::$ty => ParseUnwrapped::$ty($pair).map(ParsedValue::$name),)* ), $($rest)*) + // ); (@branch_rules, $pair:expr, ($($acc:tt)*), $submac:ident!( $name:ident<$o:ty>; $($args:tt)* ); $($rest:tt)*) => ( make_parser!(@branch_rules, $pair, ($($acc)*), $($rest)*) ); @@ -298,7 +158,6 @@ macro_rules! make_parser { match $pair.as_rule() { $($acc)* r => Err(custom_parse_error(&$pair, format!("parse_any_fast: Unexpected {:?}", r))), - // [x..] => panic!("{:?}", x.collect::>()), } ); ($( $submac:ident!( $name:ident<$o:ty>; $($args:tt)* ); )*) => ( @@ -330,7 +189,6 @@ macro_rules! make_parser { #[derive(Debug)] enum ParsedValue<'a> { $( $name($o), )* - // parse_any(Box>), } impl<'a> ParsedValue<'a> { @@ -343,26 +201,8 @@ macro_rules! make_parser { } } )* - // #[allow(non_snake_case, dead_code)] - // fn parse_any(self) -> Box> { - // match self { - // ParsedValue::parse_any(x) => x, - // x => Box::new(x), - // } - // } - // #[allow(non_snake_case, dead_code)] - // fn parse_any_fast(self) -> Box> { - // self.parse_any() - // } } - // named!(parse_any>>; - // // self!(x: parse_any_fast) => x, - // $( - // self!(x: $name) => Box::new(ParsedValue::$name(x)), - // )* - // ); - impl ParseWrapped { #[allow(non_snake_case, dead_code)] fn parse_any_fast(pair: Pair) -> ParseResult { @@ -412,46 +252,23 @@ macro_rules! make_pest_parse_function { ); } -// macro_rules! named { -// ($name:ident<$o:ty>; $($args:tt)*) => ( -// make_pest_parse_function!($name<$o>; match_pair!( $($args)* )); -// ); -// } - macro_rules! rule { ($name:ident<$o:ty>; $($args:tt)*) => ( make_pest_parse_function!($name<$o>; match_pair!( $($args)* )); - // make_pest_parse_function!($name<$o>; match_rule!( - // Rule::$name => match_pair!( $($args)* ), - // )); ); } -macro_rules! rule_group { - ($name:ident<$o:ty>; $($ty:ident),*) => ( - // make_pest_parse_function!($name<$o>; match_rule!( - // $( - // Rule::$ty => match_pair!(raw_pair!(p) => ParseUnwrapped::$ty(p)?), - // )* - // )); +macro_rules! rule_in_group { + ($name:ident<$o:ty>; $group:ident; $($args:tt)*) => ( + make_pest_parse_function!($name<$o>; match_pair!( $($args)* )); ); } -// macro_rules! match_rule { -// ($pair:expr; $($pat:pat => $submac:ident!( $($args:tt)* ),)*) => { -// { -// #[allow(unreachable_patterns)] -// match $pair.as_rule() { -// $( -// $pat => $submac!($pair; $($args)*), -// )* -// r => Err(custom_parse_error(&$pair, format!("Unexpected {:?}", r))), -// } -// } -// }; -// } - -// List of rules that can be shortcutted as implemented in binop!() +macro_rules! rule_group { + ($name:ident<$o:ty>; $($ty:ident),*) => (); +} + +// List of rules that can be shortcutted as implemented in parse_binop fn can_be_shortcutted(rule: Rule) -> bool { use Rule::*; match rule { @@ -474,42 +291,43 @@ fn can_be_shortcutted(rule: Rule) -> bool { } } +fn parse_binop(pair: Pair, o: BinOp) -> ParseResult { + // This all could be a trivial fold, but to avoid stack explosion + // we try to cut down on the recursion level here, by consuming + // chains of blah_expression > ... > blih_expression in one go. + let mut pair = pair; + let mut pairs = pair.into_inner(); + let first = pairs.next().unwrap(); + let rest: Vec<_> = pairs.map(ParseUnwrapped::expression).collect::>()?; + if !rest.is_empty() { + // If there is more than one subexpression, handle it normally + let first = ParseUnwrapped::expression(first)?; + Ok(rest.into_iter().fold(first, |acc, e| bx(Expr::BinOp(o, acc, e)))) + } else { + // Otherwise, consume short-cuttable rules as long as they contain only one subexpression. + // println!("short-cutting {}", debug_pair(pair.clone())); + pair = first; + while can_be_shortcutted(pair.as_rule()) { + let mut pairs = pair.clone().into_inner(); + let first = pairs.next().unwrap(); + let rest: Vec<_> = pairs.collect(); + if !rest.is_empty() { + break; + } + pair = first; + } + // println!("short-cutted {}", debug_pair(pair.clone())); + // println!(); + Ok(ParseUnwrapped::expression(pair)?) + } +} + macro_rules! binop { ($rule:ident<$ty:ty>; $op:ident) => { rule!($rule<$ty>; raw_pair!(pair) => { - // This all could be a trivial fold, but to avoid stack explosion - // we try to cut down on the recursion level here, by consuming - // chains of blah_expression > ... > blih_expression in one go. - let mut pair = pair; - let mut pairs = pair.into_inner(); - let first = pairs.next().unwrap(); - let rest: Vec<_> = pairs.map(ParseUnwrapped::expression).collect::>()?; - if !rest.is_empty() { - // If there is more than one subexpression, handle it normally - let first = ParseUnwrapped::expression(first)?; - rest.into_iter().fold(first, |acc, e| bx(Expr::BinOp(BinOp::$op, acc, e))) - } else { - // Otherwise, consume short-cuttable rules as long as they contain only one subexpression. - // println!("short-cutting {}", debug_pair(pair.clone())); - pair = first; - while can_be_shortcutted(pair.as_rule()) { - let mut pairs = pair.clone().into_inner(); - let first = pairs.next().unwrap(); - let rest: Vec<_> = pairs.collect(); - if !rest.is_empty() { - break; - } - pair = first; - } - // println!("short-cutted {}", debug_pair(pair.clone())); - // println!(); - ParseUnwrapped::expression(pair)? - } + parse_binop(pair, BinOp::$op)? } - // children!(first: expression, rest..: expression) => { - // rest.fold(first, |acc, e| bx(Expr::BinOp(BinOp::$op, acc, e))) - // } ); }; } @@ -517,12 +335,6 @@ macro_rules! binop { make_parser! { rule!(EOI<()>; children!() => ()); -// named!(str<&'a str>; captured_str!(s) => s.trim()); - -// named!(raw_str<&'a str>; captured_str!(s) => s); - -// named!(label