diff options
author | Nadrieril Feneanar | 2019-08-15 22:03:27 +0200 |
---|---|---|
committer | GitHub | 2019-08-15 22:03:27 +0200 |
commit | b8234f63c27eb83ff54f719bf51bf3c969c36a3b (patch) | |
tree | b04f1b2bec18ad5f24a1619f8e4b6c6bd491b831 | |
parent | a9804009f405be7e8a89e301f280ee9d51b57e5e (diff) | |
parent | ffb5a4c26960960f526719d9b333df80119fef3c (diff) |
Merge pull request #103 from Nadrieril/precedence-climbing
Use precedence climbing to parse expressions with operators
Diffstat (limited to '')
-rw-r--r-- | dhall/build.rs | 5 | ||||
-rw-r--r-- | dhall_generated_parser/build.rs | 31 | ||||
-rw-r--r-- | dhall_generated_parser/src/dhall.pest.visibility | 10 | ||||
-rw-r--r-- | dhall_syntax/src/lib.rs | 1 | ||||
-rw-r--r-- | dhall_syntax/src/parser.rs | 381 |
5 files changed, 229 insertions, 199 deletions
diff --git a/dhall/build.rs b/dhall/build.rs index cbda288..2305208 100644 --- a/dhall/build.rs +++ b/dhall/build.rs @@ -91,8 +91,9 @@ fn main() -> std::io::Result<()> { let mut file = File::create(parser_tests_path)?; make_test_module(&mut file, "parse", "parser/", "Parser", |path| { - // Too slow in debug mode - path == "success/largeExpression" + false + // Too slow in debug mode + || path == "success/largeExpression" // TODO: projection by expression || path == "success/recordProjectionByExpression" || path == "success/RecordProjectionByType" diff --git a/dhall_generated_parser/build.rs b/dhall_generated_parser/build.rs index 070aa9c..c562fad 100644 --- a/dhall_generated_parser/build.rs +++ b/dhall_generated_parser/build.rs @@ -51,6 +51,37 @@ fn main() -> std::io::Result<()> { }}" )?; + // Setup grammar for precedence climbing + rules.remove("operator_expression"); + writeln!(&mut file, r##" + import_alt = {{ "?" ~ whsp1 }} + bool_or = {{ "||" }} + natural_plus = {{ "+" ~ whsp1 }} + text_append = {{ "++" }} + list_append = {{ "#" }} + bool_and = {{ "&&" }} + natural_times = {{ "*" }} + bool_eq = {{ "==" }} + bool_ne = {{ "!=" }} + + operator = _{{ + equivalent | + bool_ne | + bool_eq | + natural_times | + combine_types | + prefer | + combine | + bool_and | + list_append | + text_append | + natural_plus | + bool_or | + import_alt + }} + operator_expression = {{ application_expression ~ (whsp ~ operator ~ whsp ~ application_expression)* }} + "##)?; + writeln!( &mut file, "final_expression = ${{ SOI ~ complete_expression ~ EOI }}" diff --git a/dhall_generated_parser/src/dhall.pest.visibility b/dhall_generated_parser/src/dhall.pest.visibility index 6b4c974..6fa5e01 100644 --- a/dhall_generated_parser/src/dhall.pest.visibility +++ b/dhall_generated_parser/src/dhall.pest.visibility @@ -84,10 +84,10 @@ Location # Optional_fold # Optional_build # Text_show -# combine -# combine_types -# equivalent -# prefer +combine +combine_types +equivalent +prefer lambda forall arrow @@ -144,7 +144,7 @@ expression annotated_expression let_binding empty_list_literal -# operator_expression +operator_expression import_alt_expression or_expression plus_expression diff --git a/dhall_syntax/src/lib.rs b/dhall_syntax/src/lib.rs index 193c6ea..8406595 100644 --- a/dhall_syntax/src/lib.rs +++ b/dhall_syntax/src/lib.rs @@ -1,5 +1,6 @@ #![feature(trace_macros)] #![feature(slice_patterns)] +#![feature(try_blocks)] #![allow( clippy::many_single_char_names, clippy::should_implement_trait, diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 8ecd0ab..d467970 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -1,7 +1,10 @@ use itertools::Itertools; use pest::iterators::Pair; +use pest::prec_climber as pcl; +use pest::prec_climber::PrecClimber; use pest::Parser; use std::borrow::Cow; +use std::collections::HashMap; use std::rc::Rc; use dhall_generated_parser::{DhallParser, Rule}; @@ -124,61 +127,117 @@ macro_rules! make_parser { (@filter, token_rule) => (true); (@filter, rule_group) => (false); - (@body, - ($($things:tt)*), - rule!( $name:ident<$o:ty>; $($args:tt)* ) - ) => ( - make_parser!(@body, - ($($things)*), - rule!( $name<$o> as $name; $($args)* ) + (@construct_climber, + ($map:expr), + rule!( + $name:ident<$o:ty> + as $group:ident; + prec_climb!( $climber:expr, $($_rest:tt)* ) ) - ); + ) => ({ + $map.insert(Rule::$name, $climber) + }); + (@construct_climber, ($($things:tt)*), $($args:tt)*) => (()); + (@body, - ($_input:expr, $pair:expr, $_children:expr), + ($climbers:expr, $input:expr, $pair:expr), rule!( $name:ident<$o:ty> as $group:ident; + $span:ident; captured_str!($x:pat) => $body:expr ) ) => ({ - let $x = $pair.as_str(); - let res: $o = $body; - Ok(ParsedValue::$group(res)) + let res: Result<_, String> = try { + let $span = Span::make($input, $pair.as_span()); + let $x = $pair.as_str(); + ParsedValue::$group($body) + }; + res.map_err(|msg| custom_parse_error(&$pair, msg)) }); (@body, - ($_input:expr, $_pair:expr, $children:expr), + ($climbers:expr, $input:expr, $pair:expr), rule!( $name:ident<$o:ty> as $group:ident; + $span:ident; children!( $( [$($args:tt)*] => $body:expr ),* $(,)* ) ) ) => ({ - #[allow(unused_imports)] - use ParsedValue::*; + let children: Vec<_> = $pair + .clone() + .into_inner() + .map(|p| parse_any($climbers, $input.clone(), p)) + .collect::<Result<_, _>>()?; + #[allow(unreachable_code)] - let res: $o = improved_slice_patterns::match_vec!($children; - $( [$($args)*] => $body, )* - [x..] => Err( - format!("Unexpected children: {:?}", x.collect::<Vec<_>>()) - )?, - ).map_err(|_| -> String { unreachable!() })?; + let res: Result<_, String> = try { + #[allow(unused_imports)] + use ParsedValue::*; + let $span = Span::make($input, $pair.as_span()); + + improved_slice_patterns::match_vec!(children; + $( [$($args)*] => $body, )* + [x..] => Err( + format!("Unexpected children: {:?}", x.collect::<Vec<_>>()) + )?, + ).map_err(|_| -> String { unreachable!() })? + }; + + let res = res.map_err(|msg| custom_parse_error(&$pair, msg))?; Ok(ParsedValue::$group(res)) }); (@body, - ($input:expr, $pair:expr, $children:expr), + ($climbers:expr, $input:expr, $pair:expr), + rule!( + $name:ident<$o:ty> + as $group:ident; + prec_climb!( $_climber:expr, $args:pat => $body:expr $(,)* ) + ) + ) => ({ + let climber = $climbers.get(&Rule::$name).unwrap(); + climber.climb( + $pair.clone().into_inner(), + |p| parse_any($climbers, $input.clone(), p), + |l, op, r| { + let (l, r) = (l?, r?); + let res: Result<_, String> = try { + #[allow(unused_imports)] + use ParsedValue::*; + match (l, op, r) { + $args => ParsedValue::$group($body), + children => Err( + format!("Unexpected children: {:?}", children) + )?, + } + }; + res.map_err(|msg| custom_parse_error(&$pair, msg)) + }, + ) + }); + (@body, + ($($things:tt)*), + rule!( $name:ident<$o:ty>; $($args:tt)* ) + ) => ( + make_parser!(@body, + ($($things)*), + rule!( $name<$o> as $name; $($args)* ) + ) + ); + (@body, + ($($things:tt)*), rule!( $name:ident<$o:ty> as $group:ident; - $span:ident; $($args:tt)* ) ) => ({ - let $span = Span::make($input, $pair.as_span()); make_parser!(@body, - ($input, $pair, $children), + ($($things)*), rule!( $name<$o> as $group; + _span; $($args)* ) ) @@ -200,67 +259,74 @@ macro_rules! make_parser { $( $name($o), )* } + fn construct_precclimbers() -> HashMap<Rule, PrecClimber<Rule>> { + let mut map = HashMap::new(); + $( + make_parser!(@construct_climber, (map), + $submac!( $name<$o> $($args)* )); + )* + map + } + + struct Parsers; + + impl Parsers { + $( + #[allow(non_snake_case, unused_variables)] + fn $name<'a>( + climbers: &HashMap<Rule, PrecClimber<Rule>>, + input: Rc<str>, + pair: Pair<'a, Rule>, + ) -> ParseResult<ParsedValue<'a>> { + make_parser!(@body, (climbers, input, pair), + $submac!( $name<$o> $($args)* )) + } + )* + } + fn parse_any<'a>( + climbers: &HashMap<Rule, PrecClimber<Rule>>, input: Rc<str>, - pair: Pair<'a, Rule>, - children: Vec<ParsedValue<'a>>, - ) -> Result<ParsedValue<'a>, String> { + mut pair: Pair<'a, Rule>, + ) -> ParseResult<ParsedValue<'a>> { + // Avoid parsing while the pair has exactly one child that can be returned as-is + loop { + if can_be_shortcutted(pair.as_rule()) { + let mut i = pair.clone().into_inner(); + let first = i.next(); + let second = i.next(); + match (first, second) { + // If pair has exactly one child, just go on parsing that child + (Some(p), None) => { + pair = p; + continue; + } + // Otherwise parse normally + _ => break, + } + } + break; + } + match pair.as_rule() { $( make_parser!(@pattern, $submac, $name) if make_parser!(@filter, $submac) - => make_parser!(@body, (input, pair, children), - $submac!( $name<$o> $($args)* )) + => Parsers::$name(climbers, input, pair) , )* - r => Err(format!("Unexpected {:?}", r)), + r => Err(custom_parse_error(&pair, format!("Unexpected {:?}", r))), } } ); } -// Non-recursive implementation to avoid stack overflows fn do_parse<'a>( input: Rc<str>, - initial_pair: Pair<'a, Rule>, + pair: Pair<'a, Rule>, ) -> ParseResult<ParsedValue<'a>> { - enum StackFrame<'a> { - Unprocessed(Pair<'a, Rule>), - Processed(Pair<'a, Rule>, usize), - } - use StackFrame::*; - let mut pairs_stack: Vec<StackFrame> = - vec![Unprocessed(initial_pair.clone())]; - let mut values_stack: Vec<ParsedValue> = vec![]; - while let Some(p) = pairs_stack.pop() { - match p { - Unprocessed(mut pair) => loop { - let mut pairs: Vec<_> = pair.clone().into_inner().collect(); - let n_children = pairs.len(); - if n_children == 1 && can_be_shortcutted(pair.as_rule()) { - pair = pairs.pop().unwrap(); - continue; - } else { - pairs_stack.push(Processed(pair, n_children)); - pairs_stack - .extend(pairs.into_iter().map(StackFrame::Unprocessed)); - break; - } - }, - Processed(pair, n) => { - let mut children: Vec<_> = - values_stack.split_off(values_stack.len() - n); - children.reverse(); - let val = match parse_any(input.clone(), pair.clone(), children) - { - Ok(v) => v, - Err(msg) => Err(custom_parse_error(&pair, msg))?, - }; - values_stack.push(val); - } - } - } - Ok(values_stack.pop().unwrap()) + let climbers = construct_precclimbers(); + parse_any(&climbers, input, pair) } // List of rules that can be shortcutted if they have a single child @@ -268,19 +334,7 @@ fn can_be_shortcutted(rule: Rule) -> bool { use Rule::*; match rule { expression - | import_alt_expression - | or_expression - | plus_expression - | text_append_expression - | list_append_expression - | and_expression - | combine_expression - | prefer_expression - | combine_types_expression - | times_expression - | equal_expression - | not_equal_expression - | equivalent_expression + | operator_expression | application_expression | first_application_expression | selector_expression @@ -765,98 +819,6 @@ make_parser! { token_rule!(List<()>); token_rule!(Optional<()>); - rule!(import_alt_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::ImportAlt; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(or_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolOr; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(plus_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::NaturalPlus; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(text_append_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::TextAppend; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(list_append_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::ListAppend; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(and_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolAnd; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(combine_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RecursiveRecordMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(prefer_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RightBiasedRecordMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(combine_types_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RecursiveRecordTypeMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(times_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::NaturalTimes; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(equal_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolEQ; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(not_equal_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolNE; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(equivalent_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::Equivalence; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(annotated_expression<ParsedSubExpr> as expression; span; children!( [expression(e)] => e, [expression(e), expression(annot)] => { @@ -864,6 +826,58 @@ make_parser! { }, )); + rule!(operator_expression<ParsedSubExpr> as expression; prec_climb!( + { + use Rule::*; + // In order of precedence + let operators = vec![ + import_alt, + bool_or, + natural_plus, + text_append, + list_append, + bool_and, + combine, + prefer, + combine_types, + natural_times, + bool_eq, + bool_ne, + equivalent, + ]; + PrecClimber::new( + operators + .into_iter() + .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) + .collect(), + ) + }, + (expression(l), op, expression(r)) => { + use crate::BinOp::*; + use Rule::*; + let op = match op.as_rule() { + import_alt => ImportAlt, + bool_or => BoolOr, + natural_plus => NaturalPlus, + text_append => TextAppend, + list_append => ListAppend, + bool_and => BoolAnd, + combine => RecursiveRecordMerge, + prefer => RightBiasedRecordMerge, + combine_types => RecursiveRecordTypeMerge, + natural_times => NaturalTimes, + bool_eq => BoolEQ, + bool_ne => BoolNE, + equivalent => Equivalence, + r => Err( + format!("Rule {:?} isn't an operator", r), + )?, + }; + + unspanned(BinOp(op, l, r)) + } + )); + token_rule!(Some_<()>); token_rule!(toMap<()>); @@ -996,21 +1010,4 @@ pub fn parse_expr(s: &str) -> ParseResult<ParsedSubExpr> { ParsedValue::expression(e) => Ok(e), _ => unreachable!(), } - // Ok(BoolLit(false)) -} - -#[test] -fn test_parse() { - // let expr = r#"{ x = "foo", y = 4 }.x"#; - // let expr = r#"(1 + 2) * 3"#; - let expr = r#"(1) + 3 * 5"#; - println!("{:?}", parse_expr(expr)); - match parse_expr(expr) { - Err(e) => { - println!("{:?}", e); - println!("{}", e); - } - ok => println!("{:?}", ok), - }; - // assert!(false); } |