diff options
author | Nadrieril | 2019-08-15 15:33:53 +0200 |
---|---|---|
committer | Nadrieril | 2019-08-15 15:41:19 +0200 |
commit | e4bdadabeda8ff8dbae0376f9d1c25478b8c8821 (patch) | |
tree | f6f788553dde3db1def383bf495f38c2e39dc476 | |
parent | a9804009f405be7e8a89e301f280ee9d51b57e5e (diff) |
Use precedence climbing to parse expressions with operators
This speeds up parsing around 30%
Diffstat (limited to '')
-rw-r--r-- | dhall/build.rs | 5 | ||||
-rw-r--r-- | dhall_generated_parser/build.rs | 31 | ||||
-rw-r--r-- | dhall_generated_parser/src/dhall.pest.visibility | 10 | ||||
-rw-r--r-- | dhall_syntax/src/parser.rs | 227 |
4 files changed, 147 insertions, 126 deletions
diff --git a/dhall/build.rs b/dhall/build.rs index cbda288..2305208 100644 --- a/dhall/build.rs +++ b/dhall/build.rs @@ -91,8 +91,9 @@ fn main() -> std::io::Result<()> { let mut file = File::create(parser_tests_path)?; make_test_module(&mut file, "parse", "parser/", "Parser", |path| { - // Too slow in debug mode - path == "success/largeExpression" + false + // Too slow in debug mode + || path == "success/largeExpression" // TODO: projection by expression || path == "success/recordProjectionByExpression" || path == "success/RecordProjectionByType" diff --git a/dhall_generated_parser/build.rs b/dhall_generated_parser/build.rs index 070aa9c..c562fad 100644 --- a/dhall_generated_parser/build.rs +++ b/dhall_generated_parser/build.rs @@ -51,6 +51,37 @@ fn main() -> std::io::Result<()> { }}" )?; + // Setup grammar for precedence climbing + rules.remove("operator_expression"); + writeln!(&mut file, r##" + import_alt = {{ "?" ~ whsp1 }} + bool_or = {{ "||" }} + natural_plus = {{ "+" ~ whsp1 }} + text_append = {{ "++" }} + list_append = {{ "#" }} + bool_and = {{ "&&" }} + natural_times = {{ "*" }} + bool_eq = {{ "==" }} + bool_ne = {{ "!=" }} + + operator = _{{ + equivalent | + bool_ne | + bool_eq | + natural_times | + combine_types | + prefer | + combine | + bool_and | + list_append | + text_append | + natural_plus | + bool_or | + import_alt + }} + operator_expression = {{ application_expression ~ (whsp ~ operator ~ whsp ~ application_expression)* }} + "##)?; + writeln!( &mut file, "final_expression = ${{ SOI ~ complete_expression ~ EOI }}" diff --git a/dhall_generated_parser/src/dhall.pest.visibility b/dhall_generated_parser/src/dhall.pest.visibility index 6b4c974..6fa5e01 100644 --- a/dhall_generated_parser/src/dhall.pest.visibility +++ b/dhall_generated_parser/src/dhall.pest.visibility @@ -84,10 +84,10 @@ Location # Optional_fold # Optional_build # Text_show -# combine -# combine_types -# equivalent -# prefer +combine +combine_types +equivalent +prefer lambda forall arrow @@ -144,7 +144,7 @@ expression annotated_expression let_binding empty_list_literal -# operator_expression +operator_expression import_alt_expression or_expression plus_expression diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 8ecd0ab..5b213d1 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -1,5 +1,6 @@ use itertools::Itertools; use pest::iterators::Pair; +use pest::prec_climber as pcl; use pest::Parser; use std::borrow::Cow; use std::rc::Rc; @@ -219,7 +220,7 @@ macro_rules! make_parser { ); } -// Non-recursive implementation to avoid stack overflows +// (Mostly) non-recursive implementation to avoid stack overflows fn do_parse<'a>( input: Rc<str>, initial_pair: Pair<'a, Rule>, @@ -232,30 +233,122 @@ fn do_parse<'a>( let mut pairs_stack: Vec<StackFrame> = vec![Unprocessed(initial_pair.clone())]; let mut values_stack: Vec<ParsedValue> = vec![]; + + let operators = { + use Rule::*; + // In order of precedence + vec![ + import_alt, + bool_or, + natural_plus, + text_append, + list_append, + bool_and, + combine, + prefer, + combine_types, + natural_times, + bool_eq, + bool_ne, + equivalent, + ] + }; + let rule_to_binop = { + use crate::BinOp::*; + use Rule::*; + |r| { + Some(match r { + import_alt => ImportAlt, + bool_or => BoolOr, + natural_plus => NaturalPlus, + text_append => TextAppend, + list_append => ListAppend, + bool_and => BoolAnd, + combine => RecursiveRecordMerge, + prefer => RightBiasedRecordMerge, + combine_types => RecursiveRecordTypeMerge, + natural_times => NaturalTimes, + bool_eq => BoolEQ, + bool_ne => BoolNE, + equivalent => Equivalence, + _ => return None, + }) + } + }; + let climber = pcl::PrecClimber::new( + operators + .into_iter() + .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) + .collect(), + ); + while let Some(p) = pairs_stack.pop() { match p { Unprocessed(mut pair) => loop { - let mut pairs: Vec<_> = pair.clone().into_inner().collect(); - let n_children = pairs.len(); - if n_children == 1 && can_be_shortcutted(pair.as_rule()) { - pair = pairs.pop().unwrap(); - continue; + if can_be_shortcutted(pair.as_rule()) { + let mut i = pair.clone().into_inner(); + let first = i.next(); + let second = i.next(); + match (first, second) { + // If pair has exactly one child, just go on parsing that child + (Some(p), None) => { + pair = p; + continue; + } + // Otherwise parse normally + _ => {} + } + } + + // Use precedence climbing to parse operator_expression + if pair.as_rule() == Rule::operator_expression { + let val = climber.climb( + pair.clone().into_inner(), + |p| do_parse(input.clone(), p), + |l, p, r| { + let o = match rule_to_binop(p.as_rule()) { + Some(o) => o, + None => Err(custom_parse_error( + &pair, + format!( + "Rule {:?} isn't an operator", + p.as_rule() + ), + ))?, + }; + use ParsedValue::expression; + match (l?, r?) { + (expression(l), expression(r)) => { + Ok(expression(unspanned(BinOp(o, l, r)))) + } + (l, r) => Err(custom_parse_error( + &pair, + format!( + "Unexpected children: {:?}", + [l, r] + ), + ))?, + } + }, + )?; + values_stack.push(val); + break; } else { - pairs_stack.push(Processed(pair, n_children)); - pairs_stack - .extend(pairs.into_iter().map(StackFrame::Unprocessed)); + let n_children = pair.clone().into_inner().count(); + pairs_stack.push(Processed(pair.clone(), n_children)); + // Push the unparsed children onto the stack + pairs_stack.extend(pair.into_inner().map(Unprocessed)); break; } }, Processed(pair, n) => { + // Take as many values from the values stack as that pair had children. This + // means we get exactly the children of the pair, but now they are parsed. let mut children: Vec<_> = values_stack.split_off(values_stack.len() - n); children.reverse(); - let val = match parse_any(input.clone(), pair.clone(), children) - { - Ok(v) => v, - Err(msg) => Err(custom_parse_error(&pair, msg))?, - }; + let val = parse_any(input.clone(), pair.clone(), children) + .map_err(|msg| custom_parse_error(&pair, msg))?; values_stack.push(val); } } @@ -268,19 +361,7 @@ fn can_be_shortcutted(rule: Rule) -> bool { use Rule::*; match rule { expression - | import_alt_expression - | or_expression - | plus_expression - | text_append_expression - | list_append_expression - | and_expression - | combine_expression - | prefer_expression - | combine_types_expression - | times_expression - | equal_expression - | not_equal_expression - | equivalent_expression + | operator_expression | application_expression | first_application_expression | selector_expression @@ -765,98 +846,6 @@ make_parser! { token_rule!(List<()>); token_rule!(Optional<()>); - rule!(import_alt_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::ImportAlt; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(or_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolOr; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(plus_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::NaturalPlus; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(text_append_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::TextAppend; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(list_append_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::ListAppend; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(and_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolAnd; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(combine_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RecursiveRecordMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(prefer_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RightBiasedRecordMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(combine_types_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::RecursiveRecordTypeMerge; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(times_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::NaturalTimes; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(equal_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolEQ; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(not_equal_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::BoolNE; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(equivalent_expression<ParsedSubExpr> as expression; children!( - [expression(e)] => e, - [expression(first), expression(rest)..] => { - let o = crate::BinOp::Equivalence; - rest.fold(first, |acc, e| unspanned(BinOp(o, acc, e))) - }, - )); - rule!(annotated_expression<ParsedSubExpr> as expression; span; children!( [expression(e)] => e, [expression(e), expression(annot)] => { |