From 2f6f21c52e60c560eb4c5fff9441b7d20c8c1d9a Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 3 Sep 2019 23:01:55 +0200 Subject: Store Spans at every node when parsing --- dhall_syntax/src/core/expr.rs | 18 ++++++++++++---- dhall_syntax/src/parser.rs | 50 ++++++++++++++++++++++++++++++++----------- 2 files changed, 51 insertions(+), 17 deletions(-) diff --git a/dhall_syntax/src/core/expr.rs b/dhall_syntax/src/core/expr.rs index 2cb23c9..455e42a 100644 --- a/dhall_syntax/src/core/expr.rs +++ b/dhall_syntax/src/core/expr.rs @@ -37,6 +37,16 @@ impl Span { end: sp.end(), } } + /// Takes the union of the two spans. Assumes that the spans come from the same input. + /// This will also capture any input between the spans. + pub fn union(&self, other: &Span) -> Self { + use std::cmp::{max, min}; + Span { + input: self.input.clone(), + start: min(self.start, other.start), + end: max(self.end, other.end), + } + } } /// Double with bitwise equality @@ -324,8 +334,11 @@ impl Expr { pub fn as_mut(&mut self) -> &mut RawExpr { &mut self.0.as_mut().0 } + pub fn span(&self) -> Option<&Span> { + self.0.as_ref().1.as_ref() + } - pub fn new(x: RawExpr, n: Span) -> Self { + pub(crate) fn new(x: RawExpr, n: Span) -> Self { Expr(Box::new((x, Some(n)))) } @@ -387,9 +400,6 @@ pub fn rc(x: RawExpr) -> Expr { pub(crate) fn spanned(span: Span, x: RawExpr) -> Expr { Expr::new(x, span) } -pub(crate) fn unspanned(x: RawExpr) -> Expr { - Expr::from_expr_no_span(x) -} /// Add an isize to an usize /// Panics on over/underflow diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 3f961e8..4cce5ed 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -783,7 +783,12 @@ impl Parsers { [let_binding(bindings).., expression(final_expr)] => { bindings.rev().fold( final_expr, - |acc, x| unspanned(Let(x.0, x.1, x.2, acc)) + |acc, x| { + spanned( + acc.span().unwrap().union(&x.3), + Let(x.0, x.1, x.2, acc) + ) + } ) }, [forall(()), label(l), expression(typ), 
@@ -811,12 +816,12 @@ impl Parsers { fn let_binding( input: ParseInput, - ) -> ParseResult<(Label, Option>, Expr)> { + ) -> ParseResult<(Label, Option>, Expr, Span)> { Ok(parse_children!(input; [label(name), expression(annot), expression(expr)] => - (name, Some(annot), expr), + (name, Some(annot), expr, input.as_span()), [label(name), expression(expr)] => - (name, None, expr), + (name, None, expr, input.as_span()), )) } @@ -847,7 +852,10 @@ impl Parsers { r => Err(input.error(format!("Rule {:?} isn't an operator", r)))?, }; - Ok(unspanned(BinOp(op, l, r))) + Ok(spanned( + l.span().unwrap().union(r.span().unwrap()), + BinOp(op, l, r), + )) } fn Some_(_input: ParseInput) -> ParseResult<()> { @@ -861,7 +869,15 @@ impl Parsers { Ok(parse_children!(input; [expression(e)] => e, [expression(first), expression(rest)..] => { - rest.fold(first, |acc, e| unspanned(App(acc, e))) + rest.fold( + first, + |acc, e| { + spanned( + acc.span().unwrap().union(e.span().unwrap()), + App(acc, e) + ) + } + ) }, )) } @@ -892,20 +908,28 @@ impl Parsers { Ok(parse_children!(input; [expression(e)] => e, [expression(first), selector(rest)..] 
=> { - rest.fold(first, |acc, e| unspanned(match e { - Either::Left(l) => Field(acc, l), - Either::Right(ls) => Projection(acc, ls), - })) + rest.fold( + first, + |acc, e| { + spanned( + acc.span().unwrap().union(&e.1), + match e.0 { + Either::Left(l) => Field(acc, l), + Either::Right(ls) => Projection(acc, ls), + } + ) + } + ) }, )) } fn selector( input: ParseInput, - ) -> ParseResult>> { + ) -> ParseResult<(Either>, Span)> { Ok(parse_children!(input; - [label(l)] => Either::Left(l), - [labels(ls)] => Either::Right(ls), + [label(l)] => (Either::Left(l), input.as_span()), + [labels(ls)] => (Either::Right(ls), input.as_span()), // [expression(_e)] => unimplemented!("selection by expression"), // TODO )) } -- cgit v1.2.3 From 0388d9858627693bab2433f134eb4ed1d6e9b164 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Fri, 6 Sep 2019 18:17:36 +0200 Subject: Make ParseInput independent from dhall-specific types --- dhall_proc_macros/src/make_parser.rs | 8 +- dhall_proc_macros/src/parse_children.rs | 6 +- dhall_syntax/src/core/expr.rs | 8 +- dhall_syntax/src/parser.rs | 508 ++++++++++++++++---------------- 4 files changed, 268 insertions(+), 262 deletions(-) diff --git a/dhall_proc_macros/src/make_parser.rs b/dhall_proc_macros/src/make_parser.rs index a17ab61..c0594a5 100644 --- a/dhall_proc_macros/src/make_parser.rs +++ b/dhall_proc_macros/src/make_parser.rs @@ -179,7 +179,7 @@ fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Ident) -> Result<()> { #function #climber.climb( - #input_arg.pair.clone().into_inner(), + #input_arg.as_pair().clone().into_inner(), |p| Self::#child_rule(#input_arg.with_pair(p)), |l, op, r| { #fn_name(#input_arg.clone(), l?, op, r?) @@ -207,9 +207,9 @@ fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Ident) -> Result<()> { let mut #input_arg = #input_arg; // While the current rule allows shortcutting, and there is a single child, and the // child can still be parsed by the current function, then skip to that child. 
- while ::allows_shortcut(#input_arg.as_rule()) { + while ::allows_shortcut(#input_arg.as_rule()) { if let Some(child) = #input_arg.single_child() { - if &::rule_alias(child.as_rule()) + if &::rule_alias(child.as_rule()) == stringify!(#fn_name) { #input_arg = child; continue; @@ -313,7 +313,7 @@ pub fn make_parser( let ty = &imp.self_ty; let (impl_generics, _, where_clause) = imp.generics.split_for_impl(); Ok(quote!( - impl #impl_generics PestConsumer for #ty #where_clause { + impl #impl_generics pest_consume::PestConsumer for #ty #where_clause { type Rule = #rule_enum; fn rule_alias(rule: Self::Rule) -> String { match rule { diff --git a/dhall_proc_macros/src/parse_children.rs b/dhall_proc_macros/src/parse_children.rs index a35c03f..d6474a7 100644 --- a/dhall_proc_macros/src/parse_children.rs +++ b/dhall_proc_macros/src/parse_children.rs @@ -186,11 +186,11 @@ pub fn parse_children( .collect::>>()?; Ok(quote!({ - let #i_children_rules: Vec<_> = #input_expr.pair + let #i_children_rules: Vec<_> = #input_expr.as_pair() .clone() .into_inner() .map(|p| p.as_rule()) - .map(::rule_alias) + .map(::rule_alias) .collect(); let #i_children_rules: Vec<&str> = #i_children_rules .iter() @@ -199,7 +199,7 @@ pub fn parse_children( #[allow(unused_mut)] let mut #i_inputs = #input_expr - .pair + .as_pair() .clone() .into_inner() .map(|p| #input_expr.with_pair(p)); diff --git a/dhall_syntax/src/core/expr.rs b/dhall_syntax/src/core/expr.rs index 455e42a..74b481f 100644 --- a/dhall_syntax/src/core/expr.rs +++ b/dhall_syntax/src/core/expr.rs @@ -334,8 +334,8 @@ impl Expr { pub fn as_mut(&mut self) -> &mut RawExpr { &mut self.0.as_mut().0 } - pub fn span(&self) -> Option<&Span> { - self.0.as_ref().1.as_ref() + pub fn span(&self) -> Option { + self.0.as_ref().1.clone() } pub(crate) fn new(x: RawExpr, n: Span) -> Self { @@ -397,10 +397,6 @@ pub fn rc(x: RawExpr) -> Expr { Expr::from_expr_no_span(x) } -pub(crate) fn spanned(span: Span, x: RawExpr) -> Expr { - Expr::new(x, span) -} - /// Add 
an isize to an usize /// Panics on over/underflow fn add_ui(u: usize, i: isize) -> Option { diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 4cce5ed..c8b3fe6 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -3,7 +3,6 @@ use pest::iterators::Pair; use pest::prec_climber as pcl; use pest::prec_climber::PrecClimber; use pest::Parser; -use std::borrow::Cow; use std::rc::Rc; use dhall_generated_parser::{DhallParser, Rule}; @@ -25,110 +24,133 @@ pub type ParseError = pest::error::Error; pub type ParseResult = Result; -#[derive(Debug, Clone)] -struct ParseInput<'input, Rule> -where - Rule: pest::RuleType, -{ - pair: Pair<'input, Rule>, - original_input_str: Rc, -} - -impl<'input> ParseInput<'input, Rule> { - fn error(&self, message: String) -> ParseError { - let message = format!( - "{} while matching on:\n{}", - message, - debug_pair(self.pair.clone()) - ); - let e = pest::error::ErrorVariant::CustomError { message }; - pest::error::Error::new_from_span(e, self.pair.as_span()) - } - fn parse(input_str: &'input str, rule: Rule) -> ParseResult { - let mut pairs = DhallParser::parse(rule, input_str)?; - // TODO: proper errors - let pair = pairs.next().unwrap(); - assert_eq!(pairs.next(), None); - Ok(ParseInput { - original_input_str: input_str.to_string().into(), - pair, - }) - } - fn with_pair(&self, new_pair: Pair<'input, Rule>) -> Self { - ParseInput { - pair: new_pair, - original_input_str: self.original_input_str.clone(), +pub mod pest_consume { + use pest::error::{Error, ErrorVariant}; + use pest::iterators::Pair; + use pest::Span; + + /// Carries a pest Pair alongside custom user data. 
+ #[derive(Debug, Clone)] + pub struct ParseInput<'input, 'data, Rule, Data> + where + Rule: pest::RuleType, + { + pair: Pair<'input, Rule>, + user_data: &'data Data, + } + + impl<'input, 'data, Rule, Data> ParseInput<'input, 'data, Rule, Data> + where + Rule: pest::RuleType, + { + pub fn new(pair: Pair<'input, Rule>, user_data: &'data Data) -> Self { + ParseInput { pair, user_data } } - } - /// If the contained pair has exactly one child, return a new Self containing it. - fn single_child(&self) -> Option { - let mut children = self.pair.clone().into_inner(); - if let Some(child) = children.next() { - if children.next().is_none() { - return Some(self.with_pair(child)); + /// Create an error that points to the span of the input. + pub fn error(&self, message: String) -> Error { + let message = format!( + "{} while matching on:\n{}", + message, + debug_pair(self.pair.clone()) + ); + Error::new_from_span( + ErrorVariant::CustomError { message }, + self.as_span(), + ) + } + /// Reconstruct the input with a new pair, passing the user data along. + pub fn with_pair(&self, new_pair: Pair<'input, Rule>) -> Self { + ParseInput { + pair: new_pair, + user_data: self.user_data, } } - None - } - fn as_span(&self) -> Span { - Span::make(self.original_input_str.clone(), self.pair.as_span()) - } - fn as_str(&self) -> &'input str { - self.pair.as_str() - } - fn as_rule(&self) -> Rule { - self.pair.as_rule() - } -} + /// If the contained pair has exactly one child, return a new Self containing it. + pub fn single_child(&self) -> Option { + let mut children = self.pair.clone().into_inner(); + if let Some(child) = children.next() { + if children.next().is_none() { + return Some(self.with_pair(child)); + } + } + None + } -// Used by the macros. 
-trait PestConsumer { - type Rule: pest::RuleType; - fn rule_alias(rule: Self::Rule) -> String; - fn allows_shortcut(rule: Self::Rule) -> bool; -} + pub fn user_data(&self) -> &'data Data { + self.user_data + } + pub fn as_pair(&self) -> &Pair<'input, Rule> { + &self.pair + } + pub fn as_span(&self) -> Span<'input> { + self.pair.as_span() + } + pub fn as_str(&self) -> &'input str { + self.pair.as_str() + } + pub fn as_rule(&self) -> Rule { + self.pair.as_rule() + } + } -fn debug_pair(pair: Pair) -> String { - use std::fmt::Write; - let mut s = String::new(); - fn aux(s: &mut String, indent: usize, prefix: String, pair: Pair) { - let indent_str = "| ".repeat(indent); - let rule = pair.as_rule(); - let contents = pair.as_str(); - let mut inner = pair.into_inner(); - let mut first = true; - while let Some(p) = inner.next() { - if first { - first = false; - let last = inner.peek().is_none(); - if last && p.as_str() == contents { - let prefix = format!("{}{:?} > ", prefix, rule); - aux(s, indent, prefix, p); - continue; - } else { - writeln!( - s, - r#"{}{}{:?}: "{}""#, - indent_str, prefix, rule, contents - ) - .unwrap(); + /// Used by the macros. + pub trait PestConsumer { + type Rule: pest::RuleType; + fn rule_alias(rule: Self::Rule) -> String; + fn allows_shortcut(rule: Self::Rule) -> bool; + } + + /// Pretty-print a pair and its nested children. 
+ fn debug_pair(pair: Pair) -> String { + use std::fmt::Write; + let mut s = String::new(); + fn aux( + s: &mut String, + indent: usize, + prefix: String, + pair: Pair, + ) { + let indent_str = "| ".repeat(indent); + let rule = pair.as_rule(); + let contents = pair.as_str(); + let mut inner = pair.into_inner(); + let mut first = true; + while let Some(p) = inner.next() { + if first { + first = false; + let last = inner.peek().is_none(); + if last && p.as_str() == contents { + let prefix = format!("{}{:?} > ", prefix, rule); + aux(s, indent, prefix, p); + continue; + } else { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); + } } + aux(s, indent + 1, "".into(), p); + } + if first { + writeln!( + s, + r#"{}{}{:?}: "{}""#, + indent_str, prefix, rule, contents + ) + .unwrap(); } - aux(s, indent + 1, "".into(), p); - } - if first { - writeln!( - s, - r#"{}{}{:?}: "{}""#, - indent_str, prefix, rule, contents - ) - .unwrap(); } + aux(&mut s, 0, "".into(), pair); + s } - aux(&mut s, 0, "".into(), pair); - s } +type ParseInput<'input, 'data> = + pest_consume::ParseInput<'input, 'data, Rule, Rc>; + #[derive(Debug)] enum Either { Left(A), @@ -173,6 +195,16 @@ impl crate::Builtin { } } +fn input_to_span(input: ParseInput) -> Span { + Span::make(input.user_data().clone(), input.as_pair().as_span()) +} +fn spanned(input: ParseInput, x: RawExpr) -> Expr { + Expr::new(x, input_to_span(input)) +} +fn spanned_union(span1: Span, span2: Span, x: RawExpr) -> Expr { + Expr::new(x, span1.union(&span2)) +} + // Trim the shared indent off of a vec of lines, as defined by the Dhall semantics of multiline // literals. fn trim_indent(lines: &mut Vec>) { @@ -246,21 +278,21 @@ struct Parsers; #[make_parser(Rule)] impl Parsers { - fn EOI(_input: ParseInput) -> ParseResult<()> { + fn EOI(_input: ParseInput) -> ParseResult<()> { Ok(()) } #[alias(label)] - fn simple_label(input: ParseInput) -> ParseResult