From 97d74f514bd8c5c4b96fb4f4071f4a93ac28572d Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 2 Sep 2019 18:59:30 +0200 Subject: Separate both parser proc_macros into their own files --- dhall_proc_macros/src/parse_children.rs | 205 ++++++++++++++++++++++++++++++++ 1 file changed, 205 insertions(+) create mode 100644 dhall_proc_macros/src/parse_children.rs (limited to 'dhall_proc_macros/src/parse_children.rs') diff --git a/dhall_proc_macros/src/parse_children.rs b/dhall_proc_macros/src/parse_children.rs new file mode 100644 index 0000000..ce6f66c --- /dev/null +++ b/dhall_proc_macros/src/parse_children.rs @@ -0,0 +1,205 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::parse::{Parse, ParseStream, Result}; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{bracketed, parenthesized, token, Error, Expr, Ident, Pat, Token}; + +#[derive(Debug, Clone)] +struct ChildrenBranch { + pattern_span: Span, + pattern: Punctuated, + body: Expr, +} + +#[derive(Debug, Clone)] +enum ChildrenBranchPatternItem { + Single { rule_name: Ident, binder: Pat }, + Multiple { rule_name: Ident, binder: Ident }, +} + +#[derive(Debug, Clone)] +struct ParseChildrenInput { + input_expr: Expr, + branches: Punctuated, +} + +impl Parse for ChildrenBranch { + fn parse(input: ParseStream) -> Result { + let contents; + let _: token::Bracket = bracketed!(contents in input); + let pattern_unparsed: TokenStream = contents.fork().parse()?; + let pattern_span = pattern_unparsed.span(); + let pattern = Punctuated::parse_terminated(&contents)?; + let _: Token![=>] = input.parse()?; + let body = input.parse()?; + + Ok(ChildrenBranch { + pattern_span, + pattern, + body, + }) + } +} + +impl Parse for ChildrenBranchPatternItem { + fn parse(input: ParseStream) -> Result { + let contents; + let rule_name = input.parse()?; + parenthesized!(contents in input); + if input.peek(Token![..]) { + let binder = contents.parse()?; + let _: Token![..] = input.parse()?; + Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder }) + } else if input.is_empty() || input.peek(Token![,]) { + let binder = contents.parse()?; + Ok(ChildrenBranchPatternItem::Single { rule_name, binder }) + } else { + Err(input.error("expected `..` or nothing")) + } + } +} + +impl Parse for ParseChildrenInput { + fn parse(input: ParseStream) -> Result { + let input_expr = input.parse()?; + let _: Token![;] = input.parse()?; + let branches = Punctuated::parse_terminated(input)?; + + Ok(ParseChildrenInput { + input_expr, + branches, + }) + } +} + +fn make_parser_branch(branch: &ChildrenBranch) -> Result { + use ChildrenBranchPatternItem::{Multiple, Single}; + + let body = &branch.body; + + // Convert the input pattern into a pattern-match on the Rules of the children. This uses + // slice_patterns. + // A single pattern just checks that the rule matches; a variable-length pattern binds the + // subslice and checks, in the if-guard, that its elements all match the chosen Rule. + let variable_pattern_ident = + Ident::new("variable_pattern", Span::call_site()); + let match_pat = branch.pattern.iter().map(|item| match item { + Single { rule_name, .. } => { + quote!(<::RuleEnum>::#rule_name) + } + Multiple { .. } => quote!(#variable_pattern_ident..), + }); + let match_filter = branch.pattern.iter().map(|item| match item { + Single { .. } => quote!(), + Multiple { rule_name, .. } => quote!( + { + // We can't use .all() directly in the pattern guard; see + // https://github.com/rust-lang/rust/issues/59803. + let all_match = |slice: &[_]| { + slice.iter().all(|r| + r == &<::RuleEnum>::#rule_name + ) + }; + all_match(#variable_pattern_ident) + } && + ), + }); + + // Once we have found a branch that matches, we need to parse the children. + let mut singles_before_multiple = Vec::new(); + let mut multiple = None; + let mut singles_after_multiple = Vec::new(); + for item in &branch.pattern { + match item { + Single { + rule_name, binder, .. + } => { + if multiple.is_none() { + singles_before_multiple.push((rule_name, binder)) + } else { + singles_after_multiple.push((rule_name, binder)) + } + } + Multiple { + rule_name, binder, .. + } => { + if multiple.is_none() { + multiple = Some((rule_name, binder)) + } else { + return Err(Error::new( + branch.pattern_span.clone(), + "multiple variable-length patterns are not allowed", + )); + } + } + } + } + let mut parses = Vec::new(); + for (rule_name, binder) in singles_before_multiple.into_iter() { + parses.push(quote!( + let #binder = Self::#rule_name( + inputs.next().unwrap() + )?; + )) + } + // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that + // only the unmatched inputs are left for the variable-length pattern, if any. + for (rule_name, binder) in singles_after_multiple.into_iter().rev() { + parses.push(quote!( + let #binder = Self::#rule_name( + inputs.next_back().unwrap() + )?; + )) + } + if let Some((rule_name, binder)) = multiple { + parses.push(quote!( + let #binder = inputs + .map(|i| Self::#rule_name(i)) + .collect::, _>>()? + .into_iter(); + )) + } + + Ok(quote!( + [#(#match_pat),*] if #(#match_filter)* true => { + #(#parses)* + #body + } + )) +} + +pub fn parse_children( + input: proc_macro::TokenStream, +) -> Result { + let input: ParseChildrenInput = syn::parse(input)?; + + let input_expr = &input.input_expr; + let branches = input + .branches + .iter() + .map(make_parser_branch) + .collect::>>()?; + Ok(quote!({ + let children_rules: Vec<_> = #input_expr.pair + .clone() + .into_inner() + .map(|p| p.as_rule()) + .collect(); + + #[allow(unused_mut)] + let mut inputs = #input_expr + .pair + .clone() + .into_inner() + .map(|p| #input_expr.with_pair(p)); + + #[allow(unreachable_code)] + match children_rules.as_slice() { + #(#branches,)* + [..] => return Err(#input_expr.error( + format!("Unexpected children: {:?}", children_rules) + )), + } + })) +} -- cgit v1.2.3