diff options
author | Nadrieril Feneanar | 2019-09-02 23:15:28 +0200 |
---|---|---|
committer | GitHub | 2019-09-02 23:15:28 +0200 |
commit | a92d09259fc1db995a036eb2f22491c7024ca878 (patch) | |
tree | d3dc2c2fd2b98c2938b01e7ffb5bf2a085b32feb /dhall_proc_macros | |
parent | 737abd9be6d35bbce784d9cf249edf7ad14677d6 (diff) | |
parent | f892f0bdb1dc9a8f2d1cda245e17c28fcf1090b3 (diff) |
Merge pull request #110 from Nadrieril/parser-proc-macro
Rewrite parser macros as proc_macros
Diffstat (limited to 'dhall_proc_macros')
-rw-r--r-- | dhall_proc_macros/Cargo.toml | 6 | ||||
-rw-r--r-- | dhall_proc_macros/src/lib.rs | 19 | ||||
-rw-r--r-- | dhall_proc_macros/src/make_parser.rs | 111 | ||||
-rw-r--r-- | dhall_proc_macros/src/parse_children.rs | 212 |
4 files changed, 345 insertions, 3 deletions
diff --git a/dhall_proc_macros/Cargo.toml b/dhall_proc_macros/Cargo.toml index df1eda8..b641a39 100644 --- a/dhall_proc_macros/Cargo.toml +++ b/dhall_proc_macros/Cargo.toml @@ -11,6 +11,6 @@ doctest = false [dependencies] itertools = "0.8.0" -quote = "0.6.11" -proc-macro2 = "0.4.27" -syn = "0.15.29" +quote = "1.0.2" +proc-macro2 = "1.0.2" +syn = { version = "1.0.5", features = ["full", "extra-traits"] } diff --git a/dhall_proc_macros/src/lib.rs b/dhall_proc_macros/src/lib.rs index 5304429..37e8f9f 100644 --- a/dhall_proc_macros/src/lib.rs +++ b/dhall_proc_macros/src/lib.rs @@ -1,3 +1,4 @@ +#![feature(drain_filter)] //! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate. //! This is highly unstable and breaks regularly; use at your own risk. //! @@ -6,6 +7,8 @@ extern crate proc_macro; mod derive; +mod make_parser; +mod parse_children; use proc_macro::TokenStream; @@ -13,3 +16,19 @@ use proc_macro::TokenStream; pub fn derive_static_type(input: TokenStream) -> TokenStream { derive::derive_static_type(input) } + +#[proc_macro_attribute] +pub fn make_parser(attrs: TokenStream, input: TokenStream) -> TokenStream { + TokenStream::from(match make_parser::make_parser(attrs, input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error(), + }) +} + +#[proc_macro] +pub fn parse_children(input: TokenStream) -> TokenStream { + TokenStream::from(match parse_children::parse_children(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error(), + }) +} diff --git a/dhall_proc_macros/src/make_parser.rs b/dhall_proc_macros/src/make_parser.rs new file mode 100644 index 0000000..268a639 --- /dev/null +++ b/dhall_proc_macros/src/make_parser.rs @@ -0,0 +1,111 @@ +use quote::quote; +use syn::parse::{ParseStream, Result}; +use syn::spanned::Spanned; +use syn::{ + parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl, + Pat, Token, +}; + +fn apply_special_attrs(function: &mut ImplItemMethod) -> Result<()> { + *function = parse_quote!( + #[allow(non_snake_case, dead_code)] + #function + ); + + let recognized_attrs: Vec<_> = function + .attrs + .drain_filter(|attr| attr.path.is_ident("prec_climb")) + .collect(); + + let name = function.sig.ident.clone(); + + if recognized_attrs.is_empty() { + // do nothing + } else if recognized_attrs.len() > 1 { + return Err(Error::new( + recognized_attrs[1].span(), + "expected a single prec_climb attribute", + )); + } else { + let attr = recognized_attrs.into_iter().next().unwrap(); + let (child_rule, climber) = + attr.parse_args_with(|input: ParseStream| { + let child_rule: Ident = input.parse()?; + let _: Token![,] = input.parse()?; + let climber: Expr = input.parse()?; + Ok((child_rule, climber)) + })?; + + // Get the name of the first (`input`) function argument + let first_arg = function.sig.inputs.first().ok_or_else(|| { + Error::new( + function.sig.inputs.span(), + "a prec_climb function needs 4 arguments", + ) + })?; + let first_arg = match &first_arg { + FnArg::Receiver(_) => return Err(Error::new( + first_arg.span(), + "a prec_climb function should not have a `self` argument", + )), + FnArg::Typed(first_arg) => match &*first_arg.pat{ + Pat::Ident(ident) => &ident.ident, + _ => return Err(Error::new( + first_arg.span(), + "this argument should be a plain identifier instead of a pattern", + )), + } + }; + + function.block = parse_quote!({ + #function + + #climber.climb( + #first_arg.pair.clone().into_inner(), + |p| Self::#child_rule(#first_arg.with_pair(p)), + |l, op, r| { + #name(#first_arg.clone(), l?, op, r?) + }, + ) + }); + // Remove the 3 last arguments to keep only the `input` one + function.sig.inputs.pop(); + function.sig.inputs.pop(); + function.sig.inputs.pop(); + // Check that an argument remains + function.sig.inputs.first().ok_or_else(|| { + Error::new( + function.sig.inputs.span(), + "a prec_climb function needs 4 arguments", + ) + })?; + } + + Ok(()) +} + +pub fn make_parser( + attrs: proc_macro::TokenStream, + input: proc_macro::TokenStream, +) -> Result<proc_macro2::TokenStream> { + let rule_enum: Ident = syn::parse(attrs)?; + + let mut imp: ItemImpl = syn::parse(input)?; + imp.items + .iter_mut() + .map(|item| match item { + ImplItem::Method(m) => apply_special_attrs(m), + _ => Ok(()), + }) + .collect::<Result<()>>()?; + + let ty = &imp.self_ty; + let (impl_generics, _, where_clause) = imp.generics.split_for_impl(); + Ok(quote!( + impl #impl_generics PestConsumer for #ty #where_clause { + type RuleEnum = #rule_enum; + } + + #imp + )) +} diff --git a/dhall_proc_macros/src/parse_children.rs b/dhall_proc_macros/src/parse_children.rs new file mode 100644 index 0000000..c78adbc --- /dev/null +++ b/dhall_proc_macros/src/parse_children.rs @@ -0,0 +1,212 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::parse::{Parse, ParseStream, Result}; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{bracketed, parenthesized, token, Error, Expr, Ident, Pat, Token}; + +#[derive(Debug, Clone)] +struct ChildrenBranch { + pattern_span: Span, + pattern: Punctuated<ChildrenBranchPatternItem, Token![,]>, + body: Expr, +} + +#[derive(Debug, Clone)] +enum ChildrenBranchPatternItem { + Single { rule_name: Ident, binder: Pat }, + Multiple { rule_name: Ident, binder: Ident }, +} + +#[derive(Debug, Clone)] +struct ParseChildrenInput { + input_expr: Expr, + branches: Punctuated<ChildrenBranch, Token![,]>, +} + +impl Parse for ChildrenBranch { + fn parse(input: ParseStream) -> Result<Self> { + let contents; + let _: token::Bracket = bracketed!(contents in input); + let pattern_unparsed: TokenStream = contents.fork().parse()?; + let pattern_span = pattern_unparsed.span(); + let pattern = Punctuated::parse_terminated(&contents)?; + let _: Token![=>] = input.parse()?; + let body = input.parse()?; + + Ok(ChildrenBranch { + pattern_span, + pattern, + body, + }) + } +} + +impl Parse for ChildrenBranchPatternItem { + fn parse(input: ParseStream) -> Result<Self> { + let contents; + let rule_name = input.parse()?; + parenthesized!(contents in input); + if input.peek(Token![..]) { + let binder = contents.parse()?; + let _: Token![..] = input.parse()?; + Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder }) + } else if input.is_empty() || input.peek(Token![,]) { + let binder = contents.parse()?; + Ok(ChildrenBranchPatternItem::Single { rule_name, binder }) + } else { + Err(input.error("expected `..` or nothing")) + } + } +} + +impl Parse for ParseChildrenInput { + fn parse(input: ParseStream) -> Result<Self> { + let input_expr = input.parse()?; + let _: Token![;] = input.parse()?; + let branches = Punctuated::parse_terminated(input)?; + + Ok(ParseChildrenInput { + input_expr, + branches, + }) + } +} + +fn make_parser_branch( + branch: &ChildrenBranch, + i_inputs: &Ident, +) -> Result<TokenStream> { + use ChildrenBranchPatternItem::{Multiple, Single}; + + let body = &branch.body; + + // Convert the input pattern into a pattern-match on the Rules of the children. This uses + // slice_patterns. + // A single pattern just checks that the rule matches; a variable-length pattern binds the + // subslice and checks, in the if-guard, that its elements all match the chosen Rule. + let i_variable_pattern = + Ident::new("___variable_pattern", Span::call_site()); + let match_pat = branch.pattern.iter().map(|item| match item { + Single { rule_name, .. } => { + quote!(<<Self as PestConsumer>::RuleEnum>::#rule_name) + } + Multiple { .. } => quote!(#i_variable_pattern..), + }); + let match_filter = branch.pattern.iter().map(|item| match item { + Single { .. } => quote!(), + Multiple { rule_name, .. } => quote!( + { + // We can't use .all() directly in the pattern guard; see + // https://github.com/rust-lang/rust/issues/59803. + let all_match = |slice: &[_]| { + slice.iter().all(|r| + r == &<<Self as PestConsumer>::RuleEnum>::#rule_name + ) + }; + all_match(#i_variable_pattern) + } && + ), + }); + + // Once we have found a branch that matches, we need to parse the children. + let mut singles_before_multiple = Vec::new(); + let mut multiple = None; + let mut singles_after_multiple = Vec::new(); + for item in &branch.pattern { + match item { + Single { + rule_name, binder, .. + } => { + if multiple.is_none() { + singles_before_multiple.push((rule_name, binder)) + } else { + singles_after_multiple.push((rule_name, binder)) + } + } + Multiple { + rule_name, binder, .. + } => { + if multiple.is_none() { + multiple = Some((rule_name, binder)) + } else { + return Err(Error::new( + branch.pattern_span.clone(), + "multiple variable-length patterns are not allowed", + )); + } + } + } + } + let mut parses = Vec::new(); + for (rule_name, binder) in singles_before_multiple.into_iter() { + parses.push(quote!( + let #binder = Self::#rule_name( + #i_inputs.next().unwrap() + )?; + )) + } + // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that + // only the unmatched inputs are left for the variable-length pattern, if any. + for (rule_name, binder) in singles_after_multiple.into_iter().rev() { + parses.push(quote!( + let #binder = Self::#rule_name( + #i_inputs.next_back().unwrap() + )?; + )) + } + if let Some((rule_name, binder)) = multiple { + parses.push(quote!( + let #binder = #i_inputs + .map(|i| Self::#rule_name(i)) + .collect::<Result<Vec<_>, _>>()? + .into_iter(); + )) + } + + Ok(quote!( + [#(#match_pat),*] if #(#match_filter)* true => { + #(#parses)* + #body + } + )) +} + +pub fn parse_children( + input: proc_macro::TokenStream, +) -> Result<proc_macro2::TokenStream> { + let input: ParseChildrenInput = syn::parse(input)?; + + let i_children_rules = Ident::new("___children_rules", Span::call_site()); + let i_inputs = Ident::new("___inputs", Span::call_site()); + + let input_expr = &input.input_expr; + let branches = input + .branches + .iter() + .map(|br| make_parser_branch(br, &i_inputs)) + .collect::<Result<Vec<_>>>()?; + + Ok(quote!({ + let #i_children_rules: Vec<_> = #input_expr.pair + .clone() + .into_inner() + .map(|p| p.as_rule()) + .collect(); + + #[allow(unused_mut)] + let mut #i_inputs = #input_expr + .pair + .clone() + .into_inner() + .map(|p| #input_expr.with_pair(p)); + + #[allow(unreachable_code)] + match #i_children_rules.as_slice() { + #(#branches,)* + [..] => return Err(#input_expr.error( + format!("Unexpected children: {:?}", #i_children_rules) + )), + } + })) +} |