From accaf45aa77099654f94319ed1fb12855dd568b4 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Mon, 9 Sep 2019 22:42:46 +0200 Subject: Move pest_consume macros into their own crate --- Cargo.lock | 11 +- Cargo.toml | 1 + dhall_proc_macros/src/lib.rs | 19 -- dhall_proc_macros/src/make_parser.rs | 334 ------------------------------ dhall_proc_macros/src/parse_children.rs | 215 ------------------- pest_consume/Cargo.toml | 2 +- pest_consume/src/lib.rs | 2 +- pest_consume_macros/Cargo.toml | 18 ++ pest_consume_macros/src/lib.rs | 28 +++ pest_consume_macros/src/make_parser.rs | 334 ++++++++++++++++++++++++++++++ pest_consume_macros/src/parse_children.rs | 215 +++++++++++++++++++ 11 files changed, 608 insertions(+), 571 deletions(-) delete mode 100644 dhall_proc_macros/src/make_parser.rs delete mode 100644 dhall_proc_macros/src/parse_children.rs create mode 100644 pest_consume_macros/Cargo.toml create mode 100644 pest_consume_macros/src/lib.rs create mode 100644 pest_consume_macros/src/make_parser.rs create mode 100644 pest_consume_macros/src/parse_children.rs diff --git a/Cargo.lock b/Cargo.lock index b9ca159..dd650ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -242,8 +242,17 @@ dependencies = [ name = "pest_consume" version = "0.1.0" dependencies = [ - "dhall_proc_macros 0.1.0", "pest 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", + "pest_consume_macros 0.1.0", +] + +[[package]] +name = "pest_consume_macros" +version = "0.1.0" +dependencies = [ + "proc-macro2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "quote 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", + "syn 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index b26b5c2..ec6a760 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -9,6 +9,7 @@ members = [ "dhall_proc_macros", "improved_slice_patterns", "pest_consume", + "pest_consume_macros", "serde_dhall" ] diff --git a/dhall_proc_macros/src/lib.rs b/dhall_proc_macros/src/lib.rs index 37e8f9f..5304429 100644 --- a/dhall_proc_macros/src/lib.rs +++ b/dhall_proc_macros/src/lib.rs @@ -1,4 +1,3 @@ -#![feature(drain_filter)] //! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate. //! This is highly unstable and breaks regularly; use at your own risk. //! @@ -7,8 +6,6 @@ extern crate proc_macro; mod derive; -mod make_parser; -mod parse_children; use proc_macro::TokenStream; @@ -16,19 +13,3 @@ use proc_macro::TokenStream; pub fn derive_static_type(input: TokenStream) -> TokenStream { derive::derive_static_type(input) } - -#[proc_macro_attribute] -pub fn make_parser(attrs: TokenStream, input: TokenStream) -> TokenStream { - TokenStream::from(match make_parser::make_parser(attrs, input) { - Ok(tokens) => tokens, - Err(err) => err.to_compile_error(), - }) -} - -#[proc_macro] -pub fn parse_children(input: TokenStream) -> TokenStream { - TokenStream::from(match parse_children::parse_children(input) { - Ok(tokens) => tokens, - Err(err) => err.to_compile_error(), - }) -} diff --git a/dhall_proc_macros/src/make_parser.rs b/dhall_proc_macros/src/make_parser.rs deleted file mode 100644 index c0594a5..0000000 --- a/dhall_proc_macros/src/make_parser.rs +++ /dev/null @@ -1,334 +0,0 @@ -use std::collections::HashMap; -use std::iter; - -use quote::quote; -use syn::parse::{Parse, ParseStream, Result}; -use syn::spanned::Spanned; -use syn::{ - parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl, - LitBool, Pat, Token, -}; - -mod kw { - syn::custom_keyword!(shortcut); -} - -struct AliasArgs { - target: Ident, - is_shortcut: bool, -} - -struct PrecClimbArgs { - child_rule: Ident, - climber: Expr, -} - -struct AliasSrc { - ident: Ident, - is_shortcut: bool, -} - -struct ParsedFn<'a> { - // Body of the function - function: &'a mut ImplItemMethod, - // Name of the function. - fn_name: Ident, - // Name of the first argument of the function, which should be of type `ParseInput`. - input_arg: Ident, - // List of aliases pointing to this function - alias_srcs: Vec, -} - -impl Parse for AliasArgs { - fn parse(input: ParseStream) -> Result { - let target = input.parse()?; - let is_shortcut = if input.peek(Token![,]) { - // #[alias(rule, shortcut = true)] - let _: Token![,] = input.parse()?; - let _: kw::shortcut = input.parse()?; - let _: Token![=] = input.parse()?; - let b: LitBool = input.parse()?; - b.value - } else { - // #[alias(rule)] - false - }; - Ok(AliasArgs { - target, - is_shortcut, - }) - } -} - -impl Parse for PrecClimbArgs { - fn parse(input: ParseStream) -> Result { - let child_rule = input.parse()?; - let _: Token![,] = input.parse()?; - let climber = input.parse()?; - Ok(PrecClimbArgs { - child_rule, - climber, - }) - } -} - -fn collect_aliases( - imp: &mut ItemImpl, -) -> Result>> { - let functions = imp.items.iter_mut().flat_map(|item| match item { - ImplItem::Method(m) => Some(m), - _ => None, - }); - - let mut alias_map = HashMap::new(); - for function in functions { - let fn_name = function.sig.ident.clone(); - let mut alias_attrs = function - .attrs - .drain_filter(|attr| attr.path.is_ident("alias")) - .collect::>() - .into_iter(); - - if let Some(attr) = alias_attrs.next() { - let args: AliasArgs = attr.parse_args()?; - alias_map.entry(args.target).or_insert_with(Vec::new).push( - AliasSrc { - ident: fn_name, - is_shortcut: args.is_shortcut, - }, - ); - } - if let Some(attr) = alias_attrs.next() { - return Err(Error::new( - attr.span(), - "expected at most one alias attribute", - )); - } - } - - Ok(alias_map) -} - -fn parse_fn<'a>( - function: &'a mut ImplItemMethod, - alias_map: &mut HashMap>, -) -> Result> { - let fn_name = function.sig.ident.clone(); - // Get the name of the first (`input`) function argument - let input_arg = function.sig.inputs.first().ok_or_else(|| { - Error::new( - function.sig.inputs.span(), - "a rule function needs an `input` argument", - ) - })?; - let input_arg = match &input_arg { - FnArg::Receiver(_) => return Err(Error::new( - input_arg.span(), - "a rule function should not have a `self` argument", - )), - FnArg::Typed(input_arg) => match &*input_arg.pat{ - Pat::Ident(ident) => ident.ident.clone(), - _ => return Err(Error::new( - input_arg.span(), - "this argument should be a plain identifier instead of a pattern", - )), - } - }; - - let alias_srcs = alias_map.remove(&fn_name).unwrap_or_else(Vec::new); - - Ok(ParsedFn { - function, - fn_name, - input_arg, - alias_srcs, - }) -} - -fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Ident) -> Result<()> { - let function = &mut *f.function; - let fn_name = &f.fn_name; - let input_arg = &f.input_arg; - - *function = parse_quote!( - #[allow(non_snake_case)] - #function - ); - - // `prec_climb` attr - let prec_climb_attrs: Vec<_> = function - .attrs - .drain_filter(|attr| attr.path.is_ident("prec_climb")) - .collect(); - - if prec_climb_attrs.len() > 1 { - return Err(Error::new( - prec_climb_attrs[1].span(), - "expected at most one prec_climb attribute", - )); - } else if prec_climb_attrs.is_empty() { - // do nothing - } else { - let attr = prec_climb_attrs.into_iter().next().unwrap(); - let PrecClimbArgs { - child_rule, - climber, - } = attr.parse_args()?; - - function.block = parse_quote!({ - #function - - #climber.climb( - #input_arg.as_pair().clone().into_inner(), - |p| Self::#child_rule(#input_arg.with_pair(p)), - |l, op, r| { - #fn_name(#input_arg.clone(), l?, op, r?) - }, - ) - }); - // Remove the 3 last arguments to keep only the `input` one - function.sig.inputs.pop(); - function.sig.inputs.pop(); - function.sig.inputs.pop(); - // Check that an argument remains - function.sig.inputs.first().ok_or_else(|| { - Error::new( - function.sig.inputs.span(), - "a prec_climb function needs 4 arguments", - ) - })?; - } - - // `alias` attr - if !f.alias_srcs.is_empty() { - let aliases = f.alias_srcs.iter().map(|src| &src.ident); - let block = &function.block; - function.block = parse_quote!({ - let mut #input_arg = #input_arg; - // While the current rule allows shortcutting, and there is a single child, and the - // child can still be parsed by the current function, then skip to that child. - while ::allows_shortcut(#input_arg.as_rule()) { - if let Some(child) = #input_arg.single_child() { - if &::rule_alias(child.as_rule()) - == stringify!(#fn_name) { - #input_arg = child; - continue; - } - } - break - } - - match #input_arg.as_rule() { - #(#rule_enum::#aliases => Self::#aliases(#input_arg),)* - #rule_enum::#fn_name => #block, - r => unreachable!( - "make_parser: called {} on {:?}", - stringify!(#fn_name), - r - ) - } - }); - } - - Ok(()) -} - -pub fn make_parser( - attrs: proc_macro::TokenStream, - input: proc_macro::TokenStream, -) -> Result { - let rule_enum: Ident = syn::parse(attrs)?; - let mut imp: ItemImpl = syn::parse(input)?; - - let mut alias_map = collect_aliases(&mut imp)?; - let rule_alias_branches: Vec<_> = alias_map - .iter() - .flat_map(|(tgt, srcs)| iter::repeat(tgt).zip(srcs)) - .map(|(tgt, src)| { - let ident = &src.ident; - quote!( - #rule_enum::#ident => stringify!(#tgt).to_string(), - ) - }) - .collect(); - let shortcut_branches: Vec<_> = alias_map - .iter() - .flat_map(|(_tgt, srcs)| srcs) - .map(|AliasSrc { ident, is_shortcut }| { - quote!( - #rule_enum::#ident => #is_shortcut, - ) - }) - .collect(); - - let fn_map: HashMap = imp - .items - .iter_mut() - .flat_map(|item| match item { - ImplItem::Method(m) => Some(m), - _ => None, - }) - .map(|method| { - let mut f = parse_fn(method, &mut alias_map)?; - apply_special_attrs(&mut f, &rule_enum)?; - Ok((f.fn_name.clone(), f)) - }) - .collect::>()?; - - // Entries that remain in the alias map don't have a matching method, so we create one. - let extra_fns: Vec<_> = alias_map - .iter() - .map(|(tgt, srcs)| { - // Get the signature of one of the functions that has this alias. They should all have - // essentially the same signature anyways. - let f = fn_map.get(&srcs.first().unwrap().ident).unwrap(); - let input_arg = f.input_arg.clone(); - let mut sig = f.function.sig.clone(); - sig.ident = tgt.clone(); - let srcs = srcs.iter().map(|src| &src.ident); - - Ok(parse_quote!( - #sig { - match #input_arg.as_rule() { - #(#rule_enum::#srcs => Self::#srcs(#input_arg),)* - // We can't match on #rule_enum::#tgt since `tgt` might be an arbitrary - // identifier. - r if &format!("{:?}", r) == stringify!(#tgt) => - return Err(#input_arg.error(format!( - "make_parser: missing method for rule {}", - stringify!(#tgt), - ))), - r => unreachable!( - "make_parser: called {} on {:?}", - stringify!(#tgt), - r - ) - } - } - )) - }) - .collect::>()?; - imp.items.extend(extra_fns); - - let ty = &imp.self_ty; - let (impl_generics, _, where_clause) = imp.generics.split_for_impl(); - Ok(quote!( - impl #impl_generics pest_consume::PestConsumer for #ty #where_clause { - type Rule = #rule_enum; - fn rule_alias(rule: Self::Rule) -> String { - match rule { - #(#rule_alias_branches)* - r => format!("{:?}", r), - } - } - fn allows_shortcut(rule: Self::Rule) -> bool { - match rule { - #(#shortcut_branches)* - _ => false, - } - } - } - - #imp - )) -} diff --git a/dhall_proc_macros/src/parse_children.rs b/dhall_proc_macros/src/parse_children.rs deleted file mode 100644 index d6474a7..0000000 --- a/dhall_proc_macros/src/parse_children.rs +++ /dev/null @@ -1,215 +0,0 @@ -use proc_macro2::{Span, TokenStream}; -use quote::quote; -use syn::parse::{Parse, ParseStream, Result}; -use syn::punctuated::Punctuated; -use syn::spanned::Spanned; -use syn::{bracketed, parenthesized, token, Error, Expr, Ident, Pat, Token}; - -#[derive(Debug, Clone)] -struct ChildrenBranch { - pattern_span: Span, - pattern: Punctuated, - body: Expr, -} - -#[derive(Debug, Clone)] -enum ChildrenBranchPatternItem { - Single { rule_name: Ident, binder: Pat }, - Multiple { rule_name: Ident, binder: Ident }, -} - -#[derive(Debug, Clone)] -struct ParseChildrenInput { - input_expr: Expr, - branches: Punctuated, -} - -impl Parse for ChildrenBranch { - fn parse(input: ParseStream) -> Result { - let contents; - let _: token::Bracket = bracketed!(contents in input); - let pattern_unparsed: TokenStream = contents.fork().parse()?; - let pattern_span = pattern_unparsed.span(); - let pattern = Punctuated::parse_terminated(&contents)?; - let _: Token![=>] = input.parse()?; - let body = input.parse()?; - - Ok(ChildrenBranch { - pattern_span, - pattern, - body, - }) - } -} - -impl Parse for ChildrenBranchPatternItem { - fn parse(input: ParseStream) -> Result { - let contents; - let rule_name = input.parse()?; - parenthesized!(contents in input); - if input.peek(Token![..]) { - let binder = contents.parse()?; - let _: Token![..] = input.parse()?; - Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder }) - } else if input.is_empty() || input.peek(Token![,]) { - let binder = contents.parse()?; - Ok(ChildrenBranchPatternItem::Single { rule_name, binder }) - } else { - Err(input.error("expected `..` or nothing")) - } - } -} - -impl Parse for ParseChildrenInput { - fn parse(input: ParseStream) -> Result { - let input_expr = input.parse()?; - let _: Token![;] = input.parse()?; - let branches = Punctuated::parse_terminated(input)?; - - Ok(ParseChildrenInput { - input_expr, - branches, - }) - } -} - -fn make_parser_branch( - branch: &ChildrenBranch, - i_inputs: &Ident, -) -> Result { - use ChildrenBranchPatternItem::{Multiple, Single}; - - let body = &branch.body; - - // Convert the input pattern into a pattern-match on the Rules of the children. This uses - // slice_patterns. - // A single pattern just checks that the rule matches; a variable-length pattern binds the - // subslice and checks, in the if-guard, that its elements all match the chosen Rule. - let i_variable_pattern = - Ident::new("___variable_pattern", Span::call_site()); - let match_pat = branch.pattern.iter().map(|item| match item { - Single { rule_name, .. } => quote!(stringify!(#rule_name)), - Multiple { .. } => quote!(#i_variable_pattern @ ..), - }); - let match_filter = branch.pattern.iter().map(|item| match item { - Single { .. } => quote!(), - Multiple { rule_name, .. } => quote!( - { - // We can't use .all() directly in the pattern guard; see - // https://github.com/rust-lang/rust/issues/59803. - let all_match = |slice: &[_]| { - slice.iter().all(|r| - *r == stringify!(#rule_name) - ) - }; - all_match(#i_variable_pattern) - } && - ), - }); - - // Once we have found a branch that matches, we need to parse the children. - let mut singles_before_multiple = Vec::new(); - let mut multiple = None; - let mut singles_after_multiple = Vec::new(); - for item in &branch.pattern { - match item { - Single { - rule_name, binder, .. - } => { - if multiple.is_none() { - singles_before_multiple.push((rule_name, binder)) - } else { - singles_after_multiple.push((rule_name, binder)) - } - } - Multiple { - rule_name, binder, .. - } => { - if multiple.is_none() { - multiple = Some((rule_name, binder)) - } else { - return Err(Error::new( - branch.pattern_span.clone(), - "multiple variable-length patterns are not allowed", - )); - } - } - } - } - let mut parses = Vec::new(); - for (rule_name, binder) in singles_before_multiple.into_iter() { - parses.push(quote!( - let #binder = Self::#rule_name( - #i_inputs.next().unwrap() - )?; - )) - } - // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that - // only the unmatched inputs are left for the variable-length pattern, if any. - for (rule_name, binder) in singles_after_multiple.into_iter().rev() { - parses.push(quote!( - let #binder = Self::#rule_name( - #i_inputs.next_back().unwrap() - )?; - )) - } - if let Some((rule_name, binder)) = multiple { - parses.push(quote!( - let #binder = #i_inputs - .map(|i| Self::#rule_name(i)) - .collect::, _>>()? - .into_iter(); - )) - } - - Ok(quote!( - [#(#match_pat),*] if #(#match_filter)* true => { - #(#parses)* - #body - } - )) -} - -pub fn parse_children( - input: proc_macro::TokenStream, -) -> Result { - let input: ParseChildrenInput = syn::parse(input)?; - - let i_children_rules = Ident::new("___children_rules", Span::call_site()); - let i_inputs = Ident::new("___inputs", Span::call_site()); - - let input_expr = &input.input_expr; - let branches = input - .branches - .iter() - .map(|br| make_parser_branch(br, &i_inputs)) - .collect::>>()?; - - Ok(quote!({ - let #i_children_rules: Vec<_> = #input_expr.as_pair() - .clone() - .into_inner() - .map(|p| p.as_rule()) - .map(::rule_alias) - .collect(); - let #i_children_rules: Vec<&str> = #i_children_rules - .iter() - .map(String::as_str) - .collect(); - - #[allow(unused_mut)] - let mut #i_inputs = #input_expr - .as_pair() - .clone() - .into_inner() - .map(|p| #input_expr.with_pair(p)); - - #[allow(unreachable_code)] - match #i_children_rules.as_slice() { - #(#branches,)* - [..] => return Err(#input_expr.error( - format!("Unexpected children: {:?}", #i_children_rules) - )), - } - })) -} diff --git a/pest_consume/Cargo.toml b/pest_consume/Cargo.toml index b3f1db7..7b26d56 100644 --- a/pest_consume/Cargo.toml +++ b/pest_consume/Cargo.toml @@ -10,4 +10,4 @@ repository = "https://github.com/Nadrieril/dhall-rust" [dependencies] pest = "2.1" -dhall_proc_macros = { path = "../dhall_proc_macros" } +pest_consume_macros = { path = "../pest_consume_macros" } diff --git a/pest_consume/src/lib.rs b/pest_consume/src/lib.rs index 1cedc74..6e4b2e3 100644 --- a/pest_consume/src/lib.rs +++ b/pest_consume/src/lib.rs @@ -2,7 +2,7 @@ use pest::error::{Error, ErrorVariant}; use pest::iterators::Pair; use pest::Span; -pub use dhall_proc_macros::{make_parser, parse_children}; +pub use pest_consume_macros::{make_parser, parse_children}; /// Carries a pest Pair alongside custom user data. #[derive(Debug, Clone)] diff --git a/pest_consume_macros/Cargo.toml b/pest_consume_macros/Cargo.toml new file mode 100644 index 0000000..dd65d95 --- /dev/null +++ b/pest_consume_macros/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "pest_consume_macros" +version = "0.1.0" # remember to update html_root_url +authors = ["Nadrieril "] +license = "MIT OR Apache-2.0" +edition = "2018" +description = "Macros for pest_consume" +readme = "README.md" +repository = "https://github.com/Nadrieril/dhall-rust" + +[lib] +proc-macro = true +doctest = false + +[dependencies] +quote = "1.0.2" +proc-macro2 = "1.0.2" +syn = { version = "1.0.5", features = ["full", "extra-traits"] } diff --git a/pest_consume_macros/src/lib.rs b/pest_consume_macros/src/lib.rs new file mode 100644 index 0000000..dd437f6 --- /dev/null +++ b/pest_consume_macros/src/lib.rs @@ -0,0 +1,28 @@ +#![feature(drain_filter)] +//! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate. +//! This is highly unstable and breaks regularly; use at your own risk. +//! +//! [dhall-rust]: https://github.com/Nadrieril/dhall-rust + +extern crate proc_macro; + +mod make_parser; +mod parse_children; + +use proc_macro::TokenStream; + +#[proc_macro_attribute] +pub fn make_parser(attrs: TokenStream, input: TokenStream) -> TokenStream { + TokenStream::from(match make_parser::make_parser(attrs, input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error(), + }) +} + +#[proc_macro] +pub fn parse_children(input: TokenStream) -> TokenStream { + TokenStream::from(match parse_children::parse_children(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error(), + }) +} diff --git a/pest_consume_macros/src/make_parser.rs b/pest_consume_macros/src/make_parser.rs new file mode 100644 index 0000000..c0594a5 --- /dev/null +++ b/pest_consume_macros/src/make_parser.rs @@ -0,0 +1,334 @@ +use std::collections::HashMap; +use std::iter; + +use quote::quote; +use syn::parse::{Parse, ParseStream, Result}; +use syn::spanned::Spanned; +use syn::{ + parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl, + LitBool, Pat, Token, +}; + +mod kw { + syn::custom_keyword!(shortcut); +} + +struct AliasArgs { + target: Ident, + is_shortcut: bool, +} + +struct PrecClimbArgs { + child_rule: Ident, + climber: Expr, +} + +struct AliasSrc { + ident: Ident, + is_shortcut: bool, +} + +struct ParsedFn<'a> { + // Body of the function + function: &'a mut ImplItemMethod, + // Name of the function. + fn_name: Ident, + // Name of the first argument of the function, which should be of type `ParseInput`. + input_arg: Ident, + // List of aliases pointing to this function + alias_srcs: Vec, +} + +impl Parse for AliasArgs { + fn parse(input: ParseStream) -> Result { + let target = input.parse()?; + let is_shortcut = if input.peek(Token![,]) { + // #[alias(rule, shortcut = true)] + let _: Token![,] = input.parse()?; + let _: kw::shortcut = input.parse()?; + let _: Token![=] = input.parse()?; + let b: LitBool = input.parse()?; + b.value + } else { + // #[alias(rule)] + false + }; + Ok(AliasArgs { + target, + is_shortcut, + }) + } +} + +impl Parse for PrecClimbArgs { + fn parse(input: ParseStream) -> Result { + let child_rule = input.parse()?; + let _: Token![,] = input.parse()?; + let climber = input.parse()?; + Ok(PrecClimbArgs { + child_rule, + climber, + }) + } +} + +fn collect_aliases( + imp: &mut ItemImpl, +) -> Result>> { + let functions = imp.items.iter_mut().flat_map(|item| match item { + ImplItem::Method(m) => Some(m), + _ => None, + }); + + let mut alias_map = HashMap::new(); + for function in functions { + let fn_name = function.sig.ident.clone(); + let mut alias_attrs = function + .attrs + .drain_filter(|attr| attr.path.is_ident("alias")) + .collect::>() + .into_iter(); + + if let Some(attr) = alias_attrs.next() { + let args: AliasArgs = attr.parse_args()?; + alias_map.entry(args.target).or_insert_with(Vec::new).push( + AliasSrc { + ident: fn_name, + is_shortcut: args.is_shortcut, + }, + ); + } + if let Some(attr) = alias_attrs.next() { + return Err(Error::new( + attr.span(), + "expected at most one alias attribute", + )); + } + } + + Ok(alias_map) +} + +fn parse_fn<'a>( + function: &'a mut ImplItemMethod, + alias_map: &mut HashMap>, +) -> Result> { + let fn_name = function.sig.ident.clone(); + // Get the name of the first (`input`) function argument + let input_arg = function.sig.inputs.first().ok_or_else(|| { + Error::new( + function.sig.inputs.span(), + "a rule function needs an `input` argument", + ) + })?; + let input_arg = match &input_arg { + FnArg::Receiver(_) => return Err(Error::new( + input_arg.span(), + "a rule function should not have a `self` argument", + )), + FnArg::Typed(input_arg) => match &*input_arg.pat{ + Pat::Ident(ident) => ident.ident.clone(), + _ => return Err(Error::new( + input_arg.span(), + "this argument should be a plain identifier instead of a pattern", + )), + } + }; + + let alias_srcs = alias_map.remove(&fn_name).unwrap_or_else(Vec::new); + + Ok(ParsedFn { + function, + fn_name, + input_arg, + alias_srcs, + }) +} + +fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Ident) -> Result<()> { + let function = &mut *f.function; + let fn_name = &f.fn_name; + let input_arg = &f.input_arg; + + *function = parse_quote!( + #[allow(non_snake_case)] + #function + ); + + // `prec_climb` attr + let prec_climb_attrs: Vec<_> = function + .attrs + .drain_filter(|attr| attr.path.is_ident("prec_climb")) + .collect(); + + if prec_climb_attrs.len() > 1 { + return Err(Error::new( + prec_climb_attrs[1].span(), + "expected at most one prec_climb attribute", + )); + } else if prec_climb_attrs.is_empty() { + // do nothing + } else { + let attr = prec_climb_attrs.into_iter().next().unwrap(); + let PrecClimbArgs { + child_rule, + climber, + } = attr.parse_args()?; + + function.block = parse_quote!({ + #function + + #climber.climb( + #input_arg.as_pair().clone().into_inner(), + |p| Self::#child_rule(#input_arg.with_pair(p)), + |l, op, r| { + #fn_name(#input_arg.clone(), l?, op, r?) + }, + ) + }); + // Remove the 3 last arguments to keep only the `input` one + function.sig.inputs.pop(); + function.sig.inputs.pop(); + function.sig.inputs.pop(); + // Check that an argument remains + function.sig.inputs.first().ok_or_else(|| { + Error::new( + function.sig.inputs.span(), + "a prec_climb function needs 4 arguments", + ) + })?; + } + + // `alias` attr + if !f.alias_srcs.is_empty() { + let aliases = f.alias_srcs.iter().map(|src| &src.ident); + let block = &function.block; + function.block = parse_quote!({ + let mut #input_arg = #input_arg; + // While the current rule allows shortcutting, and there is a single child, and the + // child can still be parsed by the current function, then skip to that child. + while ::allows_shortcut(#input_arg.as_rule()) { + if let Some(child) = #input_arg.single_child() { + if &::rule_alias(child.as_rule()) + == stringify!(#fn_name) { + #input_arg = child; + continue; + } + } + break + } + + match #input_arg.as_rule() { + #(#rule_enum::#aliases => Self::#aliases(#input_arg),)* + #rule_enum::#fn_name => #block, + r => unreachable!( + "make_parser: called {} on {:?}", + stringify!(#fn_name), + r + ) + } + }); + } + + Ok(()) +} + +pub fn make_parser( + attrs: proc_macro::TokenStream, + input: proc_macro::TokenStream, +) -> Result { + let rule_enum: Ident = syn::parse(attrs)?; + let mut imp: ItemImpl = syn::parse(input)?; + + let mut alias_map = collect_aliases(&mut imp)?; + let rule_alias_branches: Vec<_> = alias_map + .iter() + .flat_map(|(tgt, srcs)| iter::repeat(tgt).zip(srcs)) + .map(|(tgt, src)| { + let ident = &src.ident; + quote!( + #rule_enum::#ident => stringify!(#tgt).to_string(), + ) + }) + .collect(); + let shortcut_branches: Vec<_> = alias_map + .iter() + .flat_map(|(_tgt, srcs)| srcs) + .map(|AliasSrc { ident, is_shortcut }| { + quote!( + #rule_enum::#ident => #is_shortcut, + ) + }) + .collect(); + + let fn_map: HashMap = imp + .items + .iter_mut() + .flat_map(|item| match item { + ImplItem::Method(m) => Some(m), + _ => None, + }) + .map(|method| { + let mut f = parse_fn(method, &mut alias_map)?; + apply_special_attrs(&mut f, &rule_enum)?; + Ok((f.fn_name.clone(), f)) + }) + .collect::>()?; + + // Entries that remain in the alias map don't have a matching method, so we create one. + let extra_fns: Vec<_> = alias_map + .iter() + .map(|(tgt, srcs)| { + // Get the signature of one of the functions that has this alias. They should all have + // essentially the same signature anyways. + let f = fn_map.get(&srcs.first().unwrap().ident).unwrap(); + let input_arg = f.input_arg.clone(); + let mut sig = f.function.sig.clone(); + sig.ident = tgt.clone(); + let srcs = srcs.iter().map(|src| &src.ident); + + Ok(parse_quote!( + #sig { + match #input_arg.as_rule() { + #(#rule_enum::#srcs => Self::#srcs(#input_arg),)* + // We can't match on #rule_enum::#tgt since `tgt` might be an arbitrary + // identifier. + r if &format!("{:?}", r) == stringify!(#tgt) => + return Err(#input_arg.error(format!( + "make_parser: missing method for rule {}", + stringify!(#tgt), + ))), + r => unreachable!( + "make_parser: called {} on {:?}", + stringify!(#tgt), + r + ) + } + } + )) + }) + .collect::>()?; + imp.items.extend(extra_fns); + + let ty = &imp.self_ty; + let (impl_generics, _, where_clause) = imp.generics.split_for_impl(); + Ok(quote!( + impl #impl_generics pest_consume::PestConsumer for #ty #where_clause { + type Rule = #rule_enum; + fn rule_alias(rule: Self::Rule) -> String { + match rule { + #(#rule_alias_branches)* + r => format!("{:?}", r), + } + } + fn allows_shortcut(rule: Self::Rule) -> bool { + match rule { + #(#shortcut_branches)* + _ => false, + } + } + } + + #imp + )) +} diff --git a/pest_consume_macros/src/parse_children.rs b/pest_consume_macros/src/parse_children.rs new file mode 100644 index 0000000..d6474a7 --- /dev/null +++ b/pest_consume_macros/src/parse_children.rs @@ -0,0 +1,215 @@ +use proc_macro2::{Span, TokenStream}; +use quote::quote; +use syn::parse::{Parse, ParseStream, Result}; +use syn::punctuated::Punctuated; +use syn::spanned::Spanned; +use syn::{bracketed, parenthesized, token, Error, Expr, Ident, Pat, Token}; + +#[derive(Debug, Clone)] +struct ChildrenBranch { + pattern_span: Span, + pattern: Punctuated, + body: Expr, +} + +#[derive(Debug, Clone)] +enum ChildrenBranchPatternItem { + Single { rule_name: Ident, binder: Pat }, + Multiple { rule_name: Ident, binder: Ident }, +} + +#[derive(Debug, Clone)] +struct ParseChildrenInput { + input_expr: Expr, + branches: Punctuated, +} + +impl Parse for ChildrenBranch { + fn parse(input: ParseStream) -> Result { + let contents; + let _: token::Bracket = bracketed!(contents in input); + let pattern_unparsed: TokenStream = contents.fork().parse()?; + let pattern_span = pattern_unparsed.span(); + let pattern = Punctuated::parse_terminated(&contents)?; + let _: Token![=>] = input.parse()?; + let body = input.parse()?; + + Ok(ChildrenBranch { + pattern_span, + pattern, + body, + }) + } +} + +impl Parse for ChildrenBranchPatternItem { + fn parse(input: ParseStream) -> Result { + let contents; + let rule_name = input.parse()?; + parenthesized!(contents in input); + if input.peek(Token![..]) { + let binder = contents.parse()?; + let _: Token![..] = input.parse()?; + Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder }) + } else if input.is_empty() || input.peek(Token![,]) { + let binder = contents.parse()?; + Ok(ChildrenBranchPatternItem::Single { rule_name, binder }) + } else { + Err(input.error("expected `..` or nothing")) + } + } +} + +impl Parse for ParseChildrenInput { + fn parse(input: ParseStream) -> Result { + let input_expr = input.parse()?; + let _: Token![;] = input.parse()?; + let branches = Punctuated::parse_terminated(input)?; + + Ok(ParseChildrenInput { + input_expr, + branches, + }) + } +} + +fn make_parser_branch( + branch: &ChildrenBranch, + i_inputs: &Ident, +) -> Result { + use ChildrenBranchPatternItem::{Multiple, Single}; + + let body = &branch.body; + + // Convert the input pattern into a pattern-match on the Rules of the children. This uses + // slice_patterns. + // A single pattern just checks that the rule matches; a variable-length pattern binds the + // subslice and checks, in the if-guard, that its elements all match the chosen Rule. + let i_variable_pattern = + Ident::new("___variable_pattern", Span::call_site()); + let match_pat = branch.pattern.iter().map(|item| match item { + Single { rule_name, .. } => quote!(stringify!(#rule_name)), + Multiple { .. } => quote!(#i_variable_pattern @ ..), + }); + let match_filter = branch.pattern.iter().map(|item| match item { + Single { .. } => quote!(), + Multiple { rule_name, .. } => quote!( + { + // We can't use .all() directly in the pattern guard; see + // https://github.com/rust-lang/rust/issues/59803. + let all_match = |slice: &[_]| { + slice.iter().all(|r| + *r == stringify!(#rule_name) + ) + }; + all_match(#i_variable_pattern) + } && + ), + }); + + // Once we have found a branch that matches, we need to parse the children. + let mut singles_before_multiple = Vec::new(); + let mut multiple = None; + let mut singles_after_multiple = Vec::new(); + for item in &branch.pattern { + match item { + Single { + rule_name, binder, .. + } => { + if multiple.is_none() { + singles_before_multiple.push((rule_name, binder)) + } else { + singles_after_multiple.push((rule_name, binder)) + } + } + Multiple { + rule_name, binder, .. + } => { + if multiple.is_none() { + multiple = Some((rule_name, binder)) + } else { + return Err(Error::new( + branch.pattern_span.clone(), + "multiple variable-length patterns are not allowed", + )); + } + } + } + } + let mut parses = Vec::new(); + for (rule_name, binder) in singles_before_multiple.into_iter() { + parses.push(quote!( + let #binder = Self::#rule_name( + #i_inputs.next().unwrap() + )?; + )) + } + // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that + // only the unmatched inputs are left for the variable-length pattern, if any. + for (rule_name, binder) in singles_after_multiple.into_iter().rev() { + parses.push(quote!( + let #binder = Self::#rule_name( + #i_inputs.next_back().unwrap() + )?; + )) + } + if let Some((rule_name, binder)) = multiple { + parses.push(quote!( + let #binder = #i_inputs + .map(|i| Self::#rule_name(i)) + .collect::, _>>()? + .into_iter(); + )) + } + + Ok(quote!( + [#(#match_pat),*] if #(#match_filter)* true => { + #(#parses)* + #body + } + )) +} + +pub fn parse_children( + input: proc_macro::TokenStream, +) -> Result { + let input: ParseChildrenInput = syn::parse(input)?; + + let i_children_rules = Ident::new("___children_rules", Span::call_site()); + let i_inputs = Ident::new("___inputs", Span::call_site()); + + let input_expr = &input.input_expr; + let branches = input + .branches + .iter() + .map(|br| make_parser_branch(br, &i_inputs)) + .collect::>>()?; + + Ok(quote!({ + let #i_children_rules: Vec<_> = #input_expr.as_pair() + .clone() + .into_inner() + .map(|p| p.as_rule()) + .map(::rule_alias) + .collect(); + let #i_children_rules: Vec<&str> = #i_children_rules + .iter() + .map(String::as_str) + .collect(); + + #[allow(unused_mut)] + let mut #i_inputs = #input_expr + .as_pair() + .clone() + .into_inner() + .map(|p| #input_expr.with_pair(p)); + + #[allow(unreachable_code)] + match #i_children_rules.as_slice() { + #(#branches,)* + [..] => return Err(#input_expr.error( + format!("Unexpected children: {:?}", #i_children_rules) + )), + } + })) +} -- cgit v1.2.3