author      Nadrieril   2019-09-09 22:42:46 +0200
committer   Nadrieril   2019-09-09 22:42:46 +0200
commit      accaf45aa77099654f94319ed1fb12855dd568b4 (patch)
tree        a56ca7ee9fc2b7a5dc7bb7fb5451f8b0255129c3 /pest_consume_macros
parent      595a52018e31126ecbf1be49794750f1a59a66b1 (diff)
Move pest_consume macros into their own crate
Diffstat (limited to 'pest_consume_macros')
-rw-r--r--  pest_consume_macros/Cargo.toml              |  18
-rw-r--r--  pest_consume_macros/src/lib.rs              |  28
-rw-r--r--  pest_consume_macros/src/make_parser.rs      | 334
-rw-r--r--  pest_consume_macros/src/parse_children.rs   | 215
4 files changed, 595 insertions, 0 deletions
diff --git a/pest_consume_macros/Cargo.toml b/pest_consume_macros/Cargo.toml
new file mode 100644
index 0000000..dd65d95
--- /dev/null
+++ b/pest_consume_macros/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "pest_consume_macros"
+version = "0.1.0" # remember to update html_root_url
+authors = ["Nadrieril <nadrieril@users.noreply.github.com>"]
+license = "MIT OR Apache-2.0"
+edition = "2018"
+description = "Macros for pest_consume"
+readme = "README.md"
+repository = "https://github.com/Nadrieril/dhall-rust"
+
+[lib]
+proc-macro = true
+doctest = false
+
+[dependencies]
+quote = "1.0.2"
+proc-macro2 = "1.0.2"
+syn = { version = "1.0.5", features = ["full", "extra-traits"] }
diff --git a/pest_consume_macros/src/lib.rs b/pest_consume_macros/src/lib.rs
new file mode 100644
index 0000000..dd437f6
--- /dev/null
+++ b/pest_consume_macros/src/lib.rs
@@ -0,0 +1,28 @@
+#![feature(drain_filter)]
+//! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate.
+//! This is highly unstable and breaks regularly; use at your own risk.
+//!
+//! [dhall-rust]: https://github.com/Nadrieril/dhall-rust
+
+extern crate proc_macro;
+
+mod make_parser;
+mod parse_children;
+
+use proc_macro::TokenStream;
+
+#[proc_macro_attribute]
+pub fn make_parser(attrs: TokenStream, input: TokenStream) -> TokenStream {
+ TokenStream::from(match make_parser::make_parser(attrs, input) {
+ Ok(tokens) => tokens,
+ Err(err) => err.to_compile_error(),
+ })
+}
+
+#[proc_macro]
+pub fn parse_children(input: TokenStream) -> TokenStream {
+ TokenStream::from(match parse_children::parse_children(input) {
+ Ok(tokens) => tokens,
+ Err(err) => err.to_compile_error(),
+ })
+}
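
A rough picture of how these two exported macros are meant to be consumed (not part of this commit): `make_parser` is applied to an inherent impl and receives the pest `Rule` enum as its argument, and `parse_children` is invoked inside the rule methods. The sketch below assumes a pest grammar, a `CalcParser` type, an input-wrapper type called `ParseInput` (the name used in the comments of make_parser.rs below) and re-exports of both macros from the companion pest_consume crate; none of those are defined in this diff.

    // Hypothetical consumer of the two macros above; types and grammar are assumed.
    use pest_consume::ParseInput; // assumed input-wrapper type from the companion crate

    #[derive(pest_derive::Parser)]
    #[grammar = "calc.pest"] // assumed grammar defining `num` and `expr` rules
    struct CalcParser;

    type ParseResult<T> = Result<T, pest::error::Error<Rule>>; // error type assumed

    // `make_parser(Rule)` generates the `pest_consume::PestConsumer` impl
    // (rule_alias / allows_shortcut) for CalcParser, keyed on the pest Rule enum.
    #[pest_consume::make_parser(Rule)]
    impl CalcParser {
        fn num(input: ParseInput) -> ParseResult<i64> {
            // Error handling elided in this sketch.
            Ok(input.as_pair().as_str().trim().parse().unwrap())
        }

        fn expr(input: ParseInput) -> ParseResult<i64> {
            // `parse_children` matches on the rules of the node's children and
            // calls the corresponding methods to parse them.
            Ok(pest_consume::parse_children!(input;
                [num(n)] => n,
                [num(ns)..] => ns.sum(),
            ))
        }
    }
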
diff --git a/pest_consume_macros/src/make_parser.rs b/pest_consume_macros/src/make_parser.rs
new file mode 100644
index 0000000..c0594a5
--- /dev/null
+++ b/pest_consume_macros/src/make_parser.rs
@@ -0,0 +1,334 @@
+use std::collections::HashMap;
+use std::iter;
+
+use quote::quote;
+use syn::parse::{Parse, ParseStream, Result};
+use syn::spanned::Spanned;
+use syn::{
+ parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl,
+ LitBool, Pat, Token,
+};
+
+mod kw {
+ syn::custom_keyword!(shortcut);
+}
+
+struct AliasArgs {
+ target: Ident,
+ is_shortcut: bool,
+}
+
+struct PrecClimbArgs {
+ child_rule: Ident,
+ climber: Expr,
+}
+
+struct AliasSrc {
+ ident: Ident,
+ is_shortcut: bool,
+}
+
+struct ParsedFn<'a> {
+ // Body of the function
+ function: &'a mut ImplItemMethod,
+ // Name of the function.
+ fn_name: Ident,
+ // Name of the first argument of the function, which should be of type `ParseInput`.
+ input_arg: Ident,
+ // List of aliases pointing to this function
+ alias_srcs: Vec<AliasSrc>,
+}
+
+impl Parse for AliasArgs {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let target = input.parse()?;
+ let is_shortcut = if input.peek(Token![,]) {
+ // #[alias(rule, shortcut = true)]
+ let _: Token![,] = input.parse()?;
+ let _: kw::shortcut = input.parse()?;
+ let _: Token![=] = input.parse()?;
+ let b: LitBool = input.parse()?;
+ b.value
+ } else {
+ // #[alias(rule)]
+ false
+ };
+ Ok(AliasArgs {
+ target,
+ is_shortcut,
+ })
+ }
+}
+
+impl Parse for PrecClimbArgs {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let child_rule = input.parse()?;
+ let _: Token![,] = input.parse()?;
+ let climber = input.parse()?;
+ Ok(PrecClimbArgs {
+ child_rule,
+ climber,
+ })
+ }
+}
+
+fn collect_aliases(
+ imp: &mut ItemImpl,
+) -> Result<HashMap<Ident, Vec<AliasSrc>>> {
+ let functions = imp.items.iter_mut().flat_map(|item| match item {
+ ImplItem::Method(m) => Some(m),
+ _ => None,
+ });
+
+ let mut alias_map = HashMap::new();
+ for function in functions {
+ let fn_name = function.sig.ident.clone();
+ let mut alias_attrs = function
+ .attrs
+ .drain_filter(|attr| attr.path.is_ident("alias"))
+ .collect::<Vec<_>>()
+ .into_iter();
+
+ if let Some(attr) = alias_attrs.next() {
+ let args: AliasArgs = attr.parse_args()?;
+ alias_map.entry(args.target).or_insert_with(Vec::new).push(
+ AliasSrc {
+ ident: fn_name,
+ is_shortcut: args.is_shortcut,
+ },
+ );
+ }
+ if let Some(attr) = alias_attrs.next() {
+ return Err(Error::new(
+ attr.span(),
+ "expected at most one alias attribute",
+ ));
+ }
+ }
+
+ Ok(alias_map)
+}
+
+fn parse_fn<'a>(
+ function: &'a mut ImplItemMethod,
+ alias_map: &mut HashMap<Ident, Vec<AliasSrc>>,
+) -> Result<ParsedFn<'a>> {
+ let fn_name = function.sig.ident.clone();
+ // Get the name of the first (`input`) function argument
+ let input_arg = function.sig.inputs.first().ok_or_else(|| {
+ Error::new(
+ function.sig.inputs.span(),
+ "a rule function needs an `input` argument",
+ )
+ })?;
+ let input_arg = match &input_arg {
+ FnArg::Receiver(_) => return Err(Error::new(
+ input_arg.span(),
+ "a rule function should not have a `self` argument",
+ )),
+ FnArg::Typed(input_arg) => match &*input_arg.pat{
+ Pat::Ident(ident) => ident.ident.clone(),
+ _ => return Err(Error::new(
+ input_arg.span(),
+ "this argument should be a plain identifier instead of a pattern",
+ )),
+ }
+ };
+
+ let alias_srcs = alias_map.remove(&fn_name).unwrap_or_else(Vec::new);
+
+ Ok(ParsedFn {
+ function,
+ fn_name,
+ input_arg,
+ alias_srcs,
+ })
+}
+
+fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Ident) -> Result<()> {
+ let function = &mut *f.function;
+ let fn_name = &f.fn_name;
+ let input_arg = &f.input_arg;
+
+ *function = parse_quote!(
+ #[allow(non_snake_case)]
+ #function
+ );
+
+ // `prec_climb` attr
+ let prec_climb_attrs: Vec<_> = function
+ .attrs
+ .drain_filter(|attr| attr.path.is_ident("prec_climb"))
+ .collect();
+
+ if prec_climb_attrs.len() > 1 {
+ return Err(Error::new(
+ prec_climb_attrs[1].span(),
+ "expected at most one prec_climb attribute",
+ ));
+ } else if prec_climb_attrs.is_empty() {
+ // do nothing
+ } else {
+ let attr = prec_climb_attrs.into_iter().next().unwrap();
+ let PrecClimbArgs {
+ child_rule,
+ climber,
+ } = attr.parse_args()?;
+
+ function.block = parse_quote!({
+ #function
+
+ #climber.climb(
+ #input_arg.as_pair().clone().into_inner(),
+ |p| Self::#child_rule(#input_arg.with_pair(p)),
+ |l, op, r| {
+ #fn_name(#input_arg.clone(), l?, op, r?)
+ },
+ )
+ });
+ // Remove the 3 last arguments to keep only the `input` one
+ function.sig.inputs.pop();
+ function.sig.inputs.pop();
+ function.sig.inputs.pop();
+ // Check that an argument remains
+ function.sig.inputs.first().ok_or_else(|| {
+ Error::new(
+ function.sig.inputs.span(),
+ "a prec_climb function needs 4 arguments",
+ )
+ })?;
+ }
+
+ // `alias` attr
+ if !f.alias_srcs.is_empty() {
+ let aliases = f.alias_srcs.iter().map(|src| &src.ident);
+ let block = &function.block;
+ function.block = parse_quote!({
+ let mut #input_arg = #input_arg;
+ // While the current rule allows shortcutting, and there is a single child, and the
+ // child can still be parsed by the current function, then skip to that child.
+ while <Self as pest_consume::PestConsumer>::allows_shortcut(#input_arg.as_rule()) {
+ if let Some(child) = #input_arg.single_child() {
+ if &<Self as pest_consume::PestConsumer>::rule_alias(child.as_rule())
+ == stringify!(#fn_name) {
+ #input_arg = child;
+ continue;
+ }
+ }
+ break
+ }
+
+ match #input_arg.as_rule() {
+ #(#rule_enum::#aliases => Self::#aliases(#input_arg),)*
+ #rule_enum::#fn_name => #block,
+ r => unreachable!(
+ "make_parser: called {} on {:?}",
+ stringify!(#fn_name),
+ r
+ )
+ }
+ });
+ }
+
+ Ok(())
+}
+
+pub fn make_parser(
+ attrs: proc_macro::TokenStream,
+ input: proc_macro::TokenStream,
+) -> Result<proc_macro2::TokenStream> {
+ let rule_enum: Ident = syn::parse(attrs)?;
+ let mut imp: ItemImpl = syn::parse(input)?;
+
+ let mut alias_map = collect_aliases(&mut imp)?;
+ let rule_alias_branches: Vec<_> = alias_map
+ .iter()
+ .flat_map(|(tgt, srcs)| iter::repeat(tgt).zip(srcs))
+ .map(|(tgt, src)| {
+ let ident = &src.ident;
+ quote!(
+ #rule_enum::#ident => stringify!(#tgt).to_string(),
+ )
+ })
+ .collect();
+ let shortcut_branches: Vec<_> = alias_map
+ .iter()
+ .flat_map(|(_tgt, srcs)| srcs)
+ .map(|AliasSrc { ident, is_shortcut }| {
+ quote!(
+ #rule_enum::#ident => #is_shortcut,
+ )
+ })
+ .collect();
+
+ let fn_map: HashMap<Ident, ParsedFn> = imp
+ .items
+ .iter_mut()
+ .flat_map(|item| match item {
+ ImplItem::Method(m) => Some(m),
+ _ => None,
+ })
+ .map(|method| {
+ let mut f = parse_fn(method, &mut alias_map)?;
+ apply_special_attrs(&mut f, &rule_enum)?;
+ Ok((f.fn_name.clone(), f))
+ })
+ .collect::<Result<_>>()?;
+
+ // Entries that remain in the alias map don't have a matching method, so we create one.
+ let extra_fns: Vec<_> = alias_map
+ .iter()
+ .map(|(tgt, srcs)| {
+ // Get the signature of one of the functions that has this alias. They should all have
+ // essentially the same signature anyways.
+ let f = fn_map.get(&srcs.first().unwrap().ident).unwrap();
+ let input_arg = f.input_arg.clone();
+ let mut sig = f.function.sig.clone();
+ sig.ident = tgt.clone();
+ let srcs = srcs.iter().map(|src| &src.ident);
+
+ Ok(parse_quote!(
+ #sig {
+ match #input_arg.as_rule() {
+ #(#rule_enum::#srcs => Self::#srcs(#input_arg),)*
+ // We can't match on #rule_enum::#tgt since `tgt` might be an arbitrary
+ // identifier.
+ r if &format!("{:?}", r) == stringify!(#tgt) =>
+ return Err(#input_arg.error(format!(
+ "make_parser: missing method for rule {}",
+ stringify!(#tgt),
+ ))),
+ r => unreachable!(
+ "make_parser: called {} on {:?}",
+ stringify!(#tgt),
+ r
+ )
+ }
+ }
+ ))
+ })
+ .collect::<Result<_>>()?;
+ imp.items.extend(extra_fns);
+
+ let ty = &imp.self_ty;
+ let (impl_generics, _, where_clause) = imp.generics.split_for_impl();
+ Ok(quote!(
+ impl #impl_generics pest_consume::PestConsumer for #ty #where_clause {
+ type Rule = #rule_enum;
+ fn rule_alias(rule: Self::Rule) -> String {
+ match rule {
+ #(#rule_alias_branches)*
+ r => format!("{:?}", r),
+ }
+ }
+ fn allows_shortcut(rule: Self::Rule) -> bool {
+ match rule {
+ #(#shortcut_branches)*
+ _ => false,
+ }
+ }
+ }
+
+ #imp
+ ))
+}
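
The attribute syntaxes parsed above (AliasArgs and PrecClimbArgs) correspond to method annotations like the ones in this sketch. Only the attribute shapes are taken from this commit; the `Rule` variants, `CalcParser`, `ParseInput`, `ParseResult`, a `term` method shaped like `num` in the earlier sketch, and the precedence-climber setup are assumptions.

    use pest::iterators::Pair;
    use pest::prec_climber::{Assoc, Operator, PrecClimber};

    lazy_static::lazy_static! {
        // Assumed operator table for the hypothetical grammar.
        static ref CLIMBER: PrecClimber<Rule> = PrecClimber::new(vec![
            Operator::new(Rule::plus, Assoc::Left),
            Operator::new(Rule::times, Assoc::Left),
        ]);
    }

    #[pest_consume::make_parser(Rule)]
    impl CalcParser {
        // `#[alias(expression)]` routes `expression` dispatch to this method when the
        // child rule is `paren_expr`; `shortcut = true` lets a lone wrapper node be
        // skipped by the generated while-loop.
        #[alias(expression, shortcut = true)]
        fn paren_expr(input: ParseInput) -> ParseResult<i64> {
            Ok(pest_consume::parse_children!(input;
                [expression(e)] => e,
            ))
        }

        // `#[prec_climb(child_rule, climber)]`: the generated body climbs over the
        // children, parsing each with `Self::term` and calling this function as the
        // infix handler; the macro then trims the signature down to just `input`.
        #[prec_climb(term, CLIMBER)]
        fn expression(input: ParseInput, l: i64, op: Pair<Rule>, r: i64) -> ParseResult<i64> {
            match op.as_rule() {
                Rule::plus => Ok(l + r),
                Rule::times => Ok(l * r),
                r => unreachable!("unexpected operator {:?}", r),
            }
        }
    }
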
diff --git a/pest_consume_macros/src/parse_children.rs b/pest_consume_macros/src/parse_children.rs
new file mode 100644
index 0000000..d6474a7
--- /dev/null
+++ b/pest_consume_macros/src/parse_children.rs
@@ -0,0 +1,215 @@
+use proc_macro2::{Span, TokenStream};
+use quote::quote;
+use syn::parse::{Parse, ParseStream, Result};
+use syn::punctuated::Punctuated;
+use syn::spanned::Spanned;
+use syn::{bracketed, parenthesized, token, Error, Expr, Ident, Pat, Token};
+
+#[derive(Debug, Clone)]
+struct ChildrenBranch {
+ pattern_span: Span,
+ pattern: Punctuated<ChildrenBranchPatternItem, Token![,]>,
+ body: Expr,
+}
+
+#[derive(Debug, Clone)]
+enum ChildrenBranchPatternItem {
+ Single { rule_name: Ident, binder: Pat },
+ Multiple { rule_name: Ident, binder: Ident },
+}
+
+#[derive(Debug, Clone)]
+struct ParseChildrenInput {
+ input_expr: Expr,
+ branches: Punctuated<ChildrenBranch, Token![,]>,
+}
+
+impl Parse for ChildrenBranch {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let contents;
+ let _: token::Bracket = bracketed!(contents in input);
+ let pattern_unparsed: TokenStream = contents.fork().parse()?;
+ let pattern_span = pattern_unparsed.span();
+ let pattern = Punctuated::parse_terminated(&contents)?;
+ let _: Token![=>] = input.parse()?;
+ let body = input.parse()?;
+
+ Ok(ChildrenBranch {
+ pattern_span,
+ pattern,
+ body,
+ })
+ }
+}
+
+impl Parse for ChildrenBranchPatternItem {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let contents;
+ let rule_name = input.parse()?;
+ parenthesized!(contents in input);
+ if input.peek(Token![..]) {
+ let binder = contents.parse()?;
+ let _: Token![..] = input.parse()?;
+ Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder })
+ } else if input.is_empty() || input.peek(Token![,]) {
+ let binder = contents.parse()?;
+ Ok(ChildrenBranchPatternItem::Single { rule_name, binder })
+ } else {
+ Err(input.error("expected `..` or nothing"))
+ }
+ }
+}
+
+impl Parse for ParseChildrenInput {
+ fn parse(input: ParseStream) -> Result<Self> {
+ let input_expr = input.parse()?;
+ let _: Token![;] = input.parse()?;
+ let branches = Punctuated::parse_terminated(input)?;
+
+ Ok(ParseChildrenInput {
+ input_expr,
+ branches,
+ })
+ }
+}
+
+fn make_parser_branch(
+ branch: &ChildrenBranch,
+ i_inputs: &Ident,
+) -> Result<TokenStream> {
+ use ChildrenBranchPatternItem::{Multiple, Single};
+
+ let body = &branch.body;
+
+ // Convert the input pattern into a pattern-match on the Rules of the children. This uses
+ // slice_patterns.
+ // A single pattern just checks that the rule matches; a variable-length pattern binds the
+ // subslice and checks, in the if-guard, that its elements all match the chosen Rule.
+ let i_variable_pattern =
+ Ident::new("___variable_pattern", Span::call_site());
+ let match_pat = branch.pattern.iter().map(|item| match item {
+ Single { rule_name, .. } => quote!(stringify!(#rule_name)),
+ Multiple { .. } => quote!(#i_variable_pattern @ ..),
+ });
+ let match_filter = branch.pattern.iter().map(|item| match item {
+ Single { .. } => quote!(),
+ Multiple { rule_name, .. } => quote!(
+ {
+ // We can't use .all() directly in the pattern guard; see
+ // https://github.com/rust-lang/rust/issues/59803.
+ let all_match = |slice: &[_]| {
+ slice.iter().all(|r|
+ *r == stringify!(#rule_name)
+ )
+ };
+ all_match(#i_variable_pattern)
+ } &&
+ ),
+ });
+
+ // Once we have found a branch that matches, we need to parse the children.
+ let mut singles_before_multiple = Vec::new();
+ let mut multiple = None;
+ let mut singles_after_multiple = Vec::new();
+ for item in &branch.pattern {
+ match item {
+ Single {
+ rule_name, binder, ..
+ } => {
+ if multiple.is_none() {
+ singles_before_multiple.push((rule_name, binder))
+ } else {
+ singles_after_multiple.push((rule_name, binder))
+ }
+ }
+ Multiple {
+ rule_name, binder, ..
+ } => {
+ if multiple.is_none() {
+ multiple = Some((rule_name, binder))
+ } else {
+ return Err(Error::new(
+ branch.pattern_span.clone(),
+ "multiple variable-length patterns are not allowed",
+ ));
+ }
+ }
+ }
+ }
+ let mut parses = Vec::new();
+ for (rule_name, binder) in singles_before_multiple.into_iter() {
+ parses.push(quote!(
+ let #binder = Self::#rule_name(
+ #i_inputs.next().unwrap()
+ )?;
+ ))
+ }
+ // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that
+ // only the unmatched inputs are left for the variable-length pattern, if any.
+ for (rule_name, binder) in singles_after_multiple.into_iter().rev() {
+ parses.push(quote!(
+ let #binder = Self::#rule_name(
+ #i_inputs.next_back().unwrap()
+ )?;
+ ))
+ }
+ if let Some((rule_name, binder)) = multiple {
+ parses.push(quote!(
+ let #binder = #i_inputs
+ .map(|i| Self::#rule_name(i))
+ .collect::<Result<Vec<_>, _>>()?
+ .into_iter();
+ ))
+ }
+
+ Ok(quote!(
+ [#(#match_pat),*] if #(#match_filter)* true => {
+ #(#parses)*
+ #body
+ }
+ ))
+}
+
+pub fn parse_children(
+ input: proc_macro::TokenStream,
+) -> Result<proc_macro2::TokenStream> {
+ let input: ParseChildrenInput = syn::parse(input)?;
+
+ let i_children_rules = Ident::new("___children_rules", Span::call_site());
+ let i_inputs = Ident::new("___inputs", Span::call_site());
+
+ let input_expr = &input.input_expr;
+ let branches = input
+ .branches
+ .iter()
+ .map(|br| make_parser_branch(br, &i_inputs))
+ .collect::<Result<Vec<_>>>()?;
+
+ Ok(quote!({
+ let #i_children_rules: Vec<_> = #input_expr.as_pair()
+ .clone()
+ .into_inner()
+ .map(|p| p.as_rule())
+ .map(<Self as pest_consume::PestConsumer>::rule_alias)
+ .collect();
+ let #i_children_rules: Vec<&str> = #i_children_rules
+ .iter()
+ .map(String::as_str)
+ .collect();
+
+ #[allow(unused_mut)]
+ let mut #i_inputs = #input_expr
+ .as_pair()
+ .clone()
+ .into_inner()
+ .map(|p| #input_expr.with_pair(p));
+
+ #[allow(unreachable_code)]
+ match #i_children_rules.as_slice() {
+ #(#branches,)*
+ [..] => return Err(#input_expr.error(
+ format!("Unexpected children: {:?}", #i_children_rules)
+ )),
+ }
+ }))
+}
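
Putting the pieces of this file together: the macro input is an expression, a `;`, then comma-separated branches, each a bracketed pattern of `rule_name(binder)` items plus at most one variable-length `rule_name(binder)..` item, followed by `=> body`. A hypothetical invocation in that shape (inside a `#[make_parser(Rule)]` impl, with a `num` method and grammar rules as assumed in the earlier sketches):

    fn sum(input: ParseInput) -> ParseResult<i64> {
        Ok(pest_consume::parse_children!(input;
            // Fixed items bind one child each, in order.
            [num(a), num(b)] => a + b,
            // The single variable-length item binds an iterator over the children
            // left over once the fixed items before it (taken from the front) and
            // after it (taken from the back) have been consumed.
            [num(first), num(rest).., num(last)] => first + rest.sum::<i64>() + last,
        ))
    }

The generated match first compares the (aliased) rule names of all children against each bracketed pattern, then parses the children with the corresponding methods and evaluates the branch body; if no pattern matches, the expansion returns the "Unexpected children" error built above.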