summary refs log tree commit diff
path: root/pest_consume_macros
diff options
context:
space:
mode:
author Nadrieril 2019-09-18 22:37:30 +0200
committer Nadrieril 2019-09-18 22:37:30 +0200
commit bf417fadb206d6d2351a13cd7c6988977a46dd33 (patch)
tree 06effc0f10795a208e929bdf9fdbbbfa6d94cc31 /pest_consume_macros
parent f8341503c778db92f46fa9f6f368a2013e4c0c1a (diff)
Extract pest_consume into its own crate
Diffstat (limited to 'pest_consume_macros')
-rw-r--r-- pest_consume_macros/Cargo.toml 19
-rw-r--r-- pest_consume_macros/src/lib.rs 27
-rw-r--r-- pest_consume_macros/src/make_parser.rs 424
-rw-r--r-- pest_consume_macros/src/match_nodes.rs 248
4 files changed, 0 insertions, 718 deletions
diff --git a/pest_consume_macros/Cargo.toml b/pest_consume_macros/Cargo.toml
deleted file mode 100644
index aaa0b10..0000000
--- a/pest_consume_macros/Cargo.toml
+++ /dev/null
@@ -1,19 +0,0 @@
-[package]
-name = "pest_consume_macros"
-version = "0.1.0" # remember to update html_root_url
-authors = ["Nadrieril <nadrieril@users.noreply.github.com>"]
-license = "MIT OR Apache-2.0"
-edition = "2018"
-description = "Macros for pest_consume"
-readme = "README.md"
-repository = "https://github.com/Nadrieril/dhall-rust"
-
-[lib]
-proc-macro = true
-doctest = false
-
-[dependencies]
-quote = "1.0.2"
-proc-macro2 = "1.0.2"
-proc-macro-hack = "0.5.9"
-syn = { version = "1.0.5", features = ["full"] }
diff --git a/pest_consume_macros/src/lib.rs b/pest_consume_macros/src/lib.rs
deleted file mode 100644
index d726b5d..0000000
--- a/pest_consume_macros/src/lib.rs
+++ /dev/null
@@ -1,27 +0,0 @@
-//! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate.
-//! This is highly unstable and breaks regularly; use at your own risk.
-//!
-//! [dhall-rust]: https://github.com/Nadrieril/dhall-rust
-
-extern crate proc_macro;
-
-mod make_parser;
-mod match_nodes;
-
-use proc_macro::TokenStream;
-
-#[proc_macro_attribute]
-pub fn parser(attrs: TokenStream, input: TokenStream) -> TokenStream {
- TokenStream::from(match make_parser::make_parser(attrs, input) {
- Ok(tokens) => tokens,
- Err(err) => err.to_compile_error(),
- })
-}
-
-#[proc_macro_hack::proc_macro_hack]
-pub fn match_nodes(input: TokenStream) -> TokenStream {
- TokenStream::from(match match_nodes::match_nodes(input) {
- Ok(tokens) => tokens,
- Err(err) => err.to_compile_error(),
- })
-}
diff --git a/pest_consume_macros/src/make_parser.rs b/pest_consume_macros/src/make_parser.rs
deleted file mode 100644
index 5bf8fe3..0000000
--- a/pest_consume_macros/src/make_parser.rs
+++ /dev/null
@@ -1,424 +0,0 @@
-use std::collections::HashMap;
-use std::iter;
-
-use quote::quote;
-use syn::parse::{Parse, ParseStream, Result};
-use syn::spanned::Spanned;
-use syn::{
- parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl,
- LitBool, Pat, Path, Token,
-};
-
-/// Ext. trait adding `partition_filter` to `Vec`. Would like to use `Vec::drain_filter`
-/// but it's unstable for now.
-pub trait VecPartitionFilterExt<Item> {
- fn partition_filter<F>(&mut self, predicate: F) -> Vec<Item>
- where
- F: FnMut(&mut Item) -> bool;
-}
-
-impl<Item> VecPartitionFilterExt<Item> for Vec<Item> {
- fn partition_filter<F>(&mut self, mut predicate: F) -> Vec<Item>
- where
- F: FnMut(&mut Item) -> bool,
- {
- let mut ret = Vec::new();
- let mut i = 0;
- while i != self.len() {
- if predicate(&mut self[i]) {
- ret.push(self.remove(i))
- } else {
- i += 1;
- }
- }
- ret
- }
-}
-
-mod kw {
- syn::custom_keyword!(shortcut);
- syn::custom_keyword!(rule);
- syn::custom_keyword!(parser);
-}
-
-struct MakeParserAttrs {
- parser: Path,
- rule_enum: Path,
-}
-
-struct AliasArgs {
- target: Ident,
- is_shortcut: bool,
-}
-
-struct PrecClimbArgs {
- child_rule: Ident,
- climber: Expr,
-}
-
-struct AliasSrc {
- ident: Ident,
- is_shortcut: bool,
-}
-
-struct ParsedFn<'a> {
- // Body of the function
- function: &'a mut ImplItemMethod,
- // Name of the function.
- fn_name: Ident,
- // Name of the first argument of the function, which should be of type `Node`.
- input_arg: Ident,
- // List of aliases pointing to this function
- alias_srcs: Vec<AliasSrc>,
-}
-
-impl Parse for MakeParserAttrs {
- fn parse(input: ParseStream) -> Result<Self> {
- // By default, the pest parser is the same type as the pest_consume one
- let mut parser = parse_quote!(Self);
- // By default, use the `Rule` type in scope
- let mut rule_enum = parse_quote!(Rule);
-
- while !input.is_empty() {
- let lookahead = input.lookahead1();
- if lookahead.peek(kw::parser) {
- let _: kw::parser = input.parse()?;
- let _: Token![=] = input.parse()?;
- parser = input.parse()?;
- } else if lookahead.peek(kw::rule) {
- let _: kw::rule = input.parse()?;
- let _: Token![=] = input.parse()?;
- rule_enum = input.parse()?;
- } else {
- return Err(lookahead.error());
- }
-
- if input.peek(Token![,]) {
- let _: Token![,] = input.parse()?;
- } else {
- break;
- }
- }
-
- Ok(MakeParserAttrs { parser, rule_enum })
- }
-}
-
-impl Parse for AliasArgs {
- fn parse(input: ParseStream) -> Result<Self> {
- let target = input.parse()?;
- let is_shortcut = if input.peek(Token![,]) {
- // #[alias(rule, shortcut = true)]
- let _: Token![,] = input.parse()?;
- let _: kw::shortcut = input.parse()?;
- let _: Token![=] = input.parse()?;
- let b: LitBool = input.parse()?;
- b.value
- } else {
- // #[alias(rule)]
- false
- };
- Ok(AliasArgs {
- target,
- is_shortcut,
- })
- }
-}
-
-impl Parse for PrecClimbArgs {
- fn parse(input: ParseStream) -> Result<Self> {
- let child_rule = input.parse()?;
- let _: Token![,] = input.parse()?;
- let climber = input.parse()?;
- Ok(PrecClimbArgs {
- child_rule,
- climber,
- })
- }
-}
-
-fn collect_aliases(
- imp: &mut ItemImpl,
-) -> Result<HashMap<Ident, Vec<AliasSrc>>> {
- let functions = imp.items.iter_mut().flat_map(|item| match item {
- ImplItem::Method(m) => Some(m),
- _ => None,
- });
-
- let mut alias_map = HashMap::new();
- for function in functions {
- let fn_name = function.sig.ident.clone();
- let mut alias_attrs = function
- .attrs
- .partition_filter(|attr| attr.path.is_ident("alias"))
- .into_iter();
-
- if let Some(attr) = alias_attrs.next() {
- let args: AliasArgs = attr.parse_args()?;
- alias_map.entry(args.target).or_insert_with(Vec::new).push(
- AliasSrc {
- ident: fn_name,
- is_shortcut: args.is_shortcut,
- },
- );
- } else {
- // Self entry
- alias_map
- .entry(fn_name.clone())
- .or_insert_with(Vec::new)
- .push(AliasSrc {
- ident: fn_name,
- is_shortcut: false,
- });
- }
- if let Some(attr) = alias_attrs.next() {
- return Err(Error::new(
- attr.span(),
- "expected at most one alias attribute",
- ));
- }
- }
-
- Ok(alias_map)
-}
-
-fn parse_fn<'a>(
- function: &'a mut ImplItemMethod,
- alias_map: &mut HashMap<Ident, Vec<AliasSrc>>,
-) -> Result<ParsedFn<'a>> {
- let fn_name = function.sig.ident.clone();
- // Get the name of the first (`input`) function argument
- let input_arg = function.sig.inputs.first().ok_or_else(|| {
- Error::new(
- function.sig.inputs.span(),
- "a rule function needs an `input` argument",
- )
- })?;
- let input_arg = match &input_arg {
- FnArg::Receiver(_) => return Err(Error::new(
- input_arg.span(),
- "a rule function should not have a `self` argument",
- )),
- FnArg::Typed(input_arg) => match &*input_arg.pat{
- Pat::Ident(ident) => ident.ident.clone(),
- _ => return Err(Error::new(
- input_arg.span(),
- "this argument should be a plain identifier instead of a pattern",
- )),
- }
- };
-
- let alias_srcs = alias_map.remove(&fn_name).unwrap_or_else(Vec::new);
-
- Ok(ParsedFn {
- function,
- fn_name,
- input_arg,
- alias_srcs,
- })
-}
-
-fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Path) -> Result<()> {
- let function = &mut *f.function;
- let fn_name = &f.fn_name;
- let input_arg = &f.input_arg;
-
- *function = parse_quote!(
- #[allow(non_snake_case)]
- #function
- );
-
- // `prec_climb` attr
- let prec_climb_attrs: Vec<_> = function
- .attrs
- .partition_filter(|attr| attr.path.is_ident("prec_climb"));
-
- if prec_climb_attrs.len() > 1 {
- return Err(Error::new(
- prec_climb_attrs[1].span(),
- "expected at most one prec_climb attribute",
- ));
- } else if prec_climb_attrs.is_empty() {
- // do nothing
- } else {
- let attr = prec_climb_attrs.into_iter().next().unwrap();
- let PrecClimbArgs {
- child_rule,
- climber,
- } = attr.parse_args()?;
-
- function.block = parse_quote!({
- #function
-
- #climber.climb(
- #input_arg.as_pair().clone().into_inner(),
- |p| Self::#child_rule(#input_arg.with_pair(p)),
- |l, op, r| {
- #fn_name(#input_arg.clone(), l?, op, r?)
- },
- )
- });
- // Remove the 3 last arguments to keep only the `input` one
- function.sig.inputs.pop();
- function.sig.inputs.pop();
- function.sig.inputs.pop();
- // Check that an argument remains
- function.sig.inputs.first().ok_or_else(|| {
- Error::new(
- function.sig.inputs.span(),
- "a prec_climb function needs 4 arguments",
- )
- })?;
- }
-
- // `alias` attr
- // f.alias_srcs has always at least 1 element because it has an entry pointing from itself.
- if f.alias_srcs.len() > 1 {
- let aliases = f
- .alias_srcs
- .iter()
- .map(|src| &src.ident)
- .filter(|i| i != &fn_name);
- let block = &function.block;
- let self_ty = quote!(<Self as ::pest_consume::Parser>);
- function.block = parse_quote!({
- let mut #input_arg = #input_arg;
- // While the current rule allows shortcutting, and there is a single child, and the
- // child can still be parsed by the current function, then skip to that child.
- while #self_ty::allows_shortcut(#input_arg.as_rule()) {
- if let ::std::option::Option::Some(child) = #input_arg.single_child() {
- if child.as_aliased_rule::<Self>() == #self_ty::AliasedRule::#fn_name {
- #input_arg = child;
- continue;
- }
- }
- break
- }
-
- match #input_arg.as_rule() {
- #(#rule_enum::#aliases => Self::#aliases(#input_arg),)*
- #rule_enum::#fn_name => #block,
- r => ::std::unreachable!(
- "make_parser: called {} on {:?}",
- ::std::stringify!(#fn_name),
- r
- )
- }
- });
- }
-
- Ok(())
-}
-
-pub fn make_parser(
- attrs: proc_macro::TokenStream,
- input: proc_macro::TokenStream,
-) -> Result<proc_macro2::TokenStream> {
- let attrs: MakeParserAttrs = syn::parse(attrs)?;
- let parser = &attrs.parser;
- let rule_enum = &attrs.rule_enum;
- let mut imp: ItemImpl = syn::parse(input)?;
-
- let mut alias_map = collect_aliases(&mut imp)?;
- let rule_alias_branches: Vec<_> = alias_map
- .iter()
- .flat_map(|(tgt, srcs)| iter::repeat(tgt).zip(srcs))
- .map(|(tgt, src)| {
- let ident = &src.ident;
- quote!(
- #rule_enum::#ident => Self::AliasedRule::#tgt,
- )
- })
- .collect();
- let aliased_rule_variants: Vec<_> =
- alias_map.iter().map(|(tgt, _)| tgt.clone()).collect();
- let shortcut_branches: Vec<_> = alias_map
- .iter()
- .flat_map(|(_tgt, srcs)| srcs)
- .map(|AliasSrc { ident, is_shortcut }| {
- quote!(
- #rule_enum::#ident => #is_shortcut,
- )
- })
- .collect();
-
- let fn_map: HashMap<Ident, ParsedFn> = imp
- .items
- .iter_mut()
- .flat_map(|item| match item {
- ImplItem::Method(m) => Some(m),
- _ => None,
- })
- .map(|method| {
- let mut f = parse_fn(method, &mut alias_map)?;
- apply_special_attrs(&mut f, &rule_enum)?;
- Ok((f.fn_name.clone(), f))
- })
- .collect::<Result<_>>()?;
-
- // Entries that remain in the alias map don't have a matching method, so we create one.
- let extra_fns: Vec<_> = alias_map
- .iter()
- .map(|(tgt, srcs)| {
- // Get the signature of one of the functions that has this alias. They should all have
- // essentially the same signature anyways.
- let f = fn_map.get(&srcs.first().unwrap().ident).unwrap();
- let input_arg = f.input_arg.clone();
- let mut sig = f.function.sig.clone();
- sig.ident = tgt.clone();
- let srcs = srcs.iter().map(|src| &src.ident);
-
- Ok(parse_quote!(
- #sig {
- match #input_arg.as_rule() {
- #(#rule_enum::#srcs => Self::#srcs(#input_arg),)*
- // We can't match on #rule_enum::#tgt since `tgt` might be an arbitrary
- // identifier.
- r if &::std::format!("{:?}", r) == ::std::stringify!(#tgt) =>
- return ::std::result::Result::Err(#input_arg.error(::std::format!(
- "make_parser: missing method for rule {}",
- ::std::stringify!(#tgt),
- ))),
- r => ::std::unreachable!(
- "make_parser: called {} on {:?}",
- ::std::stringify!(#tgt),
- r
- )
- }
- }
- ))
- })
- .collect::<Result<_>>()?;
- imp.items.extend(extra_fns);
-
- let ty = &imp.self_ty;
- let (impl_generics, _, where_clause) = imp.generics.split_for_impl();
- Ok(quote!(
- #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
- #[allow(non_camel_case_types)]
- enum AliasedRule {
- #(#aliased_rule_variants,)*
- }
-
- impl #impl_generics ::pest_consume::Parser for #ty #where_clause {
- type Rule = #rule_enum;
- type AliasedRule = AliasedRule;
- type Parser = #parser;
- fn rule_alias(rule: Self::Rule) -> Self::AliasedRule {
- match rule {
- #(#rule_alias_branches)*
- // TODO: return a proper error ?
- r => ::std::unreachable!("Rule {:?} does not have a corresponding parsing method", r),
- }
- }
- fn allows_shortcut(rule: Self::Rule) -> bool {
- match rule {
- #(#shortcut_branches)*
- _ => false,
- }
- }
- }
-
- #imp
- ))
-}
diff --git a/pest_consume_macros/src/match_nodes.rs b/pest_consume_macros/src/match_nodes.rs
deleted file mode 100644
index a91d1f4..0000000
--- a/pest_consume_macros/src/match_nodes.rs
+++ /dev/null
@@ -1,248 +0,0 @@
-use proc_macro2::{Span, TokenStream};
-use quote::quote;
-use syn::parse::{Parse, ParseStream, Result};
-use syn::punctuated::Punctuated;
-use syn::spanned::Spanned;
-use syn::{
- bracketed, parenthesized, parse_quote, token, Error, Expr, Ident, Pat,
- Token, Type,
-};
-
-#[derive(Clone)]
-struct MatchBranch {
- // Patterns all have the form [a, b, c.., d], with a bunch of simple patterns,
- // optionally a multiple pattern, and then some more simple patterns.
- singles_before_multiple: Vec<(Ident, Pat)>,
- multiple: Option<(Ident, Ident)>,
- singles_after_multiple: Vec<(Ident, Pat)>,
-
- body: Expr,
-}
-
-#[derive(Clone)]
-enum MatchBranchPatternItem {
- Single {
- rule_name: Ident,
- binder: Pat,
- },
- Multiple {
- rule_name: Ident,
- binder: Ident,
- slice_token: Token![..],
- },
-}
-
-#[derive(Clone)]
-struct MacroInput {
- parser: Type,
- input_expr: Expr,
- branches: Punctuated<MatchBranch, Token![,]>,
-}
-
-impl Parse for MatchBranch {
- fn parse(input: ParseStream) -> Result<Self> {
- let contents;
- let _: token::Bracket = bracketed!(contents in input);
-
- let pattern: Punctuated<MatchBranchPatternItem, Token![,]> =
- Punctuated::parse_terminated(&contents)?;
- use MatchBranchPatternItem::{Multiple, Single};
- let mut singles_before_multiple = Vec::new();
- let mut multiple = None;
- let mut singles_after_multiple = Vec::new();
- for item in pattern.clone() {
- match item {
- Single { rule_name, binder } => {
- if multiple.is_none() {
- singles_before_multiple.push((rule_name, binder))
- } else {
- singles_after_multiple.push((rule_name, binder))
- }
- }
- Multiple {
- rule_name,
- binder,
- slice_token,
- } => {
- if multiple.is_none() {
- multiple = Some((rule_name, binder))
- } else {
- return Err(Error::new(
- slice_token.span(),
- "multiple variable-length patterns are not allowed",
- ));
- }
- }
- }
- }
-
- let _: Token![=>] = input.parse()?;
- let body = input.parse()?;
-
- Ok(MatchBranch {
- singles_before_multiple,
- multiple,
- singles_after_multiple,
- body,
- })
- }
-}
-
-impl Parse for MatchBranchPatternItem {
- fn parse(input: ParseStream) -> Result<Self> {
- let contents;
- let rule_name = input.parse()?;
- parenthesized!(contents in input);
- if input.peek(Token![..]) {
- let binder = contents.parse()?;
- let slice_token = input.parse()?;
- Ok(MatchBranchPatternItem::Multiple {
- rule_name,
- binder,
- slice_token,
- })
- } else if input.is_empty() || input.peek(Token![,]) {
- let binder = contents.parse()?;
- Ok(MatchBranchPatternItem::Single { rule_name, binder })
- } else {
- Err(input.error("expected `..` or nothing"))
- }
- }
-}
-
-impl Parse for MacroInput {
- fn parse(input: ParseStream) -> Result<Self> {
- let parser = if input.peek(token::Lt) {
- let _: token::Lt = input.parse()?;
- let parser = input.parse()?;
- let _: token::Gt = input.parse()?;
- let _: Token![;] = input.parse()?;
- parser
- } else {
- parse_quote!(Self)
- };
- let input_expr = input.parse()?;
- let _: Token![;] = input.parse()?;
- let branches = Punctuated::parse_terminated(input)?;
-
- Ok(MacroInput {
- parser,
- input_expr,
- branches,
- })
- }
-}
-
-fn make_branch(
- branch: &MatchBranch,
- i_nodes: &Ident,
- i_node_rules: &Ident,
- parser: &Type,
-) -> Result<TokenStream> {
- let aliased_rule = quote!(<#parser as ::pest_consume::Parser>::AliasedRule);
-
- // Find which branch to take
- let mut conditions = Vec::new();
- let start = branch.singles_before_multiple.len();
- let end = branch.singles_after_multiple.len();
- conditions.push(quote!(
- #start + #end <= #i_node_rules.len()
- ));
- for (i, (rule_name, _)) in branch.singles_before_multiple.iter().enumerate()
- {
- conditions.push(quote!(
- #i_node_rules[#i] == #aliased_rule::#rule_name
- ))
- }
- for (i, (rule_name, _)) in branch.singles_after_multiple.iter().enumerate()
- {
- conditions.push(quote!(
- #i_node_rules[#i_node_rules.len()-1 - #i] == #aliased_rule::#rule_name
- ))
- }
- if let Some((rule_name, _)) = &branch.multiple {
- conditions.push(quote!(
- {
- // We can't use .all() directly in the pattern guard; see
- // https://github.com/rust-lang/rust/issues/59803.
- let all_match = |slice: &[_]| {
- slice.iter().all(|r|
- *r == #aliased_rule::#rule_name
- )
- };
- all_match(&#i_node_rules[#start..#i_node_rules.len() - #end])
- }
- ))
- } else {
- // No variable-length pattern, so the size must be exactly the number of patterns
- conditions.push(quote!(
- #start + #end == #i_node_rules.len()
- ))
- }
-
- // Once we have found a branch that matches, we need to parse the nodes.
- let mut parses = Vec::new();
- for (rule_name, binder) in branch.singles_before_multiple.iter() {
- parses.push(quote!(
- let #binder = #parser::#rule_name(
- #i_nodes.next().unwrap()
- )?;
- ))
- }
- // Note the `rev()`: we are taking nodes from the end of the iterator in reverse order, so that
- // only the unmatched nodes are left in the iterator for the variable-length pattern, if any.
- for (rule_name, binder) in branch.singles_after_multiple.iter().rev() {
- parses.push(quote!(
- let #binder = #parser::#rule_name(
- #i_nodes.next_back().unwrap()
- )?;
- ))
- }
- if let Some((rule_name, binder)) = &branch.multiple {
- parses.push(quote!(
- let #binder = #i_nodes
- .map(|i| #parser::#rule_name(i))
- .collect::<::std::result::Result<::std::vec::Vec<_>, _>>()?
- .into_iter();
- ))
- }
-
- let body = &branch.body;
- Ok(quote!(
- _ if #(#conditions &&)* true => {
- #(#parses)*
- #body
- }
- ))
-}
-
-pub fn match_nodes(
- input: proc_macro::TokenStream,
-) -> Result<proc_macro2::TokenStream> {
- let input: MacroInput = syn::parse(input)?;
-
- let i_nodes = Ident::new("___nodes", input.input_expr.span());
- let i_node_rules = Ident::new("___node_rules", Span::call_site());
-
- let input_expr = &input.input_expr;
- let parser = &input.parser;
- let branches = input
- .branches
- .iter()
- .map(|br| make_branch(br, &i_nodes, &i_node_rules, parser))
- .collect::<Result<Vec<_>>>()?;
-
- Ok(quote!({
- #[allow(unused_mut)]
- let mut #i_nodes = #input_expr;
- let #i_node_rules = #i_nodes.aliased_rules::<#parser>();
-
- #[allow(unreachable_code)]
- match () {
- #(#branches,)*
- _ => return ::std::result::Result::Err(#i_nodes.error(
- std::format!("Nodes didn't match any pattern: {:?}", #i_node_rules)
- )),
- }
- }))
-}