From bf417fadb206d6d2351a13cd7c6988977a46dd33 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Wed, 18 Sep 2019 22:37:30 +0200 Subject: Extract pest_consume into its own crate --- Cargo.lock | 13 +- Cargo.toml | 2 - dhall_syntax/Cargo.toml | 3 +- dhall_syntax/src/parser.rs | 62 +++-- pest_consume/Cargo.toml | 15 -- pest_consume/examples/csv/csv.pest | 15 -- pest_consume/examples/csv/main.rs | 69 ------ pest_consume/src/lib.rs | 325 ------------------------- pest_consume_macros/Cargo.toml | 19 -- pest_consume_macros/src/lib.rs | 27 --- pest_consume_macros/src/make_parser.rs | 424 --------------------------------- pest_consume_macros/src/match_nodes.rs | 248 ------------------- 12 files changed, 36 insertions(+), 1186 deletions(-) delete mode 100644 pest_consume/Cargo.toml delete mode 100644 pest_consume/examples/csv/csv.pest delete mode 100644 pest_consume/examples/csv/main.rs delete mode 100644 pest_consume/src/lib.rs delete mode 100644 pest_consume_macros/Cargo.toml delete mode 100644 pest_consume_macros/src/lib.rs delete mode 100644 pest_consume_macros/src/make_parser.rs delete mode 100644 pest_consume_macros/src/match_nodes.rs diff --git a/Cargo.lock b/Cargo.lock index 82212fe..5e81f7f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -116,8 +116,7 @@ dependencies = [ "lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)", "percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "pest 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "pest_consume 0.1.0", - "pest_derive 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", + "pest_consume 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "take_mut 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", ] @@ -240,17 +239,19 @@ dependencies = [ [[package]] name = "pest_consume" -version = "0.1.0" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "pest 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)", - "pest_consume_macros 0.1.0", + "pest_consume_macros 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)", "pest_derive 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro-hack 0.5.9 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] name = "pest_consume_macros" -version = "0.1.0" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "proc-macro-hack 0.5.9 (registry+https://github.com/rust-lang/crates.io-index)", "proc-macro2 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -522,6 +523,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum output_vt100 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "53cdc5b785b7a58c5aad8216b3dfa114df64b0b06ae6e1501cef91df2fbdf8f9" "checksum percent-encoding 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "d4fd5641d01c8f18a23da7b6fe29298ff4b55afcccdf78973b24cf3175fee32e" "checksum pest 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7e4fb201c5c22a55d8b24fef95f78be52738e5e1361129be1b5e862ecdb6894a" +"checksum pest_consume 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "db19e2b6df75694d2a73accd716c3e2b28d6241ad88ec140a5588eb4486eeb40" +"checksum pest_consume_macros 1.0.1 (registry+https://github.com/rust-lang/crates.io-index)" = "36dc3a65f772c034446335f2a09fa4ea7a3cc471f130acdb06e96225f0ee6da0" "checksum pest_derive 2.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "833d1ae558dc601e9a60366421196a8d94bc0ac980476d0b67e1d0988d72b2d0" "checksum pest_generator 2.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "7b9fcf299b5712d06ee128a556c94709aaa04512c4dffb8ead07c5c998447fc0" "checksum pest_meta 2.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "df43fd99896fd72c485fe47542c7b500e4ac1e8700bf995544d1317a60ded547" diff --git a/Cargo.toml b/Cargo.toml index ec6a760..1e40748 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,8 +8,6 @@ members = [ "dhall_syntax", "dhall_proc_macros", "improved_slice_patterns", - "pest_consume", - "pest_consume_macros", "serde_dhall" ] diff --git a/dhall_syntax/Cargo.toml b/dhall_syntax/Cargo.toml index 8d9cca6..b98c4a4 100644 --- a/dhall_syntax/Cargo.toml +++ b/dhall_syntax/Cargo.toml @@ -12,11 +12,10 @@ doctest = false itertools = "0.8.0" percent-encoding = "2.1.0" pest = "2.1" -pest_derive = "2.1" either = "1.5.2" take_mut = "0.2.2" hex = "0.3.2" lazy_static = "1.4.0" dhall_generated_parser = { path = "../dhall_generated_parser" } dhall_proc_macros = { path = "../dhall_proc_macros" } -pest_consume = { path = "../pest_consume" } +pest_consume = "1.0" diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 2af2d92..71fab0f 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -1,5 +1,4 @@ use itertools::Itertools; -use pest::iterators::Pair; use pest::prec_climber as pcl; use pest::prec_climber::PrecClimber; use std::rc::Rc; @@ -167,7 +166,7 @@ impl DhallParser { fn double_quote_literal( input: ParseInput, ) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [double_quote_chunk(chunks)..] => { chunks.collect() } @@ -177,7 +176,7 @@ impl DhallParser { fn double_quote_chunk( input: ParseInput, ) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [expression(e)] => { InterpolatedTextContents::Expr(e) }, @@ -264,7 +263,7 @@ impl DhallParser { fn single_quote_literal( input: ParseInput, ) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [single_quote_continue(lines)] => { let newline: ParsedText = "\n".to_string().into(); @@ -301,7 +300,7 @@ impl DhallParser { fn single_quote_continue( input: ParseInput, ) -> ParseResult>>> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [expression(e), single_quote_continue(lines)] => { let c = InterpolatedTextContents::Expr(e); let mut lines = lines; @@ -387,21 +386,15 @@ impl DhallParser { #[alias(expression, shortcut = true)] fn identifier(input: ParseInput) -> ParseResult> { Ok(match_nodes!(input.children(); - [variable(v)] => { - spanned(input, Var(v)) - }, + [variable(v)] => spanned(input, Var(v)), [expression(e)] => e, )) } fn variable(input: ParseInput) -> ParseResult> { - Ok(match_nodes!(input.children(); - [label(l), natural_literal(idx)] => { - V(l, idx) - }, - [label(l)] => { - V(l, 0) - }, + Ok(match_nodes!(input.into_children(); + [label(l), natural_literal(idx)] => V(l, idx), + [label(l)] => V(l, 0), )) } @@ -437,7 +430,7 @@ impl DhallParser { .collect()) } fn path(input: ParseInput) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [path_component(components)..] => { components.collect() } @@ -448,7 +441,7 @@ impl DhallParser { fn local( input: ParseInput, ) -> ParseResult>> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [local_path((prefix, p))] => ImportLocation::Local(prefix, p), )) } @@ -457,19 +450,19 @@ impl DhallParser { fn parent_path( input: ParseInput, ) -> ParseResult<(FilePrefix, Vec)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [path(p)] => (FilePrefix::Parent, p) )) } #[alias(local_path)] fn here_path(input: ParseInput) -> ParseResult<(FilePrefix, Vec)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [path(p)] => (FilePrefix::Here, p) )) } #[alias(local_path)] fn home_path(input: ParseInput) -> ParseResult<(FilePrefix, Vec)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [path(p)] => (FilePrefix::Home, p) )) } @@ -477,7 +470,7 @@ impl DhallParser { fn absolute_path( input: ParseInput, ) -> ParseResult<(FilePrefix, Vec)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [path(p)] => (FilePrefix::Absolute, p) )) } @@ -491,7 +484,7 @@ impl DhallParser { } fn http_raw(input: ParseInput) -> ParseResult>> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [scheme(sch), authority(auth), path(p)] => URL { scheme: sch, authority: auth, @@ -521,7 +514,7 @@ impl DhallParser { fn http( input: ParseInput, ) -> ParseResult>> { - Ok(ImportLocation::Remote(match_nodes!(input.children(); + Ok(ImportLocation::Remote(match_nodes!(input.into_children(); [http_raw(url)] => url, [http_raw(url), expression(e)] => URL { headers: Some(e), ..url }, ))) @@ -531,7 +524,7 @@ impl DhallParser { fn env( input: ParseInput, ) -> ParseResult>> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [environment_variable(v)] => ImportLocation::Env(v), )) } @@ -541,7 +534,7 @@ impl DhallParser { } #[alias(environment_variable)] fn posix_environment_variable(input: ParseInput) -> ParseResult { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [posix_environment_variable_character(chars)..] => { chars.collect() }, @@ -586,7 +579,7 @@ impl DhallParser { ) -> ParseResult>> { use crate::Import; let mode = ImportMode::Code; - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [import_type(location)] => Import { mode, location, hash: None }, [import_type(location), hash(h)] => Import { mode, location, hash: Some(h) }, )) @@ -703,9 +696,8 @@ impl DhallParser { #[alias(expression, shortcut = true)] #[prec_climb(expression, PRECCLIMBER)] fn operator_expression( - input: ParseInput, l: Expr, - op: Pair, + op: ParseInput, r: Expr, ) -> ParseResult> { use crate::BinOp::*; @@ -724,7 +716,7 @@ impl DhallParser { bool_eq => BoolEQ, bool_ne => BoolNE, equivalent => Equivalence, - r => Err(input.error(format!("Rule {:?} isn't an operator", r)))?, + r => Err(op.error(format!("Rule {:?} isn't an operator", r)))?, }; Ok(spanned_union( @@ -812,7 +804,7 @@ impl DhallParser { } fn labels(input: ParseInput) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [label(ls)..] => ls.collect(), )) } @@ -865,7 +857,7 @@ impl DhallParser { fn non_empty_record_type( input: ParseInput, ) -> ParseResult<(Expr, DupTreeMap>)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [expression(expr), record_type_entry(entries)..] => { (expr, entries.collect()) } @@ -875,7 +867,7 @@ impl DhallParser { fn record_type_entry( input: ParseInput, ) -> ParseResult<(Label, Expr)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [label(name), expression(expr)] => (name, expr) )) } @@ -883,7 +875,7 @@ impl DhallParser { fn non_empty_record_literal( input: ParseInput, ) -> ParseResult<(Expr, DupTreeMap>)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [expression(expr), record_literal_entry(entries)..] => { (expr, entries.collect()) } @@ -893,7 +885,7 @@ impl DhallParser { fn record_literal_entry( input: ParseInput, ) -> ParseResult<(Label, Expr)> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [label(name), expression(expr)] => (name, expr) )) } @@ -934,7 +926,7 @@ impl DhallParser { #[alias(expression)] fn final_expression(input: ParseInput) -> ParseResult> { - Ok(match_nodes!(input.children(); + Ok(match_nodes!(input.into_children(); [expression(e), EOI(_)] => e )) } diff --git a/pest_consume/Cargo.toml b/pest_consume/Cargo.toml deleted file mode 100644 index 1b8ebf6..0000000 --- a/pest_consume/Cargo.toml +++ /dev/null @@ -1,15 +0,0 @@ -[package] -name = "pest_consume" -version = "0.1.0" # remember to update html_root_url -authors = ["Nadrieril "] -license = "MIT OR Apache-2.0" -edition = "2018" -description = "A framework for processing the output of a pest-generated parser" -readme = "README.md" -repository = "https://github.com/Nadrieril/dhall-rust" - -[dependencies] -pest = "2.1" -pest_derive = "2.1" -proc-macro-hack = "0.5.9" -pest_consume_macros = { path = "../pest_consume_macros" } diff --git a/pest_consume/examples/csv/csv.pest b/pest_consume/examples/csv/csv.pest deleted file mode 100644 index a67af8c..0000000 --- a/pest_consume/examples/csv/csv.pest +++ /dev/null @@ -1,15 +0,0 @@ -WHITESPACE = _{ " "+ } -newline = _{ "\r\n" | "\n" } -number = { "-"? ~ (ASCII_DIGIT | ".")+ } -string = { (!"'" ~ ASCII)* } -field = ${ - number - | "'" ~ string ~ "'" -} -record = { - field ~ ("," ~ field)* - | "" -} -file = { - SOI ~ record ~ (newline ~ record)* ~ newline? ~ EOI -} diff --git a/pest_consume/examples/csv/main.rs b/pest_consume/examples/csv/main.rs deleted file mode 100644 index 88621db..0000000 --- a/pest_consume/examples/csv/main.rs +++ /dev/null @@ -1,69 +0,0 @@ -use pest_consume::{match_nodes, Error, Parser}; - -#[derive(Debug)] -enum CSVField<'a> { - Number(f64), - String(&'a str), -} -type CSVRecord<'a> = Vec>; -type CSVFile<'a> = Vec>; - -type Result = std::result::Result>; -type Node<'i> = pest_consume::Node<'i, Rule, ()>; - -#[derive(Parser)] -#[grammar = "../examples/csv/csv.pest"] -struct CSVParser; - -#[pest_consume::parser] -impl CSVParser { - fn EOI(_input: Node) -> Result<()> { - Ok(()) - } - - fn number(input: Node) -> Result { - input - .as_str() - .parse::() - // `input.error` links the error to the location in the input file where it occurred. - .map_err(|e| input.error(e.to_string())) - } - - fn string(input: Node) -> Result<&str> { - Ok(input.as_str()) - } - - fn field(input: Node) -> Result { - Ok(match_nodes!(input.children(); - [number(n)] => CSVField::Number(n), - [string(s)] => CSVField::String(s), - )) - } - - fn record(input: Node) -> Result { - Ok(match_nodes!(input.children(); - [field(fields)..] => fields.collect(), - )) - } - - fn file(input: Node) -> Result { - Ok(match_nodes!(input.children(); - [record(records).., EOI(_)] => records.collect(), - )) - } -} - -fn parse_csv(input_str: &str) -> Result { - let inputs = CSVParser::parse(Rule::file, input_str)?; - Ok(match_nodes!(; inputs; - [file(e)] => e, - )) -} - -fn main() { - let successful_parse = parse_csv("-273.15, ' a string '\n\n42, 0"); - println!("success: {:?}", successful_parse.unwrap()); - - let unsuccessful_parse = parse_csv("0, 273.15.12"); - println!("failure: {}", unsuccessful_parse.unwrap_err()); -} diff --git a/pest_consume/src/lib.rs b/pest_consume/src/lib.rs deleted file mode 100644 index c1d62e5..0000000 --- a/pest_consume/src/lib.rs +++ /dev/null @@ -1,325 +0,0 @@ -//! `pest_consume` extends [pest] to make it easy to consume a pest parse tree. -//! Given a grammar file, pest generates a parser that outputs an untyped parse tree. Then that -//! parse tree needs to be transformed into whatever datastructures your application uses. -//! `pest_consume` provides two macros to make this easy. -//! -//! Features of `pest_consume` include: -//! - strong types; -//! - consuming children uses an intuitive syntax; -//! - error handling is well integrated. -//! -//! # Example -//! -//! Here is the [CSV example from the doc](https://pest.rs/book/examples/csv.html), -//! using `pest_consume`. -//! -//! The pest grammar file contains: -//! ```text -//! field = { (ASCII_DIGIT | "." | "-")+ } -//! record = { field ~ ("," ~ field)* } -//! file = { SOI ~ (record ~ ("\r\n" | "\n"))* ~ EOI } -//! ``` -//! -//! ```no_run -//! use pest_consume::{match_nodes, Error, Parser}; -//! -//! type Result = std::result::Result>; -//! type Node<'i> = pest_consume::Node<'i, Rule, ()>; -//! -//! // Construct the first half of the parser using pest as usual. -//! #[derive(Parser)] -//! #[grammar = "../examples/csv/csv.pest"] -//! struct CSVParser; -//! -//! // This is the other half of the parser, using pest_consume. -//! #[pest_consume::parser] -//! impl CSVParser { -//! fn EOI(_input: Node) -> Result<()> { -//! Ok(()) -//! } -//! fn field(input: Node) -> Result { -//! input -//! .as_str() -//! .parse::() -//! .map_err(|e| input.error(e.to_string())) -//! } -//! fn record(input: Node) -> Result> { -//! Ok(match_nodes!(input.children(); -//! [field(fields)..] => fields.collect(), -//! )) -//! } -//! fn file(input: Node) -> Result>> { -//! Ok(match_nodes!(input.children(); -//! [record(records).., EOI(_)] => records.collect(), -//! )) -//! } -//! } -//! -//! fn parse_csv(input_str: &str) -> Result>> { -//! let inputs = CSVParser::parse(Rule::file, input_str)?; -//! Ok(match_nodes!(; inputs; -//! [file(e)] => e, -//! )) -//! } -//! -//! fn main() { -//! let parsed = parse_csv("-273.15, 12\n42, 0").unwrap(); -//! let mut sum = 0.; -//! for record in parsed { -//! for field in record { -//! sum += field; -//! } -//! } -//! println!("{}", sum); -//! } -//! ``` -//! -//! There are several things to note: -//! - we use two macros provided by `pest_consume`: `parser` and `match_nodes`; -//! - there is one `fn` item per (non-silent) rule in the grammar; -//! - we associate an output type to every rule; -//! - there is no need to fiddle with `.into_inner()`, `.next()` or `.unwrap()`, as is common when using pest -//! -//! # How it works -//! -//! The main types of this crate ([Node], [Nodes] and [Parser]) are mostly wrappers around -//! the corresponding [pest] types. -//! -//! The `pest_consume::parser` macro does almost nothing when not using advanced features; -//! most of the magic happens in `match_nodes`. -//! `match_nodes` desugars rather straightforwardly into calls to the `fn` items corresponding to -//! the rules matched on. -//! For example: -//! ```ignore -//! match_nodes!(input.children(); -//! [field(fields)..] => fields.collect(), -//! ) -//! ``` -//! desugars into: -//! ```ignore -//! let nodes = { input.children() }; -//! if ... { // check that all rules in `nodes` are the `field` rule -//! let fields = nodes -//! .map(|node| Self::field(node)) // Recursively parse children nodes -//! ... // Propagate errors -//! { fields.collect() } -//! } else { -//! ... // error because we got unexpected rules -//! } -//! ``` -//! -//! # Advanced features -//! -//! TODO -//! -//! - rule aliasing -//! - rule shortcutting -//! - user data - -pub use pest::error::Error; -use pest::Parser as PestParser; -use pest::RuleType; -pub use pest_derive::Parser; - -#[proc_macro_hack::proc_macro_hack] -pub use pest_consume_macros::match_nodes; -pub use pest_consume_macros::parser; - -mod node { - use super::Parser; - use pest::error::{Error, ErrorVariant}; - use pest::iterators::{Pair, Pairs}; - use pest::Parser as PestParser; - use pest::{RuleType, Span}; - - /// Carries a pest Pair alongside custom user data. - #[derive(Debug, Clone)] - pub struct Node<'input, Rule: RuleType, Data> { - pair: Pair<'input, Rule>, - user_data: Data, - } - - /// Iterator over `Node`s. It is created by `Node::children` or `Nodes::new`. - #[derive(Debug, Clone)] - pub struct Nodes<'input, Rule: RuleType, Data> { - pairs: Pairs<'input, Rule>, - span: Span<'input>, - user_data: Data, - } - - impl<'i, R: RuleType, D> Node<'i, R, D> { - pub fn new(pair: Pair<'i, R>, user_data: D) -> Self { - Node { pair, user_data } - } - /// Create an error that points to the span of the input. - pub fn error(&self, message: String) -> Error { - Error::new_from_span( - ErrorVariant::CustomError { message }, - self.as_span(), - ) - } - /// Reconstruct the input with a new pair, passing the user data along. - pub fn with_pair(&self, new_pair: Pair<'i, R>) -> Self - where - D: Clone, - { - Node { - pair: new_pair, - user_data: self.user_data.clone(), - } - } - /// If the contained pair has exactly one child, return a new Self containing it. - pub fn single_child(&self) -> Option - where - D: Clone, - { - let mut children = self.pair.clone().into_inner(); - if let Some(child) = children.next() { - if children.next().is_none() { - return Some(self.with_pair(child)); - } - } - None - } - /// Return an iterator over the children of this input - // Can't use `-> impl Iterator` because of weird lifetime limitations - // (see https://github.com/rust-lang/rust/issues/61997). - pub fn children(&self) -> Nodes<'i, R, D> - where - D: Clone, - { - Nodes { - pairs: self.as_pair().clone().into_inner(), - span: self.as_span(), - user_data: self.user_data(), - } - } - - pub fn user_data(&self) -> D - where - D: Clone, - { - self.user_data.clone() - } - pub fn as_pair(&self) -> &Pair<'i, R> { - &self.pair - } - pub fn into_pair(self) -> Pair<'i, R> { - self.pair - } - pub fn as_span(&self) -> Span<'i> { - self.pair.as_span() - } - pub fn as_str(&self) -> &'i str { - self.pair.as_str() - } - pub fn as_rule(&self) -> R { - self.pair.as_rule() - } - pub fn as_aliased_rule(&self) -> C::AliasedRule - where - C: Parser, - ::Parser: PestParser, - { - C::rule_alias(self.as_rule()) - } - } - - impl<'i, R: RuleType, D> Nodes<'i, R, D> { - /// `input` must be the _original_ input that `pairs` is pointing to. - pub fn new(input: &'i str, pairs: Pairs<'i, R>, user_data: D) -> Self { - let span = Span::new(input, 0, input.len()).unwrap(); - Nodes { - pairs, - span, - user_data, - } - } - /// Create an error that points to the span of the input. - pub fn error(&self, message: String) -> Error { - Error::new_from_span( - ErrorVariant::CustomError { message }, - self.span.clone(), - ) - } - pub fn aliased_rules(&self) -> Vec - where - D: Clone, - C: Parser, - ::Parser: PestParser, - { - self.clone().map(|p| p.as_aliased_rule::()).collect() - } - /// Reconstruct the input with a new pair, passing the user data along. - fn with_pair(&self, new_pair: Pair<'i, R>) -> Node<'i, R, D> - where - D: Clone, - { - Node::new(new_pair, self.user_data.clone()) - } - - pub fn as_pairs(&self) -> &Pairs<'i, R> { - &self.pairs - } - pub fn into_pairs(self) -> Pairs<'i, R> { - self.pairs - } - } - - impl<'i, R, D> Iterator for Nodes<'i, R, D> - where - R: RuleType, - D: Clone, - { - type Item = Node<'i, R, D>; - - fn next(&mut self) -> Option { - let child_pair = self.pairs.next()?; - let child = self.with_pair(child_pair); - Some(child) - } - } - - impl<'i, R, D> DoubleEndedIterator for Nodes<'i, R, D> - where - R: RuleType, - D: Clone, - { - fn next_back(&mut self) -> Option { - let child_pair = self.pairs.next_back()?; - let child = self.with_pair(child_pair); - Some(child) - } - } -} - -pub use node::{Node, Nodes}; - -/// Used by the macros. -/// Do not implement manually. -pub trait Parser { - type Rule: RuleType; - type AliasedRule: RuleType; - type Parser: PestParser; - - fn rule_alias(rule: Self::Rule) -> Self::AliasedRule; - fn allows_shortcut(rule: Self::Rule) -> bool; - - /// Parses a `&str` starting from `rule` - fn parse<'i>( - rule: Self::Rule, - input_str: &'i str, - ) -> Result, Error> { - Self::parse_with_userdata(rule, input_str, ()) - } - - /// Parses a `&str` starting from `rule`, carrying `user_data` through the parser methods. - fn parse_with_userdata<'i, D>( - rule: Self::Rule, - input_str: &'i str, - user_data: D, - ) -> Result, Error> { - let pairs = Self::Parser::parse(rule, input_str)?; - Ok(Nodes::new(input_str, pairs, user_data)) - } -} diff --git a/pest_consume_macros/Cargo.toml b/pest_consume_macros/Cargo.toml deleted file mode 100644 index aaa0b10..0000000 --- a/pest_consume_macros/Cargo.toml +++ /dev/null @@ -1,19 +0,0 @@ -[package] -name = "pest_consume_macros" -version = "0.1.0" # remember to update html_root_url -authors = ["Nadrieril "] -license = "MIT OR Apache-2.0" -edition = "2018" -description = "Macros for pest_consume" -readme = "README.md" -repository = "https://github.com/Nadrieril/dhall-rust" - -[lib] -proc-macro = true -doctest = false - -[dependencies] -quote = "1.0.2" -proc-macro2 = "1.0.2" -proc-macro-hack = "0.5.9" -syn = { version = "1.0.5", features = ["full"] } diff --git a/pest_consume_macros/src/lib.rs b/pest_consume_macros/src/lib.rs deleted file mode 100644 index d726b5d..0000000 --- a/pest_consume_macros/src/lib.rs +++ /dev/null @@ -1,27 +0,0 @@ -//! This crate contains the code-generation primitives for the [dhall-rust][dhall-rust] crate. -//! This is highly unstable and breaks regularly; use at your own risk. -//! -//! [dhall-rust]: https://github.com/Nadrieril/dhall-rust - -extern crate proc_macro; - -mod make_parser; -mod match_nodes; - -use proc_macro::TokenStream; - -#[proc_macro_attribute] -pub fn parser(attrs: TokenStream, input: TokenStream) -> TokenStream { - TokenStream::from(match make_parser::make_parser(attrs, input) { - Ok(tokens) => tokens, - Err(err) => err.to_compile_error(), - }) -} - -#[proc_macro_hack::proc_macro_hack] -pub fn match_nodes(input: TokenStream) -> TokenStream { - TokenStream::from(match match_nodes::match_nodes(input) { - Ok(tokens) => tokens, - Err(err) => err.to_compile_error(), - }) -} diff --git a/pest_consume_macros/src/make_parser.rs b/pest_consume_macros/src/make_parser.rs deleted file mode 100644 index 5bf8fe3..0000000 --- a/pest_consume_macros/src/make_parser.rs +++ /dev/null @@ -1,424 +0,0 @@ -use std::collections::HashMap; -use std::iter; - -use quote::quote; -use syn::parse::{Parse, ParseStream, Result}; -use syn::spanned::Spanned; -use syn::{ - parse_quote, Error, Expr, FnArg, Ident, ImplItem, ImplItemMethod, ItemImpl, - LitBool, Pat, Path, Token, -}; - -/// Ext. trait adding `partition_filter` to `Vec`. Would like to use `Vec::drain_filter` -/// but it's unstable for now. -pub trait VecPartitionFilterExt { - fn partition_filter(&mut self, predicate: F) -> Vec - where - F: FnMut(&mut Item) -> bool; -} - -impl VecPartitionFilterExt for Vec { - fn partition_filter(&mut self, mut predicate: F) -> Vec - where - F: FnMut(&mut Item) -> bool, - { - let mut ret = Vec::new(); - let mut i = 0; - while i != self.len() { - if predicate(&mut self[i]) { - ret.push(self.remove(i)) - } else { - i += 1; - } - } - ret - } -} - -mod kw { - syn::custom_keyword!(shortcut); - syn::custom_keyword!(rule); - syn::custom_keyword!(parser); -} - -struct MakeParserAttrs { - parser: Path, - rule_enum: Path, -} - -struct AliasArgs { - target: Ident, - is_shortcut: bool, -} - -struct PrecClimbArgs { - child_rule: Ident, - climber: Expr, -} - -struct AliasSrc { - ident: Ident, - is_shortcut: bool, -} - -struct ParsedFn<'a> { - // Body of the function - function: &'a mut ImplItemMethod, - // Name of the function. - fn_name: Ident, - // Name of the first argument of the function, which should be of type `Node`. - input_arg: Ident, - // List of aliases pointing to this function - alias_srcs: Vec, -} - -impl Parse for MakeParserAttrs { - fn parse(input: ParseStream) -> Result { - // By default, the pest parser is the same type as the pest_consume one - let mut parser = parse_quote!(Self); - // By default, use the `Rule` type in scope - let mut rule_enum = parse_quote!(Rule); - - while !input.is_empty() { - let lookahead = input.lookahead1(); - if lookahead.peek(kw::parser) { - let _: kw::parser = input.parse()?; - let _: Token![=] = input.parse()?; - parser = input.parse()?; - } else if lookahead.peek(kw::rule) { - let _: kw::rule = input.parse()?; - let _: Token![=] = input.parse()?; - rule_enum = input.parse()?; - } else { - return Err(lookahead.error()); - } - - if input.peek(Token![,]) { - let _: Token![,] = input.parse()?; - } else { - break; - } - } - - Ok(MakeParserAttrs { parser, rule_enum }) - } -} - -impl Parse for AliasArgs { - fn parse(input: ParseStream) -> Result { - let target = input.parse()?; - let is_shortcut = if input.peek(Token![,]) { - // #[alias(rule, shortcut = true)] - let _: Token![,] = input.parse()?; - let _: kw::shortcut = input.parse()?; - let _: Token![=] = input.parse()?; - let b: LitBool = input.parse()?; - b.value - } else { - // #[alias(rule)] - false - }; - Ok(AliasArgs { - target, - is_shortcut, - }) - } -} - -impl Parse for PrecClimbArgs { - fn parse(input: ParseStream) -> Result { - let child_rule = input.parse()?; - let _: Token![,] = input.parse()?; - let climber = input.parse()?; - Ok(PrecClimbArgs { - child_rule, - climber, - }) - } -} - -fn collect_aliases( - imp: &mut ItemImpl, -) -> Result>> { - let functions = imp.items.iter_mut().flat_map(|item| match item { - ImplItem::Method(m) => Some(m), - _ => None, - }); - - let mut alias_map = HashMap::new(); - for function in functions { - let fn_name = function.sig.ident.clone(); - let mut alias_attrs = function - .attrs - .partition_filter(|attr| attr.path.is_ident("alias")) - .into_iter(); - - if let Some(attr) = alias_attrs.next() { - let args: AliasArgs = attr.parse_args()?; - alias_map.entry(args.target).or_insert_with(Vec::new).push( - AliasSrc { - ident: fn_name, - is_shortcut: args.is_shortcut, - }, - ); - } else { - // Self entry - alias_map - .entry(fn_name.clone()) - .or_insert_with(Vec::new) - .push(AliasSrc { - ident: fn_name, - is_shortcut: false, - }); - } - if let Some(attr) = alias_attrs.next() { - return Err(Error::new( - attr.span(), - "expected at most one alias attribute", - )); - } - } - - Ok(alias_map) -} - -fn parse_fn<'a>( - function: &'a mut ImplItemMethod, - alias_map: &mut HashMap>, -) -> Result> { - let fn_name = function.sig.ident.clone(); - // Get the name of the first (`input`) function argument - let input_arg = function.sig.inputs.first().ok_or_else(|| { - Error::new( - function.sig.inputs.span(), - "a rule function needs an `input` argument", - ) - })?; - let input_arg = match &input_arg { - FnArg::Receiver(_) => return Err(Error::new( - input_arg.span(), - "a rule function should not have a `self` argument", - )), - FnArg::Typed(input_arg) => match &*input_arg.pat{ - Pat::Ident(ident) => ident.ident.clone(), - _ => return Err(Error::new( - input_arg.span(), - "this argument should be a plain identifier instead of a pattern", - )), - } - }; - - let alias_srcs = alias_map.remove(&fn_name).unwrap_or_else(Vec::new); - - Ok(ParsedFn { - function, - fn_name, - input_arg, - alias_srcs, - }) -} - -fn apply_special_attrs(f: &mut ParsedFn, rule_enum: &Path) -> Result<()> { - let function = &mut *f.function; - let fn_name = &f.fn_name; - let input_arg = &f.input_arg; - - *function = parse_quote!( - #[allow(non_snake_case)] - #function - ); - - // `prec_climb` attr - let prec_climb_attrs: Vec<_> = function - .attrs - .partition_filter(|attr| attr.path.is_ident("prec_climb")); - - if prec_climb_attrs.len() > 1 { - return Err(Error::new( - prec_climb_attrs[1].span(), - "expected at most one prec_climb attribute", - )); - } else if prec_climb_attrs.is_empty() { - // do nothing - } else { - let attr = prec_climb_attrs.into_iter().next().unwrap(); - let PrecClimbArgs { - child_rule, - climber, - } = attr.parse_args()?; - - function.block = parse_quote!({ - #function - - #climber.climb( - #input_arg.as_pair().clone().into_inner(), - |p| Self::#child_rule(#input_arg.with_pair(p)), - |l, op, r| { - #fn_name(#input_arg.clone(), l?, op, r?) - }, - ) - }); - // Remove the 3 last arguments to keep only the `input` one - function.sig.inputs.pop(); - function.sig.inputs.pop(); - function.sig.inputs.pop(); - // Check that an argument remains - function.sig.inputs.first().ok_or_else(|| { - Error::new( - function.sig.inputs.span(), - "a prec_climb function needs 4 arguments", - ) - })?; - } - - // `alias` attr - // f.alias_srcs has always at least 1 element because it has an entry pointing from itself. - if f.alias_srcs.len() > 1 { - let aliases = f - .alias_srcs - .iter() - .map(|src| &src.ident) - .filter(|i| i != &fn_name); - let block = &function.block; - let self_ty = quote!(); - function.block = parse_quote!({ - let mut #input_arg = #input_arg; - // While the current rule allows shortcutting, and there is a single child, and the - // child can still be parsed by the current function, then skip to that child. - while #self_ty::allows_shortcut(#input_arg.as_rule()) { - if let ::std::option::Option::Some(child) = #input_arg.single_child() { - if child.as_aliased_rule::() == #self_ty::AliasedRule::#fn_name { - #input_arg = child; - continue; - } - } - break - } - - match #input_arg.as_rule() { - #(#rule_enum::#aliases => Self::#aliases(#input_arg),)* - #rule_enum::#fn_name => #block, - r => ::std::unreachable!( - "make_parser: called {} on {:?}", - ::std::stringify!(#fn_name), - r - ) - } - }); - } - - Ok(()) -} - -pub fn make_parser( - attrs: proc_macro::TokenStream, - input: proc_macro::TokenStream, -) -> Result { - let attrs: MakeParserAttrs = syn::parse(attrs)?; - let parser = &attrs.parser; - let rule_enum = &attrs.rule_enum; - let mut imp: ItemImpl = syn::parse(input)?; - - let mut alias_map = collect_aliases(&mut imp)?; - let rule_alias_branches: Vec<_> = alias_map - .iter() - .flat_map(|(tgt, srcs)| iter::repeat(tgt).zip(srcs)) - .map(|(tgt, src)| { - let ident = &src.ident; - quote!( - #rule_enum::#ident => Self::AliasedRule::#tgt, - ) - }) - .collect(); - let aliased_rule_variants: Vec<_> = - alias_map.iter().map(|(tgt, _)| tgt.clone()).collect(); - let shortcut_branches: Vec<_> = alias_map - .iter() - .flat_map(|(_tgt, srcs)| srcs) - .map(|AliasSrc { ident, is_shortcut }| { - quote!( - #rule_enum::#ident => #is_shortcut, - ) - }) - .collect(); - - let fn_map: HashMap = imp - .items - .iter_mut() - .flat_map(|item| match item { - ImplItem::Method(m) => Some(m), - _ => None, - }) - .map(|method| { - let mut f = parse_fn(method, &mut alias_map)?; - apply_special_attrs(&mut f, &rule_enum)?; - Ok((f.fn_name.clone(), f)) - }) - .collect::>()?; - - // Entries that remain in the alias map don't have a matching method, so we create one. - let extra_fns: Vec<_> = alias_map - .iter() - .map(|(tgt, srcs)| { - // Get the signature of one of the functions that has this alias. They should all have - // essentially the same signature anyways. - let f = fn_map.get(&srcs.first().unwrap().ident).unwrap(); - let input_arg = f.input_arg.clone(); - let mut sig = f.function.sig.clone(); - sig.ident = tgt.clone(); - let srcs = srcs.iter().map(|src| &src.ident); - - Ok(parse_quote!( - #sig { - match #input_arg.as_rule() { - #(#rule_enum::#srcs => Self::#srcs(#input_arg),)* - // We can't match on #rule_enum::#tgt since `tgt` might be an arbitrary - // identifier. - r if &::std::format!("{:?}", r) == ::std::stringify!(#tgt) => - return ::std::result::Result::Err(#input_arg.error(::std::format!( - "make_parser: missing method for rule {}", - ::std::stringify!(#tgt), - ))), - r => ::std::unreachable!( - "make_parser: called {} on {:?}", - ::std::stringify!(#tgt), - r - ) - } - } - )) - }) - .collect::>()?; - imp.items.extend(extra_fns); - - let ty = &imp.self_ty; - let (impl_generics, _, where_clause) = imp.generics.split_for_impl(); - Ok(quote!( - #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] - #[allow(non_camel_case_types)] - enum AliasedRule { - #(#aliased_rule_variants,)* - } - - impl #impl_generics ::pest_consume::Parser for #ty #where_clause { - type Rule = #rule_enum; - type AliasedRule = AliasedRule; - type Parser = #parser; - fn rule_alias(rule: Self::Rule) -> Self::AliasedRule { - match rule { - #(#rule_alias_branches)* - // TODO: return a proper error ? - r => ::std::unreachable!("Rule {:?} does not have a corresponding parsing method", r), - } - } - fn allows_shortcut(rule: Self::Rule) -> bool { - match rule { - #(#shortcut_branches)* - _ => false, - } - } - } - - #imp - )) -} diff --git a/pest_consume_macros/src/match_nodes.rs b/pest_consume_macros/src/match_nodes.rs deleted file mode 100644 index a91d1f4..0000000 --- a/pest_consume_macros/src/match_nodes.rs +++ /dev/null @@ -1,248 +0,0 @@ -use proc_macro2::{Span, TokenStream}; -use quote::quote; -use syn::parse::{Parse, ParseStream, Result}; -use syn::punctuated::Punctuated; -use syn::spanned::Spanned; -use syn::{ - bracketed, parenthesized, parse_quote, token, Error, Expr, Ident, Pat, - Token, Type, -}; - -#[derive(Clone)] -struct MatchBranch { - // Patterns all have the form [a, b, c.., d], with a bunch of simple patterns, - // optionally a multiple pattern, and then some more simple patterns. - singles_before_multiple: Vec<(Ident, Pat)>, - multiple: Option<(Ident, Ident)>, - singles_after_multiple: Vec<(Ident, Pat)>, - - body: Expr, -} - -#[derive(Clone)] -enum MatchBranchPatternItem { - Single { - rule_name: Ident, - binder: Pat, - }, - Multiple { - rule_name: Ident, - binder: Ident, - slice_token: Token![..], - }, -} - -#[derive(Clone)] -struct MacroInput { - parser: Type, - input_expr: Expr, - branches: Punctuated, -} - -impl Parse for MatchBranch { - fn parse(input: ParseStream) -> Result { - let contents; - let _: token::Bracket = bracketed!(contents in input); - - let pattern: Punctuated = - Punctuated::parse_terminated(&contents)?; - use MatchBranchPatternItem::{Multiple, Single}; - let mut singles_before_multiple = Vec::new(); - let mut multiple = None; - let mut singles_after_multiple = Vec::new(); - for item in pattern.clone() { - match item { - Single { rule_name, binder } => { - if multiple.is_none() { - singles_before_multiple.push((rule_name, binder)) - } else { - singles_after_multiple.push((rule_name, binder)) - } - } - Multiple { - rule_name, - binder, - slice_token, - } => { - if multiple.is_none() { - multiple = Some((rule_name, binder)) - } else { - return Err(Error::new( - slice_token.span(), - "multiple variable-length patterns are not allowed", - )); - } - } - } - } - - let _: Token![=>] = input.parse()?; - let body = input.parse()?; - - Ok(MatchBranch { - singles_before_multiple, - multiple, - singles_after_multiple, - body, - }) - } -} - -impl Parse for MatchBranchPatternItem { - fn parse(input: ParseStream) -> Result { - let contents; - let rule_name = input.parse()?; - parenthesized!(contents in input); - if input.peek(Token![..]) { - let binder = contents.parse()?; - let slice_token = input.parse()?; - Ok(MatchBranchPatternItem::Multiple { - rule_name, - binder, - slice_token, - }) - } else if input.is_empty() || input.peek(Token![,]) { - let binder = contents.parse()?; - Ok(MatchBranchPatternItem::Single { rule_name, binder }) - } else { - Err(input.error("expected `..` or nothing")) - } - } -} - -impl Parse for MacroInput { - fn parse(input: ParseStream) -> Result { - let parser = if input.peek(token::Lt) { - let _: token::Lt = input.parse()?; - let parser = input.parse()?; - let _: token::Gt = input.parse()?; - let _: Token![;] = input.parse()?; - parser - } else { - parse_quote!(Self) - }; - let input_expr = input.parse()?; - let _: Token![;] = input.parse()?; - let branches = Punctuated::parse_terminated(input)?; - - Ok(MacroInput { - parser, - input_expr, - branches, - }) - } -} - -fn make_branch( - branch: &MatchBranch, - i_nodes: &Ident, - i_node_rules: &Ident, - parser: &Type, -) -> Result { - let aliased_rule = quote!(<#parser as ::pest_consume::Parser>::AliasedRule); - - // Find which branch to take - let mut conditions = Vec::new(); - let start = branch.singles_before_multiple.len(); - let end = branch.singles_after_multiple.len(); - conditions.push(quote!( - #start + #end <= #i_node_rules.len() - )); - for (i, (rule_name, _)) in branch.singles_before_multiple.iter().enumerate() - { - conditions.push(quote!( - #i_node_rules[#i] == #aliased_rule::#rule_name - )) - } - for (i, (rule_name, _)) in branch.singles_after_multiple.iter().enumerate() - { - conditions.push(quote!( - #i_node_rules[#i_node_rules.len()-1 - #i] == #aliased_rule::#rule_name - )) - } - if let Some((rule_name, _)) = &branch.multiple { - conditions.push(quote!( - { - // We can't use .all() directly in the pattern guard; see - // https://github.com/rust-lang/rust/issues/59803. - let all_match = |slice: &[_]| { - slice.iter().all(|r| - *r == #aliased_rule::#rule_name - ) - }; - all_match(&#i_node_rules[#start..#i_node_rules.len() - #end]) - } - )) - } else { - // No variable-length pattern, so the size must be exactly the number of patterns - conditions.push(quote!( - #start + #end == #i_node_rules.len() - )) - } - - // Once we have found a branch that matches, we need to parse the nodes. - let mut parses = Vec::new(); - for (rule_name, binder) in branch.singles_before_multiple.iter() { - parses.push(quote!( - let #binder = #parser::#rule_name( - #i_nodes.next().unwrap() - )?; - )) - } - // Note the `rev()`: we are taking nodes from the end of the iterator in reverse order, so that - // only the unmatched nodes are left in the iterator for the variable-length pattern, if any. - for (rule_name, binder) in branch.singles_after_multiple.iter().rev() { - parses.push(quote!( - let #binder = #parser::#rule_name( - #i_nodes.next_back().unwrap() - )?; - )) - } - if let Some((rule_name, binder)) = &branch.multiple { - parses.push(quote!( - let #binder = #i_nodes - .map(|i| #parser::#rule_name(i)) - .collect::<::std::result::Result<::std::vec::Vec<_>, _>>()? - .into_iter(); - )) - } - - let body = &branch.body; - Ok(quote!( - _ if #(#conditions &&)* true => { - #(#parses)* - #body - } - )) -} - -pub fn match_nodes( - input: proc_macro::TokenStream, -) -> Result { - let input: MacroInput = syn::parse(input)?; - - let i_nodes = Ident::new("___nodes", input.input_expr.span()); - let i_node_rules = Ident::new("___node_rules", Span::call_site()); - - let input_expr = &input.input_expr; - let parser = &input.parser; - let branches = input - .branches - .iter() - .map(|br| make_branch(br, &i_nodes, &i_node_rules, parser)) - .collect::>>()?; - - Ok(quote!({ - #[allow(unused_mut)] - let mut #i_nodes = #input_expr; - let #i_node_rules = #i_nodes.aliased_rules::<#parser>(); - - #[allow(unreachable_code)] - match () { - #(#branches,)* - _ => return ::std::result::Result::Err(#i_nodes.error( - std::format!("Nodes didn't match any pattern: {:?}", #i_node_rules) - )), - } - })) -} -- cgit v1.2.3