author     Nadrieril    2019-09-01 18:38:39 +0200
committer  Nadrieril    2019-09-01 22:41:23 +0200
commit     1baef509afe52ab285e73469fc597de8f4e166b6 (patch)
tree       7e298d5b58c96cdb2e9a56be9469711d952df96d /dhall
parent     be51899f7d5f1f9ede689ca0a9707a0aca3d31c4 (diff)
Change parser macros to use a function-like syntax
This makes the parser code look much less magical.
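To illustrate the change, here is a small before/after sketch, trimmed from the parser rules rewritten in the diff below: rules previously declared through the `rule!`/`captured_str!`/`children!` macro DSL become ordinary `fn` items inside `make_parser!`, with child matching handled by the new `parse_children!` proc macro.

```rust
// Before: rules were declared through a bespoke macro DSL.
dhall_proc_macros::make_parser! {
    rule!(simple_label<Label>;
        captured_str!(s) => Label::from(s.trim().to_owned())
    );
    rule!(label<Label>; children!(
        [simple_label(l)] => l,
        [quoted_label(l)] => l,
    ));
}

// After: each rule is a plain function over a ParseInput, and child
// patterns go through the new parse_children! proc macro.
make_parser! {
    fn simple_label(input: ParseInput<Rule>) -> ParseResult<Label> {
        Ok(Label::from(input.as_str().trim().to_owned()))
    }
    fn label(input: ParseInput<Rule>) -> ParseResult<Label> {
        Ok(parse_children!(input;
            [simple_label(l)] => l,
            [quoted_label(l)] => l,
        ))
    }
}
```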
Diffstat

-rw-r--r--  dhall_proc_macros/src/lib.rs     |    8
-rw-r--r--  dhall_proc_macros/src/parser.rs  |  494
-rw-r--r--  dhall_syntax/Cargo.toml          |    1
-rw-r--r--  dhall_syntax/src/lib.rs          |    1
-rw-r--r--  dhall_syntax/src/parser.rs       | 1675

5 files changed, 1181 insertions, 998 deletions
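The other notable change, visible in the dhall_syntax/src/parser.rs hunks below, is that the old `prec_climb!` rule variant becomes a `#[prec_climb]` attribute on an ordinary function. A trimmed excerpt of the new `operator_expression` rule (the full match over operator `Rule` variants is elided):

```rust
#[prec_climb(application_expression, make_precclimber())]
fn operator_expression(
    input: ParseInput<Rule>,
    l: ParsedExpr,
    op: Pair<Rule>,
    r: ParsedExpr,
) -> ParseResult<ParsedExpr> {
    // Maps the operator pair to the corresponding BinOp and rebuilds
    // the expression as BinOp(op, l, r); see the diff for the full body.
    unimplemented!()
}
```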
diff --git a/dhall_proc_macros/src/lib.rs b/dhall_proc_macros/src/lib.rs index 37c9985..46d93e9 100644 --- a/dhall_proc_macros/src/lib.rs +++ b/dhall_proc_macros/src/lib.rs @@ -22,3 +22,11 @@ pub fn make_parser(input: TokenStream) -> TokenStream { Err(err) => err.to_compile_error(), }) } + +#[proc_macro] +pub fn parse_children(input: TokenStream) -> TokenStream { + TokenStream::from(match parser::parse_children(input) { + Ok(tokens) => tokens, + Err(err) => err.to_compile_error(), + }) +} diff --git a/dhall_proc_macros/src/parser.rs b/dhall_proc_macros/src/parser.rs index bb4e894..2618bec 100644 --- a/dhall_proc_macros/src/parser.rs +++ b/dhall_proc_macros/src/parser.rs @@ -2,7 +2,11 @@ use proc_macro2::{Span, TokenStream}; use quote::quote; use syn::parse::{Parse, ParseStream, Result}; use syn::punctuated::Punctuated; -use syn::{bracketed, parenthesized, token, Expr, Ident, Pat, Token, Type}; +use syn::spanned::Spanned; +use syn::{ + bracketed, parenthesized, parse_quote, token, Error, Expr, Ident, ItemFn, + Pat, ReturnType, Token, Type, +}; mod rule_kw { syn::custom_keyword!(rule); @@ -16,73 +20,40 @@ struct Rules(Vec<Rule>); #[derive(Debug, Clone)] struct Rule { - rule_token: rule_kw::rule, - bang_token: Token![!], - paren_token: token::Paren, name: Ident, - lt_token: token::Lt, output_type: Type, - gt_token: token::Gt, contents: RuleContents, - semi_token: Token![;], } #[derive(Debug, Clone)] enum RuleContents { - Empty, - CapturedString { - span: Option<Ident>, - captured_str_token: rule_kw::captured_str, - bang_token: Token![!], - paren_token: token::Paren, - pattern: Pat, - fat_arrow_token: Token![=>], - body: Expr, - }, - Children { - span: Option<Ident>, - children_token: rule_kw::children, - bang_token: Token![!], - paren_token: token::Paren, - branches: Punctuated<ChildrenBranch, Token![,]>, - }, PrecClimb { - span: Option<Ident>, - prec_climb_token: rule_kw::prec_climb, - bang_token: Token![!], - paren_token: token::Paren, child_rule: Ident, - comma_token: Token![,], climber: Expr, - comma_token2: Token![,], - pattern: Pat, - fat_arrow_token: Token![=>], - body: Expr, + function: ItemFn, + }, + Function { + function: ItemFn, }, } #[derive(Debug, Clone)] struct ChildrenBranch { - bracket_token: token::Bracket, - pattern_unparsed: TokenStream, + pattern_span: Span, pattern: Punctuated<ChildrenBranchPatternItem, Token![,]>, - fat_arrow_token: Token![=>], body: Expr, } #[derive(Debug, Clone)] enum ChildrenBranchPatternItem { - Single { - rule_name: Ident, - paren_token: token::Paren, - binder: Pat, - }, - Multiple { - rule_name: Ident, - paren_token: token::Paren, - binder: Ident, - slice_token: Token![..], - }, + Single { rule_name: Ident, binder: Pat }, + Multiple { rule_name: Ident, binder: Ident }, +} + +#[derive(Debug, Clone)] +struct ParseChildrenInput { + input_expr: Expr, + branches: Punctuated<ChildrenBranch, Token![,]>, } impl Parse for Rules { @@ -97,73 +68,50 @@ impl Parse for Rules { impl Parse for Rule { fn parse(input: ParseStream) -> Result<Self> { - let contents; - Ok(Rule { - rule_token: input.parse()?, - bang_token: input.parse()?, - paren_token: parenthesized!(contents in input), - name: contents.parse()?, - lt_token: contents.parse()?, - output_type: contents.parse()?, - gt_token: contents.parse()?, - contents: contents.parse()?, - semi_token: input.parse()?, - }) - } -} + let function: ItemFn = input.parse()?; + let (recognized_attrs, remaining_attrs) = function + .attrs + .iter() + .cloned() + .partition::<Vec<_>, _>(|attr| attr.path.is_ident("prec_climb")); 
+ let function = ItemFn { + attrs: remaining_attrs, + ..(function.clone()) + }; -impl Parse for RuleContents { - fn parse(input: ParseStream) -> Result<Self> { - if input.is_empty() { - return Ok(RuleContents::Empty); - } - let _: Token![;] = input.parse()?; - let span = if input.peek(Ident) && input.peek2(Token![;]) { - let span: Ident = input.parse()?; - let _: Token![;] = input.parse()?; - Some(span) - } else { - None + let name = function.sig.ident.clone(); + let output_type = match &function.sig.output { + ReturnType::Default => parse_quote!(()), + ReturnType::Type(_, t) => (**t).clone(), }; - let lookahead = input.lookahead1(); - if lookahead.peek(rule_kw::captured_str) { - let contents; - Ok(RuleContents::CapturedString { - span, - captured_str_token: input.parse()?, - bang_token: input.parse()?, - paren_token: parenthesized!(contents in input), - pattern: contents.parse()?, - fat_arrow_token: input.parse()?, - body: input.parse()?, - }) - } else if lookahead.peek(rule_kw::children) { - let contents; - Ok(RuleContents::Children { - span, - children_token: input.parse()?, - bang_token: input.parse()?, - paren_token: parenthesized!(contents in input), - branches: Punctuated::parse_terminated(&contents)?, - }) - } else if lookahead.peek(rule_kw::prec_climb) { - let contents; - Ok(RuleContents::PrecClimb { - span, - prec_climb_token: input.parse()?, - bang_token: input.parse()?, - paren_token: parenthesized!(contents in input), - child_rule: contents.parse()?, - comma_token: contents.parse()?, - climber: contents.parse()?, - comma_token2: contents.parse()?, - pattern: contents.parse()?, - fat_arrow_token: contents.parse()?, - body: contents.parse()?, + if recognized_attrs.is_empty() { + Ok(Rule { + name, + output_type, + contents: RuleContents::Function { function }, }) + } else if recognized_attrs.len() != 1 { + Err(input.error("expected a prec_climb attribute")) } else { - Err(lookahead.error()) + let attr = recognized_attrs.into_iter().next().unwrap(); + let (child_rule, climber) = + attr.parse_args_with(|input: ParseStream| { + let child_rule: Ident = input.parse()?; + let _: Token![,] = input.parse()?; + let climber: Expr = input.parse()?; + Ok((child_rule, climber)) + })?; + + Ok(Rule { + name, + output_type, + contents: RuleContents::PrecClimb { + child_rule, + climber, + function, + }, + }) } } } @@ -171,40 +119,52 @@ impl Parse for RuleContents { impl Parse for ChildrenBranch { fn parse(input: ParseStream) -> Result<Self> { let contents; + let _: token::Bracket = bracketed!(contents in input); + let pattern_unparsed: TokenStream = contents.fork().parse()?; + let pattern_span = pattern_unparsed.span(); + let pattern = Punctuated::parse_terminated(&contents)?; + let _: Token![=>] = input.parse()?; + let body = input.parse()?; + Ok(ChildrenBranch { - bracket_token: bracketed!(contents in input), - pattern_unparsed: contents.fork().parse()?, - pattern: Punctuated::parse_terminated(&contents)?, - fat_arrow_token: input.parse()?, - body: input.parse()?, + pattern_span, + pattern, + body, }) } } impl Parse for ChildrenBranchPatternItem { fn parse(input: ParseStream) -> Result<Self> { - let rule_name = input.parse()?; let contents; - let paren_token = parenthesized!(contents in input); + let rule_name = input.parse()?; + parenthesized!(contents in input); if input.peek(Token![..]) { - Ok(ChildrenBranchPatternItem::Multiple { - rule_name, - paren_token, - binder: contents.parse()?, - slice_token: input.parse()?, - }) + let binder = contents.parse()?; + let _: Token![..] 
= input.parse()?; + Ok(ChildrenBranchPatternItem::Multiple { rule_name, binder }) } else if input.is_empty() || input.peek(Token![,]) { - Ok(ChildrenBranchPatternItem::Single { - rule_name, - paren_token, - binder: contents.parse()?, - }) + let binder = contents.parse()?; + Ok(ChildrenBranchPatternItem::Single { rule_name, binder }) } else { Err(input.error("expected `..` or nothing")) } } } +impl Parse for ParseChildrenInput { + fn parse(input: ParseStream) -> Result<Self> { + let input_expr = input.parse()?; + let _: Token![;] = input.parse()?; + let branches = Punctuated::parse_terminated(input)?; + + Ok(ParseChildrenInput { + input_expr, + branches, + }) + } +} + fn make_construct_precclimbers(rules: &Rules) -> Result<TokenStream> { let mut entries: Vec<TokenStream> = Vec::new(); for rule in &rules.0 { @@ -233,11 +193,16 @@ fn make_entrypoints(rules: &Rules) -> Result<TokenStream> { entries.push(quote!( #[allow(non_snake_case, dead_code)] fn #name<'a>( - input: Rc<str>, + input_str: &str, pair: Pair<'a, Rule>, - ) -> ParseResult<#output_type> { + ) -> #output_type { let climbers = construct_precclimbers(); - Parsers::#name((&climbers, input), pair) + let input = ParseInput { + climbers: &climbers, + original_input_str: input_str.to_string().into(), + pair + }; + Parsers::#name(input) } )) } @@ -250,128 +215,37 @@ fn make_entrypoints(rules: &Rules) -> Result<TokenStream> { )) } -fn make_parser_branch(branch: &ChildrenBranch) -> TokenStream { - let ChildrenBranch { - pattern, - body, - pattern_unparsed, - .. - } = branch; - let variable_pattern = Ident::new("variable_pattern", Span::call_site()); - let match_pat = pattern.iter().map(|item| match item { - ChildrenBranchPatternItem::Single { rule_name, .. } => { - quote!(Rule::#rule_name) - } - ChildrenBranchPatternItem::Multiple { .. } => { - quote!(#variable_pattern..) - } - }); - let match_filter = pattern.iter().map(|item| match item { - ChildrenBranchPatternItem::Single { .. } => quote!(true &&), - ChildrenBranchPatternItem::Multiple { rule_name, .. } => { - quote!(#variable_pattern.iter().all(|r| r == &Rule::#rule_name) &&) - } - }); - quote!( - [#(#match_pat),*] if #(#match_filter)* true => { - parse_children!((climbers, input.clone()), iter; - [#pattern_unparsed] => { - #[allow(unused_variables)] - let res: Result<_, String> = try { #body }; - res.map_err(|msg| - custom_parse_error(&pair, msg) - ) - } - ) - } - ) -} - -fn make_parser_expr(rule: &Rule) -> Result<TokenStream> { - let name = &rule.name; - let expr = match &rule.contents { - RuleContents::Empty => quote!(Ok(())), - RuleContents::CapturedString { pattern, body, .. } => quote!( - let #pattern = pair.as_str(); - let res: Result<_, String> = try { #body }; - res.map_err(|msg| custom_parse_error(&pair, msg)) - ), - RuleContents::PrecClimb { - child_rule, - pattern, - body, - .. - } => quote!( - let climber = climbers.get(&Rule::#name).unwrap(); - climber.climb( - pair.clone().into_inner(), - |p| Parsers::#child_rule((climbers, input.clone()), p), - |l, op, r| { - let #pattern = (l?, op, r?); - let res: Result<_, String> = try { #body }; - res.map_err(|msg| custom_parse_error(&pair, msg)) - }, - ) - ), - RuleContents::Children { branches, .. } => { - let branches = branches.iter().map(make_parser_branch); - quote!( - let children_rules: Vec<Rule> = pair - .clone() - .into_inner() - .map(|p| p.as_rule()) - .collect(); - - #[allow(unused_mut)] - let mut iter = pair.clone().into_inner(); - - #[allow(unreachable_code)] - match children_rules.as_slice() { - #(#branches,)* - [..] 
=> Err(custom_parse_error( - &pair, - format!("Unexpected children: {:?}", children_rules) - )), - } - ) - } - }; - Ok(expr) -} - fn make_parsers(rules: &Rules) -> Result<TokenStream> { - let mut entries: Vec<TokenStream> = Vec::new(); - for rule in &rules.0 { - let span_def = match &rule.contents { - RuleContents::CapturedString { - span: Some(span), .. - } - | RuleContents::Children { - span: Some(span), .. - } - | RuleContents::PrecClimb { - span: Some(span), .. - } => Some(quote!( - let #span = Span::make(input.clone(), pair.as_span()); - )), - _ => None, - }; - + let entries = rules.0.iter().map(|rule| { let name = &rule.name; let output_type = &rule.output_type; - let expr = make_parser_expr(rule)?; - - entries.push(quote!( - #[allow(non_snake_case, dead_code)] - fn #name<'a>( - (climbers, input): (&HashMap<Rule, PrecClimber<Rule>>, Rc<str>), - pair: Pair<'a, Rule>, - ) -> ParseResult<#output_type> { - #span_def - #expr - } - )) - } + match &rule.contents { + RuleContents::PrecClimb { + child_rule, + function, + .. + } => quote!( + #[allow(non_snake_case, dead_code)] + fn #name<'a, 'climbers>( + input: ParseInput<'a, 'climbers, Rule>, + ) -> #output_type { + #function + let climber = input.climbers.get(&Rule::#name).unwrap(); + climber.climb( + input.pair.clone().into_inner(), + |p| Parsers::#child_rule(input.with_pair(p)), + |l, op, r| { + #name(input.clone(), l?, op, r?) + }, + ) + } + ), + RuleContents::Function { function } => quote!( + #[allow(non_snake_case, dead_code)] + #function + ), + } + }); Ok(quote!( struct Parsers; @@ -384,7 +258,7 @@ fn make_parsers(rules: &Rules) -> Result<TokenStream> { pub fn make_parser( input: proc_macro::TokenStream, ) -> Result<proc_macro2::TokenStream> { - let rules: Rules = syn::parse_macro_input::parse(input.clone())?; + let rules: Rules = syn::parse(input.clone())?; let construct_precclimbers = make_construct_precclimbers(&rules)?; let entrypoints = make_entrypoints(&rules)?; @@ -396,3 +270,123 @@ pub fn make_parser( #parsers )) } + +fn make_parser_branch(branch: &ChildrenBranch) -> Result<TokenStream> { + use ChildrenBranchPatternItem::{Multiple, Single}; + + let body = &branch.body; + + // Convert the input pattern into a pattern-match on the Rules of the children. This uses + // slice_patterns. + // A single pattern just checks that the rule matches; a variable-length pattern binds the + // subslice and checks that they all match the chosen Rule in the `if`-condition. + let variable_pattern_ident = + Ident::new("variable_pattern", Span::call_site()); + let match_pat = branch.pattern.iter().map(|item| match item { + Single { rule_name, .. } => quote!(Rule::#rule_name), + Multiple { .. } => quote!(#variable_pattern_ident..), + }); + let match_filter = branch.pattern.iter().map(|item| match item { + Single { .. } => quote!(), + Multiple { rule_name, .. } => quote!( + #variable_pattern_ident.iter().all(|r| r == &Rule::#rule_name) && + ), + }); + + // Once we have found a branch that matches, we need to parse the children. + let mut singles_before_multiple = Vec::new(); + let mut multiple = None; + let mut singles_after_multiple = Vec::new(); + for item in &branch.pattern { + match item { + Single { + rule_name, binder, .. + } => { + if multiple.is_none() { + singles_before_multiple.push((rule_name, binder)) + } else { + singles_after_multiple.push((rule_name, binder)) + } + } + Multiple { + rule_name, binder, .. 
+ } => { + if multiple.is_none() { + multiple = Some((rule_name, binder)) + } else { + return Err(Error::new( + branch.pattern_span.clone(), + "multiple variable-length patterns are not allowed", + )); + } + } + } + } + let mut parses = Vec::new(); + for (rule_name, binder) in singles_before_multiple.into_iter() { + parses.push(quote!( + let #binder = Parsers::#rule_name( + inputs.next().unwrap() + )?; + )) + } + // Note the `rev()`: we are taking inputs from the end of the iterator in reverse order, so that + // only the unmatched inputs are left for the variable-length pattern, if any. + for (rule_name, binder) in singles_after_multiple.into_iter().rev() { + parses.push(quote!( + let #binder = Parsers::#rule_name( + inputs.next_back().unwrap() + )?; + )) + } + if let Some((rule_name, binder)) = multiple { + parses.push(quote!( + let #binder = inputs + .map(|i| Parsers::#rule_name(i)) + .collect::<Result<Vec<_>, _>>()? + .into_iter(); + )) + } + + Ok(quote!( + [#(#match_pat),*] if #(#match_filter)* true => { + #(#parses)* + #body + } + )) +} + +pub fn parse_children( + input: proc_macro::TokenStream, +) -> Result<proc_macro2::TokenStream> { + let input: ParseChildrenInput = syn::parse(input)?; + + let input_expr = &input.input_expr; + let branches = input + .branches + .iter() + .map(make_parser_branch) + .collect::<Result<Vec<_>>>()?; + Ok(quote!({ + let children_rules: Vec<Rule> = #input_expr.pair + .clone() + .into_inner() + .map(|p| p.as_rule()) + .collect(); + + #[allow(unused_mut)] + let mut inputs = #input_expr + .pair + .clone() + .into_inner() + .map(|p| #input_expr.with_pair(p)); + + #[allow(unreachable_code)] + match children_rules.as_slice() { + #(#branches,)* + [..] => return Err(#input_expr.error( + format!("Unexpected children: {:?}", children_rules) + )), + } + })) +} diff --git a/dhall_syntax/Cargo.toml b/dhall_syntax/Cargo.toml index 62ecced..eb492d0 100644 --- a/dhall_syntax/Cargo.toml +++ b/dhall_syntax/Cargo.toml @@ -15,5 +15,6 @@ pest = "2.1" either = "1.5.2" take_mut = "0.2.2" hex = "0.3.2" +lazy_static = "1.4.0" dhall_generated_parser = { path = "../dhall_generated_parser" } dhall_proc_macros = { path = "../dhall_proc_macros" } diff --git a/dhall_syntax/src/lib.rs b/dhall_syntax/src/lib.rs index e4a6077..290f53c 100644 --- a/dhall_syntax/src/lib.rs +++ b/dhall_syntax/src/lib.rs @@ -3,6 +3,7 @@ #![feature(try_blocks)] #![feature(never_type)] #![feature(bind_by_move_pattern_guards)] +#![feature(proc_macro_hygiene)] #![allow( clippy::many_single_char_names, clippy::should_implement_trait, diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index 4fd6f57..fa2d7c5 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -8,6 +8,7 @@ use std::collections::HashMap; use std::rc::Rc; use dhall_generated_parser::{DhallParser, Rule}; +use dhall_proc_macros::{make_parser, parse_children}; use crate::map::{DupTreeMap, DupTreeSet}; use crate::ExprF::*; @@ -27,55 +28,39 @@ pub type ParseError = pest::error::Error<Rule>; pub type ParseResult<T> = Result<T, ParseError>; -#[derive(Debug)] -enum Either<A, B> { - Left(A), - Right(B), +#[derive(Debug, Clone)] +struct ParseInput<'input, 'climbers, Rule> +where + Rule: std::fmt::Debug + Copy + std::hash::Hash + Ord, +{ + pair: Pair<'input, Rule>, + climbers: &'climbers HashMap<Rule, PrecClimber<Rule>>, + original_input_str: Rc<str>, } -impl crate::Builtin { - pub fn parse(s: &str) -> Option<Self> { - use crate::Builtin::*; - match s { - "Bool" => Some(Bool), - "Natural" => Some(Natural), - "Integer" => 
Some(Integer), - "Double" => Some(Double), - "Text" => Some(Text), - "List" => Some(List), - "Optional" => Some(Optional), - "None" => Some(OptionalNone), - "Natural/build" => Some(NaturalBuild), - "Natural/fold" => Some(NaturalFold), - "Natural/isZero" => Some(NaturalIsZero), - "Natural/even" => Some(NaturalEven), - "Natural/odd" => Some(NaturalOdd), - "Natural/toInteger" => Some(NaturalToInteger), - "Natural/show" => Some(NaturalShow), - "Natural/subtract" => Some(NaturalSubtract), - "Integer/toDouble" => Some(IntegerToDouble), - "Integer/show" => Some(IntegerShow), - "Double/show" => Some(DoubleShow), - "List/build" => Some(ListBuild), - "List/fold" => Some(ListFold), - "List/length" => Some(ListLength), - "List/head" => Some(ListHead), - "List/last" => Some(ListLast), - "List/indexed" => Some(ListIndexed), - "List/reverse" => Some(ListReverse), - "Optional/fold" => Some(OptionalFold), - "Optional/build" => Some(OptionalBuild), - "Text/show" => Some(TextShow), - _ => None, +impl<'input, 'climbers> ParseInput<'input, 'climbers, Rule> { + fn error(&self, message: String) -> ParseError { + let message = format!( + "{} while matching on:\n{}", + message, + debug_pair(self.pair.clone()) + ); + let e = pest::error::ErrorVariant::CustomError { message }; + pest::error::Error::new_from_span(e, self.pair.as_span()) + } + fn with_pair(&self, new_pair: Pair<'input, Rule>) -> Self { + ParseInput { + pair: new_pair, + climbers: self.climbers, + original_input_str: self.original_input_str.clone(), } } -} - -pub fn custom_parse_error(pair: &Pair<Rule>, msg: String) -> ParseError { - let msg = - format!("{} while matching on:\n{}", msg, debug_pair(pair.clone())); - let e = pest::error::ErrorVariant::CustomError { message: msg }; - pest::error::Error::new_from_span(e, pair.as_span()) + fn as_span(&self) -> Span { + Span::make(self.original_input_str.clone(), self.pair.as_span()) + } + fn as_str(&self) -> &'input str { + self.pair.as_str() + } } fn debug_pair(pair: Pair<Rule>) -> String { @@ -119,74 +104,48 @@ fn debug_pair(pair: Pair<Rule>) -> String { s } -macro_rules! parse_children { - // Variable length pattern with a common unary variant - (@match_forwards, - $parse_args:expr, - $iter:expr, - ($body:expr), - $variant:ident ($x:ident).., - $($rest:tt)* - ) => { - parse_children!(@match_backwards, - $parse_args, $iter, - ({ - let $x = $iter - .map(|x| Parsers::$variant($parse_args, x)) - .collect::<Result<Vec<_>, _>>()? 
- .into_iter(); - $body - }), - $($rest)* - ) - }; - // Single item pattern - (@match_forwards, - $parse_args:expr, - $iter:expr, - ($body:expr), - $variant:ident ($x:pat), - $($rest:tt)* - ) => {{ - let p = $iter.next().unwrap(); - let $x = Parsers::$variant($parse_args, p)?; - parse_children!(@match_forwards, - $parse_args, $iter, - ($body), - $($rest)* - ) - }}; - // Single item pattern after a variable length one: declare reversed and take from the end - (@match_backwards, - $parse_args:expr, - $iter:expr, - ($body:expr), - $variant:ident ($x:pat), - $($rest:tt)* - ) => { - parse_children!(@match_backwards, $parse_args, $iter, ({ - let p = $iter.next_back().unwrap(); - let $x = Parsers::$variant($parse_args, p)?; - $body - }), $($rest)*) - }; - - // Check no elements remain - (@match_forwards, $parse_args:expr, $iter:expr, ($body:expr) $(,)*) => { - $body - }; - // After a variable length pattern, everything has already been consumed - (@match_backwards, $parse_args:expr, $iter:expr, ($body:expr) $(,)*) => { - $body - }; +#[derive(Debug)] +enum Either<A, B> { + Left(A), + Right(B), +} - ($parse_args:expr, $iter:expr; [$($args:tt)*] => $body:expr) => { - parse_children!(@match_forwards, - $parse_args, $iter, - ($body), - $($args)*, - ) - }; +impl crate::Builtin { + pub fn parse(s: &str) -> Option<Self> { + use crate::Builtin::*; + match s { + "Bool" => Some(Bool), + "Natural" => Some(Natural), + "Integer" => Some(Integer), + "Double" => Some(Double), + "Text" => Some(Text), + "List" => Some(List), + "Optional" => Some(Optional), + "None" => Some(OptionalNone), + "Natural/build" => Some(NaturalBuild), + "Natural/fold" => Some(NaturalFold), + "Natural/isZero" => Some(NaturalIsZero), + "Natural/even" => Some(NaturalEven), + "Natural/odd" => Some(NaturalOdd), + "Natural/toInteger" => Some(NaturalToInteger), + "Natural/show" => Some(NaturalShow), + "Natural/subtract" => Some(NaturalSubtract), + "Integer/toDouble" => Some(IntegerToDouble), + "Integer/show" => Some(IntegerShow), + "Double/show" => Some(DoubleShow), + "List/build" => Some(ListBuild), + "List/fold" => Some(ListFold), + "List/length" => Some(ListLength), + "List/head" => Some(ListHead), + "List/last" => Some(ListLast), + "List/indexed" => Some(ListIndexed), + "List/reverse" => Some(ListReverse), + "Optional/fold" => Some(OptionalFold), + "Optional/build" => Some(OptionalBuild), + "Text/show" => Some(TextShow), + _ => None, + } + } } // Trim the shared indent off of a vec of lines, as defined by the Dhall semantics of multiline @@ -230,674 +189,894 @@ fn trim_indent(lines: &mut Vec<ParsedText>) { } } -dhall_proc_macros::make_parser! { - rule!(EOI<()>); - - rule!(simple_label<Label>; - captured_str!(s) => Label::from(s.trim().to_owned()) - ); - rule!(quoted_label<Label>; - captured_str!(s) => Label::from(s.trim().to_owned()) - ); - rule!(label<Label>; children!( - [simple_label(l)] => l, - [quoted_label(l)] => l, - )); - - rule!(double_quote_literal<ParsedText>; children!( - [double_quote_chunk(chunks)..] 
=> { - chunks.collect() - } - )); - - rule!(double_quote_chunk<ParsedTextContents>; children!( - [interpolation(e)] => { - InterpolatedTextContents::Expr(e) - }, - [double_quote_escaped(s)] => { - InterpolatedTextContents::Text(s) - }, - [double_quote_char(s)] => { - InterpolatedTextContents::Text(s.to_owned()) - }, - )); - rule!(double_quote_escaped<String>; - captured_str!(s) => { - match s { - "\"" => "\"".to_owned(), - "$" => "$".to_owned(), - "\\" => "\\".to_owned(), - "/" => "/".to_owned(), - "b" => "\u{0008}".to_owned(), - "f" => "\u{000C}".to_owned(), - "n" => "\n".to_owned(), - "r" => "\r".to_owned(), - "t" => "\t".to_owned(), - // "uXXXX" or "u{XXXXX}" - _ => { - use std::convert::{TryFrom, TryInto}; - - let s = &s[1..]; - let s = if &s[0..1] == "{" { - &s[1..s.len()-1] - } else { - &s[0..s.len()] - }; - - if s.len() > 8 { - Err(format!("Escape sequences can't have more than 8 chars: \"{}\"", s))? - } +fn make_precclimber() -> PrecClimber<Rule> { + use Rule::*; + // In order of precedence + let operators = vec![ + import_alt, + bool_or, + natural_plus, + text_append, + list_append, + bool_and, + combine, + prefer, + combine_types, + natural_times, + bool_eq, + bool_ne, + equivalent, + ]; + PrecClimber::new( + operators + .into_iter() + .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) + .collect(), + ) +} + +make_parser! { + fn EOI(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn simple_label(input: ParseInput<Rule>) -> ParseResult<Label> { + Ok(Label::from(input.as_str().trim().to_owned())) + } + fn quoted_label(input: ParseInput<Rule>) -> ParseResult<Label> { + Ok(Label::from(input.as_str().trim().to_owned())) + } + fn label(input: ParseInput<Rule>) -> ParseResult<Label> { + Ok(parse_children!(input; + [simple_label(l)] => l, + [quoted_label(l)] => l, + )) + } + + fn double_quote_literal( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedText> { + Ok(parse_children!(input; + [double_quote_chunk(chunks)..] => { + chunks.collect() + } + )) + } - // pad with zeroes - let s: String = std::iter::repeat('0') - .take(8 - s.len()) - .chain(s.chars()) - .collect(); - - // `s` has length 8, so `bytes` has length 4 - let bytes: &[u8] = &hex::decode(s).unwrap(); - let i = u32::from_be_bytes(bytes.try_into().unwrap()); - let c = char::try_from(i).unwrap(); - match i { - 0xD800..=0xDFFF => { - let c_ecapsed = c.escape_unicode(); - Err(format!("Escape sequences can't contain surrogate pairs: \"{}\"", c_ecapsed))? - }, - 0x0FFFE..=0x0FFFF | 0x1FFFE..=0x1FFFF | - 0x2FFFE..=0x2FFFF | 0x3FFFE..=0x3FFFF | - 0x4FFFE..=0x4FFFF | 0x5FFFE..=0x5FFFF | - 0x6FFFE..=0x6FFFF | 0x7FFFE..=0x7FFFF | - 0x8FFFE..=0x8FFFF | 0x9FFFE..=0x9FFFF | - 0xAFFFE..=0xAFFFF | 0xBFFFE..=0xBFFFF | - 0xCFFFE..=0xCFFFF | 0xDFFFE..=0xDFFFF | - 0xEFFFE..=0xEFFFF | 0xFFFFE..=0xFFFFF | - 0x10_FFFE..=0x10_FFFF => { - let c_ecapsed = c.escape_unicode(); - Err(format!("Escape sequences can't contain non-characters: \"{}\"", c_ecapsed))? 
- }, - _ => {} + fn double_quote_chunk( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedTextContents> { + Ok(parse_children!(input; + [interpolation(e)] => { + InterpolatedTextContents::Expr(e) + }, + [double_quote_escaped(s)] => { + InterpolatedTextContents::Text(s) + }, + [double_quote_char(s)] => { + InterpolatedTextContents::Text(s.to_owned()) + }, + )) + } + fn double_quote_escaped(input: ParseInput<Rule>) -> ParseResult<String> { + Ok(match input.as_str() { + "\"" => "\"".to_owned(), + "$" => "$".to_owned(), + "\\" => "\\".to_owned(), + "/" => "/".to_owned(), + "b" => "\u{0008}".to_owned(), + "f" => "\u{000C}".to_owned(), + "n" => "\n".to_owned(), + "r" => "\r".to_owned(), + "t" => "\t".to_owned(), + // "uXXXX" or "u{XXXXX}" + s => { + use std::convert::{TryFrom, TryInto}; + + let s = &s[1..]; + let s = if &s[0..1] == "{" { + &s[1..s.len() - 1] + } else { + &s[0..s.len()] + }; + + if s.len() > 8 { + Err(input.error(format!( + "Escape sequences can't have more than 8 chars: \"{}\"", + s + )))? + } + + // pad with zeroes + let s: String = std::iter::repeat('0') + .take(8 - s.len()) + .chain(s.chars()) + .collect(); + + // `s` has length 8, so `bytes` has length 4 + let bytes: &[u8] = &hex::decode(s).unwrap(); + let i = u32::from_be_bytes(bytes.try_into().unwrap()); + let c = char::try_from(i).unwrap(); + match i { + 0xD800..=0xDFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain surrogate pairs: \"{}\"", c_ecapsed)))? + } + 0x0FFFE..=0x0FFFF + | 0x1FFFE..=0x1FFFF + | 0x2FFFE..=0x2FFFF + | 0x3FFFE..=0x3FFFF + | 0x4FFFE..=0x4FFFF + | 0x5FFFE..=0x5FFFF + | 0x6FFFE..=0x6FFFF + | 0x7FFFE..=0x7FFFF + | 0x8FFFE..=0x8FFFF + | 0x9FFFE..=0x9FFFF + | 0xAFFFE..=0xAFFFF + | 0xBFFFE..=0xBFFFF + | 0xCFFFE..=0xCFFFF + | 0xDFFFE..=0xDFFFF + | 0xEFFFE..=0xEFFFF + | 0xFFFFE..=0xFFFFF + | 0x10_FFFE..=0x10_FFFF => { + let c_ecapsed = c.escape_unicode(); + Err(input.error(format!("Escape sequences can't contain non-characters: \"{}\"", c_ecapsed)))? 
} - std::iter::once(c).collect() + _ => {} } + std::iter::once(c).collect() } - } - ); - rule!(double_quote_char<&'a str>; - captured_str!(s) => s - ); - - rule!(single_quote_literal<ParsedText>; children!( - [single_quote_continue(lines)] => { - let newline: ParsedText = "\n".to_string().into(); - - let mut lines: Vec<ParsedText> = lines - .into_iter() - .rev() - .map(|l| l.into_iter().rev().collect::<ParsedText>()) - .collect(); - - trim_indent(&mut lines); - - lines - .into_iter() - .intersperse(newline) - .flat_map(InterpolatedText::into_iter) - .collect::<ParsedText>() - } - )); - rule!(single_quote_char<&'a str>; - captured_str!(s) => s - ); - rule!(escaped_quote_pair<&'a str>; - captured_str!(_) => "''" - ); - rule!(escaped_interpolation<&'a str>; - captured_str!(_) => "${" - ); - rule!(interpolation<ParsedExpr>; children!( - [expression(e)] => e - )); + }) + } + fn double_quote_char<'a>( + input: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok(input.as_str()) + } + + fn single_quote_literal( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedText> { + Ok(parse_children!(input; + [single_quote_continue(lines)] => { + let newline: ParsedText = "\n".to_string().into(); + + let mut lines: Vec<ParsedText> = lines + .into_iter() + .rev() + .map(|l| l.into_iter().rev().collect::<ParsedText>()) + .collect(); + + trim_indent(&mut lines); + + lines + .into_iter() + .intersperse(newline) + .flat_map(InterpolatedText::into_iter) + .collect::<ParsedText>() + } + )) + } + fn single_quote_char<'a>( + input: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok(input.as_str()) + } + fn escaped_quote_pair<'a>( + _: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok("''") + } + fn escaped_interpolation<'a>( + _: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok("${") + } + fn interpolation(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + Ok(parse_children!(input; + [expression(e)] => e + )) + } // Returns a vec of lines in reversed order, where each line is also in reversed order. 
- rule!(single_quote_continue<Vec<Vec<ParsedTextContents>>>; children!( - [interpolation(c), single_quote_continue(lines)] => { - let c = InterpolatedTextContents::Expr(c); - let mut lines = lines; - lines.last_mut().unwrap().push(c); - lines - }, - [escaped_quote_pair(c), single_quote_continue(lines)] => { - let mut lines = lines; - // TODO: don't allocate for every char - let c = InterpolatedTextContents::Text(c.to_owned()); - lines.last_mut().unwrap().push(c); - lines - }, - [escaped_interpolation(c), single_quote_continue(lines)] => { - let mut lines = lines; - // TODO: don't allocate for every char - let c = InterpolatedTextContents::Text(c.to_owned()); - lines.last_mut().unwrap().push(c); - lines - }, - [single_quote_char(c), single_quote_continue(lines)] => { - let mut lines = lines; - if c == "\n" || c == "\r\n" { - lines.push(vec![]); - } else { + fn single_quote_continue( + input: ParseInput<Rule>, + ) -> ParseResult<Vec<Vec<ParsedTextContents>>> { + Ok(parse_children!(input; + [interpolation(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Expr(c); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [escaped_quote_pair(c), single_quote_continue(lines)] => { + let mut lines = lines; // TODO: don't allocate for every char let c = InterpolatedTextContents::Text(c.to_owned()); lines.last_mut().unwrap().push(c); - } - lines - }, - [] => { - vec![vec![]] - }, - )); - - rule!(builtin<ParsedExpr>; span; - captured_str!(s) => { - spanned(span, match crate::Builtin::parse(s) { + lines + }, + [escaped_interpolation(c), single_quote_continue(lines)] => { + let mut lines = lines; + // TODO: don't allocate for every char + let c = InterpolatedTextContents::Text(c.to_owned()); + lines.last_mut().unwrap().push(c); + lines + }, + [single_quote_char(c), single_quote_continue(lines)] => { + let mut lines = lines; + if c == "\n" || c == "\r\n" { + lines.push(vec![]); + } else { + // TODO: don't allocate for every char + let c = InterpolatedTextContents::Text(c.to_owned()); + lines.last_mut().unwrap().push(c); + } + lines + }, + [] => { + vec![vec![]] + }, + )) + } + + fn builtin(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let s = input.as_str(); + let span = input.as_span(); + Ok(spanned( + span, + match crate::Builtin::parse(s) { Some(b) => Builtin(b), - None => match s { - "True" => BoolLit(true), - "False" => BoolLit(false), - "Type" => Const(crate::Const::Type), - "Kind" => Const(crate::Const::Kind), - "Sort" => Const(crate::Const::Sort), - _ => Err( - format!("Unrecognized builtin: '{}'", s) - )?, + None => { + match s { + "True" => BoolLit(true), + "False" => BoolLit(false), + "Type" => Const(crate::Const::Type), + "Kind" => Const(crate::Const::Kind), + "Sort" => Const(crate::Const::Sort), + _ => Err(input + .error(format!("Unrecognized builtin: '{}'", s)))?, + } } - }) + }, + )) + } + + fn NaN(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn minus_infinity_literal(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn plus_infinity_literal(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn numeric_double_literal( + input: ParseInput<Rule>, + ) -> ParseResult<core::Double> { + let s = input.as_str().trim(); + match s.parse::<f64>() { + Ok(x) if x.is_infinite() => Err(input.error(format!( + "Overflow while parsing double literal '{}'", + s + ))), + Ok(x) => Ok(NaiveDouble::from(x)), + Err(e) => Err(input.error(format!("{}", e))), } - ); - - rule!(NaN<()>); - rule!(minus_infinity_literal<()>); - 
rule!(plus_infinity_literal<()>); - - rule!(numeric_double_literal<core::Double>; - captured_str!(s) => { - let s = s.trim(); - match s.parse::<f64>() { - Ok(x) if x.is_infinite() => - Err(format!("Overflow while parsing double literal '{}'", s))?, - Ok(x) => NaiveDouble::from(x), - Err(e) => Err(format!("{}", e))?, + } + + fn double_literal(input: ParseInput<Rule>) -> ParseResult<core::Double> { + Ok(parse_children!(input; + [numeric_double_literal(n)] => n, + [minus_infinity_literal(_)] => std::f64::NEG_INFINITY.into(), + [plus_infinity_literal(_)] => std::f64::INFINITY.into(), + [NaN(_)] => std::f64::NAN.into(), + )) + } + + fn natural_literal(input: ParseInput<Rule>) -> ParseResult<core::Natural> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + fn integer_literal(input: ParseInput<Rule>) -> ParseResult<core::Integer> { + input + .as_str() + .trim() + .parse() + .map_err(|e| input.error(format!("{}", e))) + } + + fn identifier(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [variable(v)] => { + spanned(span, Var(v)) + }, + [builtin(e)] => e, + )) + } + + fn variable(input: ParseInput<Rule>) -> ParseResult<V<Label>> { + Ok(parse_children!(input; + [label(l), natural_literal(idx)] => { + V(l, idx) + }, + [label(l)] => { + V(l, 0) + }, + )) + } + + fn unquoted_path_component<'a>( + input: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok(input.as_str()) + } + fn quoted_path_component<'a>( + input: ParseInput<'a, '_, Rule>, + ) -> ParseResult<&'a str> { + Ok(input.as_str()) + } + fn path_component(input: ParseInput<Rule>) -> ParseResult<String> { + Ok(parse_children!(input; + [unquoted_path_component(s)] => s.to_string(), + [quoted_path_component(s)] => { + const RESERVED: &percent_encoding::AsciiSet = + &percent_encoding::CONTROLS + .add(b'=').add(b':').add(b'/').add(b'?') + .add(b'#').add(b'[').add(b']').add(b'@') + .add(b'!').add(b'$').add(b'&').add(b'\'') + .add(b'(').add(b')').add(b'*').add(b'+') + .add(b',').add(b';'); + s.chars() + .map(|c| { + // Percent-encode ascii chars + if c.is_ascii() { + percent_encoding::utf8_percent_encode( + &c.to_string(), + RESERVED, + ).to_string() + } else { + c.to_string() + } + }) + .collect() + }, + )) + } + fn path(input: ParseInput<Rule>) -> ParseResult<Vec<String>> { + Ok(parse_children!(input; + [path_component(components)..] => { + components.collect() } - } - ); - - rule!(double_literal<core::Double>; children!( - [numeric_double_literal(n)] => n, - [minus_infinity_literal(_)] => std::f64::NEG_INFINITY.into(), - [plus_infinity_literal(_)] => std::f64::INFINITY.into(), - [NaN(_)] => std::f64::NAN.into(), - )); - - rule!(natural_literal<core::Natural>; - captured_str!(s) => { - s.trim() - .parse() - .map_err(|e| format!("{}", e))? - } - ); + )) + } - rule!(integer_literal<core::Integer>; - captured_str!(s) => { - s.trim() - .parse() - .map_err(|e| format!("{}", e))? 
- } - ); - - rule!(identifier<ParsedExpr>; span; children!( - [variable(v)] => { - spanned(span, Var(v)) - }, - [builtin(e)] => e, - )); - - rule!(variable<V<Label>>; children!( - [label(l), natural_literal(idx)] => { - V(l, idx) - }, - [label(l)] => { - V(l, 0) - }, - )); - - rule!(unquoted_path_component<&'a str>; captured_str!(s) => s); - rule!(quoted_path_component<&'a str>; captured_str!(s) => s); - rule!(path_component<String>; children!( - [unquoted_path_component(s)] => s.to_string(), - [quoted_path_component(s)] => { - const RESERVED: &percent_encoding::AsciiSet = - &percent_encoding::CONTROLS - .add(b'=').add(b':').add(b'/').add(b'?') - .add(b'#').add(b'[').add(b']').add(b'@') - .add(b'!').add(b'$').add(b'&').add(b'\'') - .add(b'(').add(b')').add(b'*').add(b'+') - .add(b',').add(b';'); - s.chars() - .map(|c| { - // Percent-encode ascii chars - if c.is_ascii() { - percent_encoding::utf8_percent_encode( - &c.to_string(), - RESERVED, - ).to_string() - } else { - c.to_string() - } - }) - .collect() - }, - )); - rule!(path<Vec<String>>; children!( - [path_component(components)..] => { - components.collect() - } - )); - - rule!(local<(FilePrefix, Vec<String>)>; children!( - [parent_path(l)] => l, - [here_path(l)] => l, - [home_path(l)] => l, - [absolute_path(l)] => l, - )); - - rule!(parent_path<(FilePrefix, Vec<String>)>; children!( - [path(p)] => (FilePrefix::Parent, p) - )); - rule!(here_path<(FilePrefix, Vec<String>)>; children!( - [path(p)] => (FilePrefix::Here, p) - )); - rule!(home_path<(FilePrefix, Vec<String>)>; children!( - [path(p)] => (FilePrefix::Home, p) - )); - rule!(absolute_path<(FilePrefix, Vec<String>)>; children!( - [path(p)] => (FilePrefix::Absolute, p) - )); - - rule!(scheme<Scheme>; captured_str!(s) => match s { - "http" => Scheme::HTTP, - "https" => Scheme::HTTPS, - _ => unreachable!(), - }); - - rule!(http_raw<URL<ParsedExpr>>; children!( - [scheme(sch), authority(auth), path(p)] => URL { - scheme: sch, - authority: auth, - path: p, - query: None, - headers: None, - }, - [scheme(sch), authority(auth), path(p), query(q)] => URL { - scheme: sch, - authority: auth, - path: p, - query: Some(q), - headers: None, - }, - )); - - rule!(authority<String>; captured_str!(s) => s.to_owned()); - - rule!(query<String>; captured_str!(s) => s.to_owned()); - - rule!(http<URL<ParsedExpr>>; children!( + fn local( + input: ParseInput<Rule>, + ) -> ParseResult<(FilePrefix, Vec<String>)> { + Ok(parse_children!(input; + [parent_path(l)] => l, + [here_path(l)] => l, + [home_path(l)] => l, + [absolute_path(l)] => l, + )) + } + + fn parent_path( + input: ParseInput<Rule>, + ) -> ParseResult<(FilePrefix, Vec<String>)> { + Ok(parse_children!(input; + [path(p)] => (FilePrefix::Parent, p) + )) + } + fn here_path( + input: ParseInput<Rule>, + ) -> ParseResult<(FilePrefix, Vec<String>)> { + Ok(parse_children!(input; + [path(p)] => (FilePrefix::Here, p) + )) + } + fn home_path( + input: ParseInput<Rule>, + ) -> ParseResult<(FilePrefix, Vec<String>)> { + Ok(parse_children!(input; + [path(p)] => (FilePrefix::Home, p) + )) + } + fn absolute_path( + input: ParseInput<Rule>, + ) -> ParseResult<(FilePrefix, Vec<String>)> { + Ok(parse_children!(input; + [path(p)] => (FilePrefix::Absolute, p) + )) + } + + fn scheme(input: ParseInput<Rule>) -> ParseResult<Scheme> { + Ok(match input.as_str() { + "http" => Scheme::HTTP, + "https" => Scheme::HTTPS, + _ => unreachable!(), + }) + } + + fn http_raw(input: ParseInput<Rule>) -> ParseResult<URL<ParsedExpr>> { + Ok(parse_children!(input; + [scheme(sch), 
authority(auth), path(p)] => URL { + scheme: sch, + authority: auth, + path: p, + query: None, + headers: None, + }, + [scheme(sch), authority(auth), path(p), query(q)] => URL { + scheme: sch, + authority: auth, + path: p, + query: Some(q), + headers: None, + }, + )) + } + + fn authority(input: ParseInput<Rule>) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn query(input: ParseInput<Rule>) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + + fn http(input: ParseInput<Rule>) -> ParseResult<URL<ParsedExpr>> { + Ok(parse_children!(input; [http_raw(url)] => url, [http_raw(url), import_expression(e)] => URL { headers: Some(e), ..url }, - )); - - rule!(env<String>; children!( - [bash_environment_variable(s)] => s, - [posix_environment_variable(s)] => s, - )); - rule!(bash_environment_variable<String>; captured_str!(s) => s.to_owned()); - rule!(posix_environment_variable<String>; children!( - [posix_environment_variable_character(chars)..] => { - chars.collect() - }, - )); - rule!(posix_environment_variable_character<Cow<'a, str>>; - captured_str!(s) => { - match s { - "\\\"" => Cow::Owned("\"".to_owned()), - "\\\\" => Cow::Owned("\\".to_owned()), - "\\a" => Cow::Owned("\u{0007}".to_owned()), - "\\b" => Cow::Owned("\u{0008}".to_owned()), - "\\f" => Cow::Owned("\u{000C}".to_owned()), - "\\n" => Cow::Owned("\n".to_owned()), - "\\r" => Cow::Owned("\r".to_owned()), - "\\t" => Cow::Owned("\t".to_owned()), - "\\v" => Cow::Owned("\u{000B}".to_owned()), - _ => Cow::Borrowed(s) - } - } - ); - - rule!(missing<()>); - - rule!(import_type<ImportLocation<ParsedExpr>>; children!( - [missing(_)] => { - ImportLocation::Missing - }, - [env(e)] => { - ImportLocation::Env(e) - }, - [http(url)] => { - ImportLocation::Remote(url) - }, - [local((prefix, p))] => { - ImportLocation::Local(prefix, p) - }, - )); - - rule!(hash<Hash>; captured_str!(s) => { - let s = s.trim(); + )) + } + + fn env(input: ParseInput<Rule>) -> ParseResult<String> { + Ok(parse_children!(input; + [bash_environment_variable(s)] => s, + [posix_environment_variable(s)] => s, + )) + } + fn bash_environment_variable( + input: ParseInput<Rule>, + ) -> ParseResult<String> { + Ok(input.as_str().to_owned()) + } + fn posix_environment_variable( + input: ParseInput<Rule>, + ) -> ParseResult<String> { + Ok(parse_children!(input; + [posix_environment_variable_character(chars)..] 
=> { + chars.collect() + }, + )) + } + fn posix_environment_variable_character<'a>( + input: ParseInput<'a, '_, Rule>, + ) -> ParseResult<Cow<'a, str>> { + Ok(match input.as_str() { + "\\\"" => Cow::Owned("\"".to_owned()), + "\\\\" => Cow::Owned("\\".to_owned()), + "\\a" => Cow::Owned("\u{0007}".to_owned()), + "\\b" => Cow::Owned("\u{0008}".to_owned()), + "\\f" => Cow::Owned("\u{000C}".to_owned()), + "\\n" => Cow::Owned("\n".to_owned()), + "\\r" => Cow::Owned("\r".to_owned()), + "\\t" => Cow::Owned("\t".to_owned()), + "\\v" => Cow::Owned("\u{000B}".to_owned()), + s => Cow::Borrowed(s), + }) + } + + fn missing(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn import_type( + input: ParseInput<Rule>, + ) -> ParseResult<ImportLocation<ParsedExpr>> { + Ok(parse_children!(input; + [missing(_)] => { + ImportLocation::Missing + }, + [env(e)] => { + ImportLocation::Env(e) + }, + [http(url)] => { + ImportLocation::Remote(url) + }, + [local((prefix, p))] => { + ImportLocation::Local(prefix, p) + }, + )) + } + + fn hash(input: ParseInput<Rule>) -> ParseResult<Hash> { + let s = input.as_str().trim(); let protocol = &s[..6]; let hash = &s[7..]; if protocol != "sha256" { - Err(format!("Unknown hashing protocol '{}'", protocol))? + Err(input.error(format!("Unknown hashing protocol '{}'", protocol)))? } - Hash::SHA256(hex::decode(hash).unwrap()) - }); + Ok(Hash::SHA256(hex::decode(hash).unwrap())) + } - rule!(import_hashed<crate::Import<ParsedExpr>>; children!( + fn import_hashed( + input: ParseInput<Rule>, + ) -> ParseResult<crate::Import<ParsedExpr>> { + Ok(parse_children!(input; [import_type(location)] => crate::Import {mode: ImportMode::Code, location, hash: None }, [import_type(location), hash(h)] => - crate::Import {mode: ImportMode::Code, location, hash: Some(h) }, - )); - - rule!(Text<()>); - rule!(Location<()>); - - rule!(import<ParsedExpr>; span; children!( - [import_hashed(imp)] => { - spanned(span, Import(crate::Import { - mode: ImportMode::Code, - ..imp - })) - }, - [import_hashed(imp), Text(_)] => { - spanned(span, Import(crate::Import { - mode: ImportMode::RawText, - ..imp - })) - }, - [import_hashed(imp), Location(_)] => { - spanned(span, Import(crate::Import { - mode: ImportMode::Location, - ..imp - })) - }, - )); - - rule!(lambda<()>); - rule!(forall<()>); - rule!(arrow<()>); - rule!(merge<()>); - rule!(assert<()>); - rule!(if_<()>); - rule!(in_<()>); - rule!(toMap<()>); - - rule!(empty_list_literal<ParsedExpr>; span; children!( - [application_expression(e)] => { - spanned(span, EmptyListLit(e)) - }, - )); - - rule!(expression<ParsedExpr>; span; children!( - [lambda(()), label(l), expression(typ), - arrow(()), expression(body)] => { - spanned(span, Lam(l, typ, body)) - }, - [if_(()), expression(cond), expression(left), expression(right)] => { - spanned(span, BoolIf(cond, left, right)) - }, - [let_binding(bindings).., in_(()), expression(final_expr)] => { - bindings.rev().fold( - final_expr, - |acc, x| unspanned(Let(x.0, x.1, x.2, acc)) - ) - }, - [forall(()), label(l), expression(typ), - arrow(()), expression(body)] => { - spanned(span, Pi(l, typ, body)) - }, - [operator_expression(typ), arrow(()), expression(body)] => { - spanned(span, Pi("_".into(), typ, body)) - }, - [merge(()), import_expression(x), import_expression(y), - application_expression(z)] => { - spanned(span, Merge(x, y, Some(z))) - }, - [empty_list_literal(e)] => e, - [assert(()), expression(x)] => { - spanned(span, Assert(x)) - }, - [toMap(()), import_expression(x), application_expression(y)] => { - 
spanned(span, ToMap(x, Some(y))) - }, - [operator_expression(e)] => e, - [operator_expression(e), expression(annot)] => { - spanned(span, Annot(e, annot)) - }, - )); - - rule!(let_binding<(Label, Option<ParsedExpr>, ParsedExpr)>; - children!( - [label(name), expression(annot), expression(expr)] => - (name, Some(annot), expr), - [label(name), expression(expr)] => - (name, None, expr), - )); - - rule!(List<()>); - rule!(Optional<()>); - - rule!(operator_expression<ParsedExpr>; prec_climb!( - application_expression, - { - use Rule::*; - // In order of precedence - let operators = vec![ - import_alt, - bool_or, - natural_plus, - text_append, - list_append, - bool_and, - combine, - prefer, - combine_types, - natural_times, - bool_eq, - bool_ne, - equivalent, - ]; - PrecClimber::new( - operators - .into_iter() - .map(|op| pcl::Operator::new(op, pcl::Assoc::Left)) - .collect(), - ) - }, - (l, op, r) => { - use crate::BinOp::*; - use Rule::*; - let op = match op.as_rule() { - import_alt => ImportAlt, - bool_or => BoolOr, - natural_plus => NaturalPlus, - text_append => TextAppend, - list_append => ListAppend, - bool_and => BoolAnd, - combine => RecursiveRecordMerge, - prefer => RightBiasedRecordMerge, - combine_types => RecursiveRecordTypeMerge, - natural_times => NaturalTimes, - bool_eq => BoolEQ, - bool_ne => BoolNE, - equivalent => Equivalence, - r => Err( - format!("Rule {:?} isn't an operator", r), - )?, - }; - - unspanned(BinOp(op, l, r)) - } - )); - - rule!(Some_<()>); - - rule!(application_expression<ParsedExpr>; children!( - [first_application_expression(e)] => e, - [first_application_expression(first), import_expression(rest)..] => { - rest.fold(first, |acc, e| unspanned(App(acc, e))) - }, - )); - - rule!(first_application_expression<ParsedExpr>; span; - children!( - [Some_(()), import_expression(e)] => { - spanned(span, SomeLit(e)) - }, - [merge(()), import_expression(x), import_expression(y)] => { - spanned(span, Merge(x, y, None)) - }, - [toMap(()), import_expression(x)] => { - spanned(span, ToMap(x, None)) - }, - [import_expression(e)] => e, - )); - - rule!(import_expression<ParsedExpr>; - children!( - [selector_expression(e)] => e, - [import(e)] => e, - )); - - rule!(selector_expression<ParsedExpr>; children!( - [primitive_expression(e)] => e, - [primitive_expression(first), selector(rest)..] => { - rest.fold(first, |acc, e| unspanned(match e { - Either::Left(l) => Field(acc, l), - Either::Right(ls) => Projection(acc, ls), - })) - }, - )); - - rule!(selector<Either<Label, DupTreeSet<Label>>>; children!( - [label(l)] => Either::Left(l), - [labels(ls)] => Either::Right(ls), - [expression(_e)] => unimplemented!("selection by expression"), // TODO - )); - - rule!(labels<DupTreeSet<Label>>; children!( - [label(ls)..] 
=> ls.collect(), - )); - - rule!(primitive_expression<ParsedExpr>; span; children!( - [double_literal(n)] => spanned(span, DoubleLit(n)), - [natural_literal(n)] => spanned(span, NaturalLit(n)), - [integer_literal(n)] => spanned(span, IntegerLit(n)), - [double_quote_literal(s)] => spanned(span, TextLit(s)), - [single_quote_literal(s)] => spanned(span, TextLit(s)), - [empty_record_type(e)] => e, - [empty_record_literal(e)] => e, - [non_empty_record_type_or_literal(e)] => e, - [union_type(e)] => e, - [non_empty_list_literal(e)] => e, - [identifier(e)] => e, - [expression(e)] => e, - )); - - rule!(empty_record_literal<ParsedExpr>; span; - captured_str!(_) => spanned(span, RecordLit(Default::default())) - ); - - rule!(empty_record_type<ParsedExpr>; span; - captured_str!(_) => spanned(span, RecordType(Default::default())) - ); - - rule!(non_empty_record_type_or_literal<ParsedExpr>; span; - children!( - [label(first_label), non_empty_record_type(rest)] => { - let (first_expr, mut map) = rest; - map.insert(first_label, first_expr); - spanned(span, RecordType(map)) - }, - [label(first_label), non_empty_record_literal(rest)] => { - let (first_expr, mut map) = rest; - map.insert(first_label, first_expr); - spanned(span, RecordLit(map)) - }, - )); - - rule!(non_empty_record_type - <(ParsedExpr, DupTreeMap<Label, ParsedExpr>)>; children!( - [expression(expr), record_type_entry(entries)..] => { - (expr, entries.collect()) - } - )); + crate::Import {mode: ImportMode::Code, location, hash: Some(h) }, + )) + } - rule!(record_type_entry<(Label, ParsedExpr)>; children!( - [label(name), expression(expr)] => (name, expr) - )); + fn Text(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn Location(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } - rule!(non_empty_record_literal - <(ParsedExpr, DupTreeMap<Label, ParsedExpr>)>; children!( - [expression(expr), record_literal_entry(entries)..] => { - (expr, entries.collect()) - } - )); - - rule!(record_literal_entry<(Label, ParsedExpr)>; children!( - [label(name), expression(expr)] => (name, expr) - )); - - rule!(union_type<ParsedExpr>; span; children!( - [empty_union_type(_)] => { - spanned(span, UnionType(Default::default())) - }, - [union_type_entry(entries)..] => { - spanned(span, UnionType(entries.collect())) - }, - )); - - rule!(empty_union_type<()>); - - rule!(union_type_entry<(Label, Option<ParsedExpr>)>; children!( - [label(name), expression(expr)] => (name, Some(expr)), - [label(name)] => (name, None), - )); - - rule!(non_empty_list_literal<ParsedExpr>; span; - children!( - [expression(items)..] 
=> spanned( - span, - NEListLit(items.collect()) - ) - )); + fn import(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [import_hashed(imp)] => { + spanned(span, Import(crate::Import { + mode: ImportMode::Code, + ..imp + })) + }, + [import_hashed(imp), Text(_)] => { + spanned(span, Import(crate::Import { + mode: ImportMode::RawText, + ..imp + })) + }, + [import_hashed(imp), Location(_)] => { + spanned(span, Import(crate::Import { + mode: ImportMode::Location, + ..imp + })) + }, + )) + } + + fn lambda(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn forall(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn arrow(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn merge(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn assert(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn if_(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn in_(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn toMap(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn empty_list_literal(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [application_expression(e)] => { + spanned(span, EmptyListLit(e)) + }, + )) + } + + fn expression(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [lambda(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(span, Lam(l, typ, body)) + }, + [if_(()), expression(cond), expression(left), + expression(right)] => { + spanned(span, BoolIf(cond, left, right)) + }, + [let_binding(bindings).., in_(()), expression(final_expr)] => { + bindings.rev().fold( + final_expr, + |acc, x| unspanned(Let(x.0, x.1, x.2, acc)) + ) + }, + [forall(()), label(l), expression(typ), + arrow(()), expression(body)] => { + spanned(span, Pi(l, typ, body)) + }, + [operator_expression(typ), arrow(()), expression(body)] => { + spanned(span, Pi("_".into(), typ, body)) + }, + [merge(()), import_expression(x), import_expression(y), + application_expression(z)] => { + spanned(span, Merge(x, y, Some(z))) + }, + [empty_list_literal(e)] => e, + [assert(()), expression(x)] => { + spanned(span, Assert(x)) + }, + [toMap(()), import_expression(x), application_expression(y)] => { + spanned(span, ToMap(x, Some(y))) + }, + [operator_expression(e)] => e, + [operator_expression(e), expression(annot)] => { + spanned(span, Annot(e, annot)) + }, + )) + } + + fn let_binding( + input: ParseInput<Rule>, + ) -> ParseResult<(Label, Option<ParsedExpr>, ParsedExpr)> { + Ok(parse_children!(input; + [label(name), expression(annot), expression(expr)] => + (name, Some(annot), expr), + [label(name), expression(expr)] => + (name, None, expr), + )) + } + + fn List(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + fn Optional(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + #[prec_climb(application_expression, make_precclimber())] + fn operator_expression( + input: ParseInput<Rule>, + l: ParsedExpr, + op: Pair<Rule>, + r: ParsedExpr, + ) -> ParseResult<ParsedExpr> { + use crate::BinOp::*; + use Rule::*; + let op = match op.as_rule() { + import_alt => ImportAlt, + bool_or => BoolOr, + natural_plus => NaturalPlus, + text_append => TextAppend, + list_append => ListAppend, + bool_and => BoolAnd, + combine => RecursiveRecordMerge, + prefer => RightBiasedRecordMerge, + combine_types => RecursiveRecordTypeMerge, + natural_times => NaturalTimes, + bool_eq => 
BoolEQ, + bool_ne => BoolNE, + equivalent => Equivalence, + r => Err(input.error(format!("Rule {:?} isn't an operator", r)))?, + }; + + Ok(unspanned(BinOp(op, l, r))) + } + + fn Some_(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn application_expression( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + Ok(parse_children!(input; + [first_application_expression(e)] => e, + [first_application_expression(first), + import_expression(rest)..] => { + rest.fold(first, |acc, e| unspanned(App(acc, e))) + }, + )) + } + + fn first_application_expression( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [Some_(()), import_expression(e)] => { + spanned(span, SomeLit(e)) + }, + [merge(()), import_expression(x), import_expression(y)] => { + spanned(span, Merge(x, y, None)) + }, + [toMap(()), import_expression(x)] => { + spanned(span, ToMap(x, None)) + }, + [import_expression(e)] => e, + )) + } + + fn import_expression(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + Ok(parse_children!(input; + [selector_expression(e)] => e, + [import(e)] => e, + )) + } + + fn selector_expression(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + Ok(parse_children!(input; + [primitive_expression(e)] => e, + [primitive_expression(first), selector(rest)..] => { + rest.fold(first, |acc, e| unspanned(match e { + Either::Left(l) => Field(acc, l), + Either::Right(ls) => Projection(acc, ls), + })) + }, + )) + } + + fn selector( + input: ParseInput<Rule>, + ) -> ParseResult<Either<Label, DupTreeSet<Label>>> { + Ok(parse_children!(input; + [label(l)] => Either::Left(l), + [labels(ls)] => Either::Right(ls), + [expression(_e)] => unimplemented!("selection by expression"), // TODO + )) + } + + fn labels(input: ParseInput<Rule>) -> ParseResult<DupTreeSet<Label>> { + Ok(parse_children!(input; + [label(ls)..] 
=> ls.collect(), + )) + } + + fn primitive_expression( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [double_literal(n)] => spanned(span, DoubleLit(n)), + [natural_literal(n)] => spanned(span, NaturalLit(n)), + [integer_literal(n)] => spanned(span, IntegerLit(n)), + [double_quote_literal(s)] => spanned(span, TextLit(s)), + [single_quote_literal(s)] => spanned(span, TextLit(s)), + [empty_record_type(e)] => e, + [empty_record_literal(e)] => e, + [non_empty_record_type_or_literal(e)] => e, + [union_type(e)] => e, + [non_empty_list_literal(e)] => e, + [identifier(e)] => e, + [expression(e)] => e, + )) + } + + fn empty_record_literal( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(spanned(span, RecordLit(Default::default()))) + } - rule!(final_expression<ParsedExpr>; children!( - [expression(e), EOI(_)] => e - )); + fn empty_record_type(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(spanned(span, RecordType(Default::default()))) + } + + fn non_empty_record_type_or_literal( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [label(first_label), non_empty_record_type(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + spanned(span, RecordType(map)) + }, + [label(first_label), non_empty_record_literal(rest)] => { + let (first_expr, mut map) = rest; + map.insert(first_label, first_expr); + spanned(span, RecordLit(map)) + }, + )) + } + + fn non_empty_record_type( + input: ParseInput<Rule>, + ) -> ParseResult<(ParsedExpr, DupTreeMap<Label, ParsedExpr>)> { + Ok(parse_children!(input; + [expression(expr), record_type_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_type_entry( + input: ParseInput<Rule>, + ) -> ParseResult<(Label, ParsedExpr)> { + Ok(parse_children!(input; + [label(name), expression(expr)] => (name, expr) + )) + } + + fn non_empty_record_literal( + input: ParseInput<Rule>, + ) -> ParseResult<(ParsedExpr, DupTreeMap<Label, ParsedExpr>)> { + Ok(parse_children!(input; + [expression(expr), record_literal_entry(entries)..] => { + (expr, entries.collect()) + } + )) + } + + fn record_literal_entry( + input: ParseInput<Rule>, + ) -> ParseResult<(Label, ParsedExpr)> { + Ok(parse_children!(input; + [label(name), expression(expr)] => (name, expr) + )) + } + + fn union_type(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [empty_union_type(_)] => { + spanned(span, UnionType(Default::default())) + }, + [union_type_entry(entries)..] => { + spanned(span, UnionType(entries.collect())) + }, + )) + } + + fn empty_union_type(_: ParseInput<Rule>) -> ParseResult<()> { + Ok(()) + } + + fn union_type_entry( + input: ParseInput<Rule>, + ) -> ParseResult<(Label, Option<ParsedExpr>)> { + Ok(parse_children!(input; + [label(name), expression(expr)] => (name, Some(expr)), + [label(name)] => (name, None), + )) + } + + fn non_empty_list_literal( + input: ParseInput<Rule>, + ) -> ParseResult<ParsedExpr> { + let span = input.as_span(); + Ok(parse_children!(input; + [expression(items)..] 
=> spanned( + span, + NEListLit(items.collect()) + ) + )) + } + + fn final_expression(input: ParseInput<Rule>) -> ParseResult<ParsedExpr> { + Ok(parse_children!(input; + [expression(e), EOI(_)] => e + )) + } } pub fn parse_expr(s: &str) -> ParseResult<ParsedExpr> { let mut pairs = DhallParser::parse(Rule::final_expression, s)?; - let rc_input = s.to_string().into(); - let expr = EntryPoint::final_expression(rc_input, pairs.next().unwrap())?; + let expr = EntryPoint::final_expression(s, pairs.next().unwrap())?; assert_eq!(pairs.next(), None); Ok(expr) } |