From 48d89a280838919c1fba9e0acf1e090d3286c470 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 2 Mar 2019 22:20:33 +0100 Subject: Start implementing parsing on top of pest --- dhall/src/core.rs | 5 + dhall/src/parser.rs | 127 ++++++++++++++++++----- dhall_parser/src/dhall.pest.visibility | 182 ++++++++++++++++++--------------- 3 files changed, 206 insertions(+), 108 deletions(-) diff --git a/dhall/src/core.rs b/dhall/src/core.rs index 473a6a6..ccaf0f5 100644 --- a/dhall/src/core.rs +++ b/dhall/src/core.rs @@ -179,6 +179,8 @@ pub enum Expr<'i, S, A> { Note(S, Box>), /// `Embed path ~ path` Embed(A), + + FailedParse(String, Vec>), } /// Built-in types @@ -698,6 +700,7 @@ pub fn shift<'i, S, T, A: Clone>(d: isize, v: V, e: &Expr<'i, S, A>) -> Expr<'i, // The Dhall compiler enforces that all embedded values are closed expressions // and `shift` does nothing to a closed expression Embed(ref p) => Embed(p.clone()), + FailedParse(_, _) => unreachable!(), } } @@ -795,6 +798,7 @@ pub fn subst<'i, S, T, A>(v: V<'i>, e: &Expr<'i, S, A>, b: &Expr<'i, T, A>) -> E Field(ref a, b) => Field(bx(subst(v, e, a)), b), Note(_, ref b) => subst(v, e, b), Embed(ref p) => Embed(p.clone()), + FailedParse(_, _) => unreachable!(), } } @@ -1040,6 +1044,7 @@ pub fn normalize<'i, S, T, A>(e: &Expr<'i, S, A>) -> Expr<'i, T, A> }, Note(_, ref e) => normalize(e), Embed(ref a) => Embed(a.clone()), + FailedParse(_, _) => unreachable!(), } } diff --git a/dhall/src/parser.rs b/dhall/src/parser.rs index 057fce2..0acfe39 100644 --- a/dhall/src/parser.rs +++ b/dhall/src/parser.rs @@ -1,9 +1,9 @@ use lalrpop_util; use crate::grammar; -use crate::grammar_util::BoxExpr; +use crate::grammar_util::{BoxExpr, ParsedExpr}; use crate::lexer::{Lexer, LexicalError, Tok}; -use crate::core::{bx, Expr}; +use crate::core::{bx, Expr, V}; pub type ParseError<'i> = lalrpop_util::ParseError, LexicalError>; @@ -14,55 +14,134 @@ pub fn parse_expr(s: &str) -> Result { use pest::Parser; use pest::error::Error; use pest::iterators::Pair; -use crate::generated_parser::{DhallParser, Rule}; +use dhall_parser::{DhallParser, Rule}; fn debug_pair(pair: Pair) { - fn aux(indent: usize, pair: Pair) { + fn aux(indent: usize, prefix: String, pair: Pair) { let indent_str = "| ".repeat(indent); - println!(r#"{}{:?}: "{}""#, indent_str, pair.as_rule(), pair.as_str()); - for p in pair.into_inner() { - aux(indent+1, p); + let rule = pair.as_rule(); + let contents = pair.as_str().clone(); + let mut inner = pair.into_inner(); + let mut first = true; + while let Some(p) = inner.next() { + if first { + first = false; + let last = inner.peek().is_none(); + if last && p.as_str() == contents { + let prefix = format!("{}{:?} > ", prefix, rule); + aux(indent, prefix, p); + continue; + } else { + println!(r#"{}{}{:?}: "{}""#, indent_str, prefix, rule, contents); + } + } + aux(indent+1, "".into(), p); + } + if first { + println!(r#"{}{}{:?}: "{}""#, indent_str, prefix, rule, contents); } + // println!(r#"{}{}{:?}: "{}""#, indent_str, prefix, rule, contents); + // for p in inner { + // aux(indent+1, "".into(), p); + // } } - aux(0, pair) + aux(0, "".into(), pair) } pub fn parse_expr_pest(s: &str) -> Result> { - let parsed_expr = DhallParser::parse(Rule::complete_expression, s)?.next().unwrap(); + let parsed_expr = DhallParser::parse(Rule::final_expression, s)?.next().unwrap(); debug_pair(parsed_expr.clone()); // println!("{}", parsed_expr.clone()); - fn parse_pair(pair: Pair) -> BoxExpr { + fn parse_binop<'a, F>(pair: Pair<'a, Rule>, mut f: F) -> BoxExpr<'a> + where F: FnMut(BoxExpr<'a>, BoxExpr<'a>) -> ParsedExpr<'a> { + let mut inner = pair.into_inner().map(parse_expression); + let first_expr = inner.next().unwrap(); + inner.fold(first_expr, |acc, e| bx(f(acc, e))) + } + + fn skip_expr(pair: Pair) -> BoxExpr { + let mut inner = pair.into_inner().map(parse_expression); + inner.next().unwrap() + } + + fn parse_str(pair: Pair) -> &str { + pair.as_str().trim() + } + + fn parse_natural(pair: Pair) -> Result { + parse_str(pair).parse() + } + + fn parse_expression(pair: Pair) -> BoxExpr { match pair.as_rule() { - Rule::natural_literal => bx(Expr::NaturalLit(str::parse(pair.as_str().trim()).unwrap())), - Rule::plus_expression => { - let mut inner = pair.into_inner().map(parse_pair); - let first_expr = inner.next().unwrap(); - inner.fold(first_expr, |acc, e| bx(Expr::NaturalPlus(acc, e))) + Rule::natural_literal_raw => bx(Expr::NaturalLit(parse_natural(pair).unwrap())), + + Rule::annotated_expression => { parse_binop(pair, Expr::Annot) } + Rule::import_alt_expression => { skip_expr(pair) } + Rule::or_expression => { parse_binop(pair, Expr::BoolOr) } + Rule::plus_expression => { parse_binop(pair, Expr::NaturalPlus) } + Rule::text_append_expression => { parse_binop(pair, Expr::TextAppend) } + Rule::list_append_expression => { skip_expr(pair) } + Rule::and_expression => { parse_binop(pair, Expr::BoolAnd) } + Rule::combine_expression => { skip_expr(pair) } + Rule::prefer_expression => { skip_expr(pair) } + Rule::combine_types_expression => { skip_expr(pair) } + Rule::times_expression => { parse_binop(pair, Expr::NaturalTimes) } + Rule::equal_expression => { parse_binop(pair, Expr::BoolEQ) } + Rule::not_equal_expression => { parse_binop(pair, Expr::BoolNE) } + Rule::application_expression => { parse_binop(pair, Expr::App) } + + Rule::selector_expression_raw => { + let mut inner = pair.into_inner(); + let first_expr = parse_expression(inner.next().unwrap()); + inner.fold(first_expr, |acc, e| bx(Expr::Field(acc, e.as_str()))) + } + + Rule::identifier_raw => { + let mut inner = pair.into_inner(); + let name = parse_str(inner.next().unwrap()); + let idx = inner.next().map(parse_natural).unwrap_or(Ok(0)).unwrap(); + bx(Expr::Var(V(name, idx))) } - Rule::times_expression => { - let mut inner = pair.into_inner().map(parse_pair); - let first_expr = inner.next().unwrap(); - inner.fold(first_expr, |acc, e| bx(Expr::NaturalTimes(acc, e))) + + + // Rule::record_type_or_literal => { + // let mut inner = pair.into_inner(); + // let first_expr = parse_expression(inner.next().unwrap()); + // inner.fold(first_expr, |acc, e| bx(Expr::Field(acc, e.as_str()))) + // } + + // r => panic!("{:?}", r), + // _ => bx(Expr::BoolLit(false)), + + _ => { + let rulename = format!("{:?}", pair.as_rule()); + let mut inner = pair.into_inner().map(parse_expression).map(|x| *x); + bx(Expr::FailedParse(rulename, inner.collect())) } - r => panic!("{:?}", r), } } - Ok(parse_pair(parsed_expr)) + Ok(parse_expression(parsed_expr)) } #[test] fn test_parse() { use crate::core::Expr::*; - let expr = "((22 + 3) * 10)"; + // let expr = r#"{ x = "foo", y = 4 }.x"#; + // let expr = r#"(1 + 2) * 3"#; + let expr = r#"if True then 1 + 3 * 5 else 2"#; println!("{:?}", parse_expr(expr)); match parse_expr_pest(expr) { - Err(e) => println!("{}", e), + Err(e) => { + println!("{:?}", e); + println!("{}", e); + }, ok => println!("{:?}", ok), } - assert_eq!(parse_expr_pest(expr).unwrap(), parse_expr(expr).unwrap()); + // assert_eq!(parse_expr_pest(expr).unwrap(), parse_expr(expr).unwrap()); assert!(false); println!("test {:?}", parse_expr("3 + 5 * 10")); diff --git a/dhall_parser/src/dhall.pest.visibility b/dhall_parser/src/dhall.pest.visibility index 3e6ba45..50ded68 100644 --- a/dhall_parser/src/dhall.pest.visibility +++ b/dhall_parser/src/dhall.pest.visibility @@ -1,95 +1,95 @@ -end_of_line -tab -block_comment -block_comment_chunk -block_comment_continue -not_end_of_line -line_comment -whitespace_chunk -whitespace -nonempty_whitespace -ALPHA -DIGIT -HEXDIG -simple_label_first_char -simple_label_next_other_char -simple_label_next_char -simple_label_start +# end_of_line +# tab +# block_comment +# block_comment_chunk +# block_comment_continue +# not_end_of_line +# line_comment +# whitespace_chunk +# whitespace +# nonempty_whitespace +# ALPHA +# DIGIT +# HEXDIG +# simple_label_first_char +# simple_label_next_other_char +# simple_label_next_char +# simple_label_start simple_label quoted_label label_raw label -double_quote_chunk +# double_quote_chunk double_quote_literal -single_quote_continue +# single_quote_continue single_quote_literal text_literal_raw -if_raw -then_raw -else_raw -let_raw -in_raw -as_raw -using_raw -merge_raw -missing_raw -Optional_raw -Text_raw -List_raw +# if_raw +# then_raw +# else_raw +# let_raw +# in_raw +# as_raw +# using_raw +# merge_raw +# missing_raw +# Optional_raw +# Text_raw +# List_raw Infinity_raw -if_ -then -else_ -let_ -in_ -as_ -using -merge +# if_ +# then +# else_ +# let_ +# in_ +# as_ +# using +# merge Optional Text List -equal -or -plus -text_append -list_append -and -times -double_equal -not_equal -dot -bar -comma -at -colon -import_alt -open_parens -close_parens_raw -close_parens -open_brace -close_brace_raw -close_brace -open_bracket -close_bracket_raw -close_bracket -open_angle -close_angle_raw -close_angle -combine -combine_types -prefer -lambda -forall -arrow -exponent +# equal +# or +# plus +# text_append +# list_append +# and +# times +# double_equal +# not_equal +# dot +# bar +# comma +# at +# colon +# import_alt +# open_parens +# close_parens_raw +# close_parens +# open_brace +# close_brace_raw +# close_brace +# open_bracket +# close_bracket_raw +# close_bracket +# open_angle +# close_angle_raw +# close_angle +# combine +# combine_types +# prefer +# lambda +# forall +# arrow +# exponent double_literal_raw natural_literal_raw integer_literal_raw identifier_raw identifier -path_character -quoted_path_character -path_component +# path_character +# quoted_path_character +# path_component path local_raw scheme @@ -121,11 +121,18 @@ import_type_raw hash_raw import_hashed_raw import_raw -expression -annotated_expression +# expression +lambda_expression +ifthenelse_expression +let_expression +forall_expression +arrow_expression +merge_expression +empty_list_or_optional empty_collection non_empty_optional -operator_expression +annotated_expression +# operator_expression import_alt_expression or_expression plus_expression @@ -139,16 +146,23 @@ times_expression equal_expression not_equal_expression application_expression -import_expression_raw -import_expression +# atomic_expression +# atomic_expression_raw selector_expression_raw -primitive_expression_raw +selector_raw labels_raw +# primitive_expression_raw +# literal_expression_raw record_type_or_literal +empty_record_literal +empty_record_type non_empty_record_type_or_literal non_empty_record_type non_empty_record_literal union_type_or_literal +empty_union_type non_empty_union_type_or_literal -non_empty_list_literal -complete_expression +non_empty_list_literal_raw +# parenthesized_expression_raw +# complete_expression +# final_expression -- cgit v1.2.3