From 2cd161a43f0e3a25b9613663b6979ea514447a14 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Tue, 5 Mar 2019 16:36:12 +0100 Subject: Parse single quote literals --- dhall/src/parser.rs | 71 ++++++++++++++++++++++++++-------- dhall_parser/src/dhall.abnf | 9 +++-- dhall_parser/src/dhall.pest.visibility | 5 ++- 3 files changed, 62 insertions(+), 23 deletions(-) diff --git a/dhall/src/parser.rs b/dhall/src/parser.rs index 5075a24..ee93a7d 100644 --- a/dhall/src/parser.rs +++ b/dhall/src/parser.rs @@ -399,6 +399,29 @@ named!(str<&'a str>; with_captured_str!(s; { s.trim() })); named!(raw_str<&'a str>; with_captured_str!(s; s)); +named_rule!(escaped_quote_pair<&'a str>; plain_value!("''")); +named_rule!(escaped_interpolation<&'a str>; plain_value!("${")); + +named_rule!(single_quote_continue>; match_children!( + // TODO: handle interpolation + // (c: expression, rest: single_quote_continue) => { + // rest.push(c); rest + // }, + (c: escaped_quote_pair, rest: single_quote_continue) => { + rest.push(c); rest + }, + (c: escaped_interpolation, rest: single_quote_continue) => { + rest.push(c); rest + }, + // capture interpolation as string + (c: raw_str, rest: single_quote_continue) => { + rest.push(c); rest + }, + () => { + vec![] + }, +)); + named!(natural; with_raw_pair!(pair; { pair.as_str().trim() .parse() @@ -411,7 +434,7 @@ named!(integer; with_raw_pair!(pair; { .map_err(|e: std::num::ParseIntError| custom_parse_error(&pair, format!("{}", e)))? })); -named!(letbinding<(&'a str, Option>, BoxExpr<'a>)>; +named_rule!(let_binding<(&'a str, Option>, BoxExpr<'a>)>; match_children!((name: str, annot?: expression, expr: expression) => (name, annot, expr)) ); @@ -445,7 +468,8 @@ named_rule!(union_type_entries>>; }) ); -named_rule!(non_empty_union_type_or_literal<(Option<(&'a str, BoxExpr<'a>)>, BTreeMap<&'a str, ParsedExpr<'a>>)>; +named_rule!(non_empty_union_type_or_literal + <(Option<(&'a str, BoxExpr<'a>)>, BTreeMap<&'a str, ParsedExpr<'a>>)>; match_children!( (label: str, e: expression, entries: union_type_entries) => { (Some((label, e)), entries) @@ -466,10 +490,16 @@ named_rule!(non_empty_union_type_or_literal<(Option<(&'a str, BoxExpr<'a>)>, BTr named_rule!(empty_union_type<()>; plain_value!(())); named!(expression>; match_rule!( + // TODO: parse escapes and interpolation Rule::double_quote_literal => match_children!((strs*: raw_str) => { bx(Expr::TextLit(strs.collect())) }), + Rule::single_quote_literal => + match_children!((eol: raw_str, contents: single_quote_continue) => { + contents.push(eol); + bx(Expr::TextLit(contents.into_iter().rev().collect())) + }), Rule::natural_literal_raw => map!(natural; |n| bx(Expr::NaturalLit(n))), Rule::integer_literal_raw => map!(integer; |n| bx(Expr::IntegerLit(n))), @@ -499,7 +529,7 @@ named!(expression>; match_rule!( }), Rule::let_expression => - match_children!((bindings*: letbinding, final_expr: expression) => { + match_children!((bindings*: let_binding, final_expr: expression) => { bindings.fold(final_expr, |acc, x| bx(Expr::Let(x.0, x.1, x.2, acc))) }), @@ -583,13 +613,13 @@ named!(expression>; match_rule!( }), - _ => with_rule!(rule; - match_children!((exprs*: expression) => { - let rulename = format!("{:?}", rule); - // panic!(rulename); - bx(Expr::FailedParse(rulename, exprs.map(|x| *x).collect())) - }) - ), + // _ => with_rule!(rule; + // match_children!((exprs*: expression) => { + // let rulename = format!("{:?}", rule); + // // panic!(rulename); + // bx(Expr::FailedParse(rulename, exprs.map(|x| *x).collect())) + // }) + // ), )); named!(final_expression>; @@ -608,13 +638,20 @@ fn test_parse() { // let expr = r#"(1 + 2) * 3"#; let expr = r#"if True then 1 + 3 * 5 else 2"#; println!("{:?}", parse_expr_lalrpop(expr)); - match parse_expr_pest(expr) { - Err(e) => { - println!("{:?}", e); - println!("{}", e); - } - ok => println!("{:?}", ok), - } + use std::thread; + // I don't understand why it stack overflows even on tiny expressions... + thread::Builder::new() + .stack_size(3 * 1024 * 1024) + .spawn(move || match parse_expr_pest(expr) { + Err(e) => { + println!("{:?}", e); + println!("{}", e); + } + ok => println!("{:?}", ok), + }) + .unwrap() + .join() + .unwrap(); // assert_eq!(parse_expr_pest(expr).unwrap(), parse_expr_lalrpop(expr).unwrap()); // assert!(false); diff --git a/dhall_parser/src/dhall.abnf b/dhall_parser/src/dhall.abnf index 391741f..e311aa6 100644 --- a/dhall_parser/src/dhall.abnf +++ b/dhall_parser/src/dhall.abnf @@ -100,9 +100,10 @@ ; ; For simplicity this supports Unix and Windows line-endings, which are the most ; common -end-of-line = +end-of-line-silent = %x0A ; "\n" / %x0D.0A ; "\r\n" +end-of-line = end-of-line-silent tab = %x09 ; "\t" @@ -112,7 +113,7 @@ block-comment-chunk = block-comment / %x20-10FFFF / tab - / end-of-line + / end-of-line-silent block-comment-continue = "-}" / block-comment-chunk block-comment-continue @@ -120,12 +121,12 @@ not-end-of-line = %x20-10FFFF / tab ; NOTE: Slightly different from Haskell-style single-line comments because this ; does not require a space after the dashes -line-comment = "--" *not-end-of-line end-of-line +line-comment = "--" *not-end-of-line end-of-line-silent whitespace-chunk = " " / tab - / end-of-line + / end-of-line-silent / line-comment / block-comment diff --git a/dhall_parser/src/dhall.pest.visibility b/dhall_parser/src/dhall.pest.visibility index a2dedde..e9bf241 100644 --- a/dhall_parser/src/dhall.pest.visibility +++ b/dhall_parser/src/dhall.pest.visibility @@ -1,4 +1,5 @@ -# end_of_line +end_of_line +# end_of_line_silent # tab # block_comment # block_comment_chunk @@ -21,7 +22,7 @@ label_raw label # double_quote_chunk double_quote_literal -# single_quote_continue +single_quote_continue single_quote_literal # text_literal_raw # if_raw -- cgit v1.2.3