From d0978c35d88811462de5c448a24770f73b321e70 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 24 Mar 2019 21:05:01 +0100 Subject: Parse multiline strings correctly Closes #24 --- Cargo.lock | 30 +++++++++++++++++++ dhall-lang | 2 +- dhall/Cargo.toml | 4 +++ dhall/build.rs | 36 +++++++++++++---------- dhall/tests/normalization.rs | 8 ------ dhall_core/src/parser.rs | 68 +++++++++++++++++++++++++++++++------------- dhall_core/src/text.rs | 2 +- 7 files changed, 106 insertions(+), 44 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fa37a5e..cfbe901 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -75,6 +75,7 @@ dependencies = [ "pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)", "serde_cbor 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)", "term-painter 0.2.4 (registry+https://github.com/rust-lang/crates.io-index)", + "walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -270,6 +271,14 @@ dependencies = [ "proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)", ] +[[package]] +name = "same-file" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "serde" version = "1.0.89" @@ -348,6 +357,16 @@ name = "version_check" version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "walkdir" +version = "2.2.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", + "winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "winapi" version = "0.2.8" @@ -372,6 +391,14 @@ name = "winapi-i686-pc-windows-gnu" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" +[[package]] +name = "winapi-util" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +dependencies = [ + "winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)", +] + [[package]] name = "winapi-x86_64-pc-windows-gnu" version = "0.4.0" @@ -407,6 +434,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum pretty_assertions 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)" = "3f81e1644e1b54f5a68959a29aa86cde704219254669da328ecfdf6a1f09d427" "checksum proc-macro2 0.4.27 (registry+https://github.com/rust-lang/crates.io-index)" = "4d317f9caece796be1980837fd5cb3dfec5613ebdb04ad0956deea83ce168915" "checksum quote 0.6.11 (registry+https://github.com/rust-lang/crates.io-index)" = "cdd8e04bd9c52e0342b406469d494fcb033be4bdbe5c606016defbb1681411e1" +"checksum same-file 1.0.4 (registry+https://github.com/rust-lang/crates.io-index)" = "8f20c4be53a8a1ff4c1f1b2bd14570d2f634628709752f0702ecdd2b3f9a5267" "checksum serde 1.0.89 (registry+https://github.com/rust-lang/crates.io-index)" = "92514fb95f900c9b5126e32d020f5c6d40564c27a5ea6d1d7d9f157a96623560" "checksum serde_cbor 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "45cd6d95391b16cd57e88b68be41d504183b7faae22030c0cc3b3f73dd57b2fd" "checksum sha-1 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "51b9d1f3b5de8a167ab06834a7c883bd197f2191e1dda1a22d9ccfeedbf9aded" @@ -418,8 +446,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum ucd-trie 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "71a9c5b1fe77426cf144cc30e49e955270f5086e31a6441dfa8b32efc09b9d77" "checksum unicode-xid 0.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "fc72304796d0818e357ead4e000d19c9c174ab23dc11093ac919054d20a6a7fc" "checksum version_check 0.1.5 (registry+https://github.com/rust-lang/crates.io-index)" = "914b1a6776c4c929a602fafd8bc742e06365d4bcbe48c30f9cca5824f70dc9dd" +"checksum walkdir 2.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "9d9d7ed3431229a144296213105a390676cc49c9b6a72bd19f3176c98e129fa1" "checksum winapi 0.2.8 (registry+https://github.com/rust-lang/crates.io-index)" = "167dc9d6949a9b857f3451275e911c3f44255842c1f7a76f33c55103a909087a" "checksum winapi 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "92c1eb33641e276cfa214a0522acad57be5c56b10cb348b3c5117db75f3ac4b0" "checksum winapi-build 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "2d315eee3b34aca4797b2da6b13ed88266e6d612562a0c46390af8299fc699bc" "checksum winapi-i686-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" +"checksum winapi-util 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7168bab6e1daee33b4557efd0e95d5ca70a03706d39fa5f3fe7a236f584b03c9" "checksum winapi-x86_64-pc-windows-gnu 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" diff --git a/dhall-lang b/dhall-lang index 55abd1d..e6e500d 160000 --- a/dhall-lang +++ b/dhall-lang @@ -1 +1 @@ -Subproject commit 55abd1d97a21bab45a533ea8279e28722d380a6d +Subproject commit e6e500d3243d6b091c6d1220dd6567a262d2e2aa diff --git a/dhall/Cargo.toml b/dhall/Cargo.toml index 234daa4..e898cde 100644 --- a/dhall/Cargo.toml +++ b/dhall/Cargo.toml @@ -22,3 +22,7 @@ dhall_generator = { path = "../dhall_generator" } [dev-dependencies] pretty_assertions = "0.6.1" + +[build-dependencies] +walkdir = "2" + diff --git a/dhall/build.rs b/dhall/build.rs index c8ad6ad..cc62a97 100644 --- a/dhall/build.rs +++ b/dhall/build.rs @@ -1,23 +1,28 @@ use std::env; use std::ffi::OsString; -use std::fs::{self, File}; +use std::fs::File; use std::io::Write; use std::path::Path; +use walkdir::WalkDir; fn dhall_files_in_dir<'a>(dir: &'a Path) -> impl Iterator + 'a { - fs::read_dir(dir).unwrap().filter_map(move |path| { - let path = path.unwrap().path(); - let path = path.strip_prefix(dir).unwrap(); - if path.extension() != Some(&OsString::from("dhall")) { - return None; - } - let path = path.to_string_lossy(); - let path = path[..path.len() - 6].to_owned(); - Some(path) - }) + WalkDir::new(dir) + .into_iter() + .filter_map(|e| e.ok()) + .filter_map(move |path| { + let path = path.path(); + let path = path.strip_prefix(dir).unwrap(); + if path.extension() != Some(&OsString::from("dhall")) { + return None; + } + let path = path.to_string_lossy(); + let path = path[..path.len() - 6].to_owned(); + Some(path) + }) } fn main() -> std::io::Result<()> { + println!("cargo:rerun-if-changed=../dhall-lang/.git"); let out_dir = env::var("OUT_DIR").unwrap(); let tests_dir = Path::new("../dhall-lang/tests/"); @@ -25,17 +30,18 @@ fn main() -> std::io::Result<()> { let mut file = File::create(parser_tests_path)?; for path in dhall_files_in_dir(&tests_dir.join("parser/success/")) { - let name = &path[..path.len() - 1]; + let path = &path[..path.len() - 1]; + let name = path.replace("/", "_"); // Skip this test; parser is way too slow indebug mode if name == "largeExpression" { continue; } - writeln!(file, r#"make_spec_test!(ParserSuccess, spec_parser_success_{0}, "{0}");"#, name)?; + writeln!(file, r#"make_spec_test!(ParserSuccess, spec_parser_success_{}, "{}");"#, name, path)?; } for path in dhall_files_in_dir(&tests_dir.join("parser/failure/")) { - let name = &path; - writeln!(file, r#"make_spec_test!(ParserFailure, spec_parser_failure_{0}, "{0}");"#, name)?; + let name = path.replace("/", "_"); + writeln!(file, r#"make_spec_test!(ParserFailure, spec_parser_failure_{}, "{}");"#, name, path)?; } Ok(()) diff --git a/dhall/tests/normalization.rs b/dhall/tests/normalization.rs index 5df46a6..d2db38d 100644 --- a/dhall/tests/normalization.rs +++ b/dhall/tests/normalization.rs @@ -14,14 +14,6 @@ norm!(spec_normalization_success_haskell_tutorial_access_0, "haskell-tutorial/ac // norm!(spec_normalization_success_haskell_tutorial_combineTypes_1, "haskell-tutorial/combineTypes/1"); // norm!(spec_normalization_success_haskell_tutorial_prefer_0, "haskell-tutorial/prefer/0"); norm!(spec_normalization_success_haskell_tutorial_projection_0, "haskell-tutorial/projection/0"); -// norm!(spec_normalization_success_multiline_escape, "multiline/escape"); -// norm!(spec_normalization_success_multiline_hangingIndent, "multiline/hangingIndent"); -// norm!(spec_normalization_success_multiline_interesting, "multiline/interesting"); -// norm!(spec_normalization_success_multiline_interiorIndent, "multiline/interiorIndent"); -// norm!(spec_normalization_success_multiline_interpolation, "multiline/interpolation"); -// norm!(spec_normalization_success_multiline_preserveComment, "multiline/preserveComment"); -// norm!(spec_normalization_success_multiline_singleLine, "multiline/singleLine"); -// norm!(spec_normalization_success_multiline_twoLines, "multiline/twoLines"); norm!(spec_normalization_success_prelude_Bool_and_0, "prelude/Bool/and/0"); diff --git a/dhall_core/src/parser.rs b/dhall_core/src/parser.rs index 3d5a761..fb119b1 100644 --- a/dhall_core/src/parser.rs +++ b/dhall_core/src/parser.rs @@ -1,3 +1,4 @@ +use itertools::Itertools; use pest::iterators::Pair; use pest::Parser; use std::collections::BTreeMap; @@ -321,8 +322,24 @@ make_parser! { rule!(end_of_line<()>; captured_str!(_) => ()); rule!(single_quote_literal; children!( - [end_of_line(eol), single_quote_continue(contents)] => { - contents.into_iter().rev().collect::() + [end_of_line(eol), single_quote_continue(lines)] => { + let space = InterpolatedTextContents::Text(" ".to_owned()); + let newline = InterpolatedTextContents::Text("\n".to_owned()); + let min_indent = lines + .iter() + .map(|l| { + l.iter().rev().take_while(|c| **c == space).count() + }) + .min() + .unwrap(); + + lines + .into_iter() + .rev() + .map(|mut l| { l.split_off(l.len() - min_indent); l }) + .intersperse(vec![newline]) + .flat_map(|x| x.into_iter().rev()) + .collect::() } )); rule!(single_quote_char<&'a str>; @@ -338,25 +355,38 @@ make_parser! { [expression(e)] => e )); - rule!(single_quote_continue>; children!( - [interpolation(c), single_quote_continue(rest)] => { - let mut rest = rest; - rest.push(InterpolatedTextContents::Expr(c)); rest - }, - [escaped_quote_pair(c), single_quote_continue(rest)] => { - let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c.to_owned())); rest - }, - [escaped_interpolation(c), single_quote_continue(rest)] => { - let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c.to_owned())); rest - }, - [single_quote_char(c), single_quote_continue(rest)] => { - let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c.to_owned())); rest + rule!(single_quote_continue>>; children!( + [interpolation(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Expr(c); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [escaped_quote_pair(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [escaped_interpolation(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines + }, + [single_quote_char("\n"), single_quote_continue(lines)] => { + let mut lines = lines; + lines.push(vec![]); + lines + }, + [single_quote_char(c), single_quote_continue(lines)] => { + let c = InterpolatedTextContents::Text(c.to_owned()); + let mut lines = lines; + lines.last_mut().unwrap().push(c); + lines }, [] => { - vec![] + vec![vec![]] }, )); diff --git a/dhall_core/src/text.rs b/dhall_core/src/text.rs index 2a468d2..d377877 100644 --- a/dhall_core/src/text.rs +++ b/dhall_core/src/text.rs @@ -29,7 +29,7 @@ impl From for InterpolatedText { } } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum InterpolatedTextContents { Text(String), Expr(SubExpr), -- cgit v1.2.3