From 4a45ed7e1f80a7d3e4032e08eb499dab3412453f Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sat, 23 Mar 2019 01:09:05 +0100 Subject: Parse unicode escapes in string --- dhall_core/src/parser.rs | 44 ++++++++++++++++++++++++-------------------- dhall_core/src/printer.rs | 20 +++++++++++++------- dhall_core/src/text.rs | 20 ++++++++++---------- 3 files changed, 47 insertions(+), 37 deletions(-) (limited to 'dhall_core/src') diff --git a/dhall_core/src/parser.rs b/dhall_core/src/parser.rs index 8908c78..9fd3213 100644 --- a/dhall_core/src/parser.rs +++ b/dhall_core/src/parser.rs @@ -13,7 +13,7 @@ use crate::*; // are here and hopefully you can figure out how they work. type ParsedText = InterpolatedText; -type ParsedTextContents<'a> = InterpolatedTextContents<'a, X, Import>; +type ParsedTextContents = InterpolatedTextContents; pub type ParseError = pest::error::Error; @@ -270,7 +270,7 @@ make_parser! { } )); - rule!(double_quote_chunk>; children!( + rule!(double_quote_chunk; children!( [interpolation(e)] => { InterpolatedTextContents::Expr(e) }, @@ -278,24 +278,28 @@ make_parser! { InterpolatedTextContents::Text(s) }, [double_quote_char(s)] => { - InterpolatedTextContents::Text(s) + InterpolatedTextContents::Text(s.to_owned()) }, )); - rule!(double_quote_escaped<&'a str>; - // TODO: parse all escapes + rule!(double_quote_escaped; captured_str!(s) => { match s { - "\"" => "\"", - "$" => "$", - "\\" => "\\", - "/" => "/", - // "b" => "\b", - // "f" => "\f", - "n" => "\n", - "r" => "\r", - "t" => "\t", - // "uXXXX" - _ => unimplemented!(), + "\"" => "\"".to_owned(), + "$" => "$".to_owned(), + "\\" => "\\".to_owned(), + "/" => "/".to_owned(), + "b" => "\u{0008}".to_owned(), + "f" => "\u{000C}".to_owned(), + "n" => "\n".to_owned(), + "r" => "\r".to_owned(), + "t" => "\t".to_owned(), + _ => { + // "uXXXX" + use std::convert::TryFrom; + let c = u16::from_str_radix(&s[1..5], 16).unwrap(); + let c = char::try_from(c as u32).unwrap(); + std::iter::once(c).collect() + } } } ); @@ -323,22 +327,22 @@ make_parser! { [expression(e)] => e )); - rule!(single_quote_continue>>; children!( + rule!(single_quote_continue>; children!( [interpolation(c), single_quote_continue(rest)] => { let mut rest = rest; rest.push(InterpolatedTextContents::Expr(c)); rest }, [escaped_quote_pair(c), single_quote_continue(rest)] => { let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c)); rest + rest.push(InterpolatedTextContents::Text(c.to_owned())); rest }, [escaped_interpolation(c), single_quote_continue(rest)] => { let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c)); rest + rest.push(InterpolatedTextContents::Text(c.to_owned())); rest }, [single_quote_char(c), single_quote_continue(rest)] => { let mut rest = rest; - rest.push(InterpolatedTextContents::Text(c)); rest + rest.push(InterpolatedTextContents::Text(c.to_owned())); rest }, [] => { vec![] diff --git a/dhall_core/src/printer.rs b/dhall_core/src/printer.rs index 508c1c8..5ecf5ce 100644 --- a/dhall_core/src/printer.rs +++ b/dhall_core/src/printer.rs @@ -206,13 +206,19 @@ impl Expr { for x in a.iter() { match x { InterpolatedTextContents::Text(a) => { - // TODO Format all escapes properly - f.write_str( - &a.replace("\n", "\\n") - .replace("\t", "\\t") - .replace("\r", "\\r") - .replace("\"", "\\\""), - )?; + for c in a.chars() { + match c { + '\\' => f.write_str("\\\\"), + '"' => f.write_str("\\\""), + '$' => f.write_str("\\$"), + '\u{0008}' => f.write_str("\\b"), + '\u{000C}' => f.write_str("\\f"), + '\n' => f.write_str("\\n"), + '\r' => f.write_str("\\r"), + '\t' => f.write_str("\\t"), + c => write!(f, "{}", c), + }?; + } } InterpolatedTextContents::Expr(e) => { f.write_str("${ ")?; diff --git a/dhall_core/src/text.rs b/dhall_core/src/text.rs index eb0d39c..2a468d2 100644 --- a/dhall_core/src/text.rs +++ b/dhall_core/src/text.rs @@ -30,8 +30,8 @@ impl From for InterpolatedText { } #[derive(Debug, Clone)] -pub enum InterpolatedTextContents<'a, Note, Embed> { - Text(&'a str), +pub enum InterpolatedTextContents { + Text(String), Expr(SubExpr), } @@ -46,35 +46,35 @@ impl InterpolatedText { } } - pub fn iter(&self) -> impl Iterator> { + pub fn iter<'a>( + &'a self, + ) -> impl Iterator> + 'a { use std::iter::once; - once(InterpolatedTextContents::Text(self.head.as_ref())).chain( + once(InterpolatedTextContents::Text(self.head.clone())).chain( self.tail.iter().flat_map(|(e, s)| { once(InterpolatedTextContents::Expr(Rc::clone(e))) - .chain(once(InterpolatedTextContents::Text(s))) + .chain(once(InterpolatedTextContents::Text(s.clone()))) }), ) } } -impl<'a, N: 'a, E: 'a> FromIterator> +impl<'a, N: 'a, E: 'a> FromIterator> for InterpolatedText { fn from_iter(iter: T) -> Self where - T: IntoIterator>, + T: IntoIterator>, { let mut res = InterpolatedText { head: "".to_owned(), tail: vec![], }; - // let mut empty_string = "".to_owned(); let mut crnt_str = &mut res.head; for x in iter.into_iter() { match x { - InterpolatedTextContents::Text(s) => crnt_str.push_str(s), + InterpolatedTextContents::Text(s) => crnt_str.push_str(&s), InterpolatedTextContents::Expr(e) => { - // crnt_str = &mut empty_string; res.tail.push((e.clone(), "".to_owned())); crnt_str = &mut res.tail.last_mut().unwrap().1; } -- cgit v1.2.3