From e52f50080d8e0e6d6a05b1045e3e0e840acb50d0 Mon Sep 17 00:00:00 2001 From: Nadrieril Date: Sun, 4 Aug 2019 14:07:34 +0200 Subject: Braced escape sequences --- dhall-lang | 2 +- dhall_generated_parser/src/dhall.pest.visibility | 1 + dhall_syntax/src/parser.rs | 28 ++++++++++++++++++++---- 3 files changed, 26 insertions(+), 5 deletions(-) diff --git a/dhall-lang b/dhall-lang index 0396b67..dbf4ebc 160000 --- a/dhall-lang +++ b/dhall-lang @@ -1 +1 @@ -Subproject commit 0396b67639a6deaff480844e71b576db998717d3 +Subproject commit dbf4ebcfabf499e87c27e75bec108d91929ccc31 diff --git a/dhall_generated_parser/src/dhall.pest.visibility b/dhall_generated_parser/src/dhall.pest.visibility index 0c48656..2b7c477 100644 --- a/dhall_generated_parser/src/dhall.pest.visibility +++ b/dhall_generated_parser/src/dhall.pest.visibility @@ -21,6 +21,7 @@ label # any_label double_quote_chunk double_quote_escaped +# unicode_escape double_quote_char double_quote_literal single_quote_continue diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs index db1699b..2450c76 100644 --- a/dhall_syntax/src/parser.rs +++ b/dhall_syntax/src/parser.rs @@ -402,11 +402,31 @@ make_parser! { "n" => "\n".to_owned(), "r" => "\r".to_owned(), "t" => "\t".to_owned(), + // "uXXXX" or "u{XXXXX}" _ => { - // "uXXXX" - use std::convert::TryFrom; - let c = u16::from_str_radix(&s[1..5], 16).unwrap(); - let c = char::try_from(u32::from(c)).unwrap(); + use std::convert::{TryFrom, TryInto}; + + let s = &s[1..]; + let s = if &s[0..1] == "{" { + &s[1..s.len()-1] + } else { + &s[0..s.len()] + }; + + if s.len() > 8 { + Err(format!("Escape sequences can't have more than 8 chars: \"{}\"", s))? + } + + // pad with zeroes + let s: String = std::iter::repeat('0') + .take(8 - s.len()) + .chain(s.chars()) + .collect(); + + // `s` has length 8, so `bytes` has length 4 + let bytes: &[u8] = &hex::decode(s).unwrap(); + let c = u32::from_be_bytes(bytes.try_into().unwrap()); + let c = char::try_from(c).unwrap(); std::iter::once(c).collect() } } -- cgit v1.2.3