From d5c3e8f2ef7438b7ac84be34cfe019ce365ae529 Mon Sep 17 00:00:00 2001
From: Nadrieril
Date: Wed, 7 Aug 2019 16:22:00 +0200
Subject: Forbid surrogate pairs and non-characters

Unicode escape sequences that denote a surrogate or a non-character are
rejected with a parse error. The checks run on the raw `u32` before it is
converted to a `char`: `char::try_from` refuses surrogates and values above
U+10FFFF, so unwrapping the conversion first would panic instead of
reporting the error.

---
 dhall-lang                                       |  2 +-
 dhall/src/tests.rs                               |  2 ++
 dhall_generated_parser/src/dhall.pest.visibility |  1 +
 dhall_syntax/src/parser.rs                       | 26 ++++++++++++++++++++++++--
 4 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/dhall-lang b/dhall-lang
index ee2fe7d..599f83b 160000
--- a/dhall-lang
+++ b/dhall-lang
@@ -1 +1 @@
-Subproject commit ee2fe7d9cbd699fc9f40ca4858abcc0f13105324
+Subproject commit 599f83b9d5ed24f4357455aecc794f572234a69a
diff --git a/dhall/src/tests.rs b/dhall/src/tests.rs
index 2f68dac..8b32fb4 100644
--- a/dhall/src/tests.rs
+++ b/dhall/src/tests.rs
@@ -195,6 +195,8 @@ pub fn run_test(
             let err = parse_file_str(&file_path).unwrap_err();
             match err {
                 Error::Parse(_) => {}
+                Error::IO(e)
+                    if e.kind() == std::io::ErrorKind::InvalidData => {}
                 e => panic!("Expected parse error, got: {:?}", e),
             }
         }
diff --git a/dhall_generated_parser/src/dhall.pest.visibility b/dhall_generated_parser/src/dhall.pest.visibility
index 3142ad5..33018ae 100644
--- a/dhall_generated_parser/src/dhall.pest.visibility
+++ b/dhall_generated_parser/src/dhall.pest.visibility
@@ -1,4 +1,5 @@
 # end_of_line
+# valid_non_ascii
 # tab
 # block_comment
 # block_comment_char
diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs
index 5be8477..8336c74 100644
--- a/dhall_syntax/src/parser.rs
+++ b/dhall_syntax/src/parser.rs
@@ -425,8 +425,30 @@ make_parser! {
 
                     // `s` has length 8, so `bytes` has length 4
                     let bytes: &[u8] = &hex::decode(s).unwrap();
-                    let c = u32::from_be_bytes(bytes.try_into().unwrap());
-                    let c = char::try_from(c).unwrap();
+                    let i = u32::from_be_bytes(bytes.try_into().unwrap());
+                    // Validate the raw code point *before* building a `char`:
+                    // `char::try_from` fails on surrogates, so converting first
+                    // would panic instead of reporting a parse error.
+                    match i {
+                        0xD800..=0xDFFF => {
+                            Err(format!("Escape sequences can't contain surrogate pairs: \"\\u{{{:x}}}\"", i))?
+                        },
+                        0x0FFFE..=0x0FFFF | 0x1FFFE..=0x1FFFF |
+                        0x2FFFE..=0x2FFFF | 0x3FFFE..=0x3FFFF |
+                        0x4FFFE..=0x4FFFF | 0x5FFFE..=0x5FFFF |
+                        0x6FFFE..=0x6FFFF | 0x7FFFE..=0x7FFFF |
+                        0x8FFFE..=0x8FFFF | 0x9FFFE..=0x9FFFF |
+                        0xAFFFE..=0xAFFFF | 0xBFFFE..=0xBFFFF |
+                        0xCFFFE..=0xCFFFF | 0xDFFFE..=0xDFFFF |
+                        0xEFFFE..=0xEFFFF | 0xFFFFE..=0xFFFFF |
+                        0x10FFFE..=0x10FFFF => {
+                            Err(format!("Escape sequences can't contain non-characters: \"\\u{{{:x}}}\"", i))?
+                        },
+                        _ => {}
+                    }
+                    // Surrogates were rejected above, so a failure here means `i` is above U+10FFFF.
+                    let c = char::try_from(i)
+                        .map_err(|_| format!("Escape sequences can't contain code points above U+10FFFF: \"\\u{{{:x}}}\"", i))?;
                     std::iter::once(c).collect()
                 }
             }
-- 
cgit v1.2.3