summary refs log tree commit diff
diff options
context:
space:
mode:
author Nadrieril 2019-08-07 16:22:00 +0200
committer Nadrieril 2019-08-07 16:22:00 +0200
commit d5c3e8f2ef7438b7ac84be34cfe019ce365ae529 (patch)
tree 29326ae083f3b5f13fa6327a1df182a2b8ee54cd
parent 7827d10b431061ca9d2ef83d04a5a4e9f4c7fd47 (diff)
Forbid surrogate pairs and non-characters
m--------- dhall-lang 0
-rw-r--r-- dhall/src/tests.rs 2
-rw-r--r-- dhall_generated_parser/src/dhall.pest.visibility 1
-rw-r--r-- dhall_syntax/src/parser.rs 23
4 files changed, 24 insertions, 2 deletions
diff --git a/dhall-lang b/dhall-lang
-Subproject ee2fe7d9cbd699fc9f40ca4858abcc0f1310532
+Subproject 599f83b9d5ed24f4357455aecc794f572234a69
diff --git a/dhall/src/tests.rs b/dhall/src/tests.rs
index 2f68dac..8b32fb4 100644
--- a/dhall/src/tests.rs
+++ b/dhall/src/tests.rs
@@ -195,6 +195,8 @@ pub fn run_test(
let err = parse_file_str(&file_path).unwrap_err();
match err {
Error::Parse(_) => {}
+ Error::IO(e)
+ if e.kind() == std::io::ErrorKind::InvalidData => {}
e => panic!("Expected parse error, got: {:?}", e),
}
}
diff --git a/dhall_generated_parser/src/dhall.pest.visibility b/dhall_generated_parser/src/dhall.pest.visibility
index 3142ad5..33018ae 100644
--- a/dhall_generated_parser/src/dhall.pest.visibility
+++ b/dhall_generated_parser/src/dhall.pest.visibility
@@ -1,4 +1,5 @@
# end_of_line
+# valid_non_ascii
# tab
# block_comment
# block_comment_char
diff --git a/dhall_syntax/src/parser.rs b/dhall_syntax/src/parser.rs
index 5be8477..8336c74 100644
--- a/dhall_syntax/src/parser.rs
+++ b/dhall_syntax/src/parser.rs
@@ -425,8 +425,27 @@ make_parser! {
// `s` has length 8, so `bytes` has length 4
let bytes: &[u8] = &hex::decode(s).unwrap();
- let c = u32::from_be_bytes(bytes.try_into().unwrap());
- let c = char::try_from(c).unwrap();
+ let i = u32::from_be_bytes(bytes.try_into().unwrap());
+ let c = char::try_from(i).unwrap();
+ match i {
+ 0xD800..=0xDFFF => {
+ let c_ecapsed = c.escape_unicode();
+ Err(format!("Escape sequences can't contain surrogate pairs: \"{}\"", c_ecapsed))?
+ },
+ 0x0FFFE..=0x0FFFF | 0x1FFFE..=0x1FFFF |
+ 0x2FFFE..=0x2FFFF | 0x3FFFE..=0x3FFFF |
+ 0x4FFFE..=0x4FFFF | 0x5FFFE..=0x5FFFF |
+ 0x6FFFE..=0x6FFFF | 0x7FFFE..=0x7FFFF |
+ 0x8FFFE..=0x8FFFF | 0x9FFFE..=0x9FFFF |
+ 0xAFFFE..=0xAFFFF | 0xBFFFE..=0xBFFFF |
+ 0xCFFFE..=0xCFFFF | 0xDFFFE..=0xDFFFF |
+ 0xEFFFE..=0xEFFFF | 0xFFFFE..=0xFFFFF |
+ 0x10FFFE..=0x10FFFF => {
+ let c_ecapsed = c.escape_unicode();
+ Err(format!("Escape sequences can't contain non-characters: \"{}\"", c_ecapsed))?
+ },
+ _ => {}
+ }
std::iter::once(c).collect()
}
}