diff options
author | Nadrieril | 2020-11-01 18:50:09 +0000 |
---|---|---|
committer | GitHub | 2020-11-01 18:50:09 +0000 |
commit | 2ae979a22ee4b79590f74110d61164383c7b5182 (patch) | |
tree | 5286606d6d59f5d5d4b9ecb7f651c97d201a092e | |
parent | 48037367933085ca9c1c67c8c59f311e2b21be6d (diff) | |
parent | e5381c9b76f1d88dedb4a453cd026c8e98be5533 (diff) |
Merge pull request #192 from Nadrieril/rework-caching
-rw-r--r-- | Cargo.lock | 7 | ||||
-rw-r--r-- | dhall/Cargo.toml | 4 | ||||
-rw-r--r-- | dhall/src/error/mod.rs | 4 | ||||
-rw-r--r-- | dhall/src/lib.rs | 5 | ||||
-rw-r--r-- | dhall/src/semantics/parse.rs | 7 | ||||
-rw-r--r-- | dhall/src/semantics/resolve/cache.rs | 658 | ||||
-rw-r--r-- | dhall/src/semantics/resolve/env.rs | 90 | ||||
-rw-r--r-- | dhall/src/semantics/resolve/resolve.rs | 131 | ||||
-rw-r--r-- | dhall/src/syntax/ast/expr.rs | 5 | ||||
-rw-r--r-- | dhall/src/syntax/ast/import.rs | 2 | ||||
-rw-r--r-- | dhall/src/syntax/binary/decode.rs | 2 | ||||
-rw-r--r-- | dhall/src/syntax/text/parser.rs | 2 | ||||
-rw-r--r-- | dhall/src/utils.rs | 17 | ||||
-rw-r--r-- | dhall/tests/import/data/simple.dhall | 1 | ||||
-rw-r--r-- | dhall/tests/import/failure/unit/HashMismatch2.dhall | 2 | ||||
-rw-r--r-- | dhall/tests/import/failure/unit/HashMismatch2.txt | 9 | ||||
-rw-r--r-- | dhall/tests/spec.rs | 82 |
17 files changed, 288 insertions, 740 deletions
@@ -300,6 +300,7 @@ dependencies = [ "annotate-snippets", "anyhow", "colored-diff", + "fs_extra", "hex", "itertools 0.9.0", "lazy_static", @@ -409,6 +410,12 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "00b0228411908ca8685dba7fc2cdd70ec9990a6e753e89b6ac91a84c40fbaf4b" [[package]] +name = "fs_extra" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2022715d62ab30faffd124d40b76f4134a550a87792276512b18d63272333394" + +[[package]] name = "fuchsia-zircon" version = "0.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" diff --git a/dhall/Cargo.toml b/dhall/Cargo.toml index 7923009..207539a 100644 --- a/dhall/Cargo.toml +++ b/dhall/Cargo.toml @@ -41,15 +41,13 @@ reqwest = { version = "0.10", features = ["blocking"], optional = true } [dev-dependencies] anyhow = "1.0.28" colored-diff = "0.2.2" +fs_extra = "1.2.0" libtest-mimic = "0.3.0" rand = "0.7" version-sync = "0.9" walkdir = "2" [build-dependencies] -walkdir = "2" abnf_to_pest = { version = "^0.5.0", path = "../abnf_to_pest" } pest_generator = "2.1.3" quote = "1.0" - - diff --git a/dhall/src/error/mod.rs b/dhall/src/error/mod.rs index d533264..800b8c1 100644 --- a/dhall/src/error/mod.rs +++ b/dhall/src/error/mod.rs @@ -1,6 +1,6 @@ use std::io::Error as IOError; -use crate::semantics::resolve::{ImportLocation, ImportStack}; +use crate::semantics::resolve::{CyclesStack, ImportLocation}; use crate::syntax::{Import, ParseError}; mod builder; @@ -31,7 +31,7 @@ pub enum ImportError { MissingEnvVar, SanityCheck, UnexpectedImport(Import<()>), - ImportCycle(ImportStack, ImportLocation), + ImportCycle(CyclesStack, ImportLocation), Url(url::ParseError), } diff --git a/dhall/src/lib.rs b/dhall/src/lib.rs index 87c461b..f9d259c 100644 --- a/dhall/src/lib.rs +++ b/dhall/src/lib.rs @@ -13,6 +13,7 @@ pub mod error; pub mod operations; pub mod semantics; pub mod syntax; +pub mod utils; use std::fmt::Display; use std::path::Path; @@ -37,8 +38,8 @@ pub struct Resolved(Hir); /// A typed expression #[derive(Debug, Clone)] pub struct Typed { - hir: Hir, - ty: Type, + pub hir: Hir, + pub ty: Type, } /// A normalized expression. diff --git a/dhall/src/semantics/parse.rs b/dhall/src/semantics/parse.rs index 82396e0..a770c15 100644 --- a/dhall/src/semantics/parse.rs +++ b/dhall/src/semantics/parse.rs @@ -1,5 +1,3 @@ -use std::fs::File; -use std::io::Read; use std::path::Path; use url::Url; @@ -36,9 +34,8 @@ pub fn parse_binary(data: &[u8]) -> Result<Parsed, Error> { } pub fn parse_binary_file(f: &Path) -> Result<Parsed, Error> { - let mut buffer = Vec::new(); - File::open(f)?.read_to_end(&mut buffer)?; - let expr = binary::decode(&buffer)?; + let data = crate::utils::read_binary_file(f)?; + let expr = binary::decode(&data)?; let root = ImportLocation::Local(f.to_owned()); Ok(Parsed(expr, root)) } diff --git a/dhall/src/semantics/resolve/cache.rs b/dhall/src/semantics/resolve/cache.rs index 164baea..7763f18 100644 --- a/dhall/src/semantics/resolve/cache.rs +++ b/dhall/src/semantics/resolve/cache.rs @@ -2,13 +2,10 @@ use std::env; use std::io::Write; use std::path::{Path, PathBuf}; -use crate::error::{CacheError, Error, ErrorKind}; -use crate::parse::parse_binary_file; -use crate::semantics::{Import, TypedHir}; -use crate::syntax::Hash; -use crate::syntax::{binary, Expr}; -use crate::Parsed; -use std::env::VarError; +use crate::error::{CacheError, Error}; +use crate::parse::parse_binary; +use crate::syntax::{binary, Hash}; +use crate::Typed; use std::ffi::OsStr; use std::fs::File; @@ -20,634 +17,97 @@ const ALTERNATE_CACHE_ENV_VAR: &str = "HOME"; const ALTERNATE_CACHE_ENV_VAR: &str = "LOCALAPPDATA"; #[cfg(any(unix, windows))] -fn load_cache_dir( - env_provider: impl Fn(&str) -> Result<String, VarError> + Copy, -) -> Result<PathBuf, CacheError> { - let env_provider = |s| { - env_provider(s) - .map(PathBuf::from) - .map_err(|_| CacheError::MissingConfiguration) +fn default_cache_dir() -> Result<PathBuf, CacheError> { + let cache_base_path = match env::var(OsStr::new(CACHE_ENV_VAR)) { + Ok(path) => PathBuf::from(path), + Err(_) => match env::var(OsStr::new(ALTERNATE_CACHE_ENV_VAR)) { + Ok(path) => PathBuf::from(path).join(".cache"), + Err(_) => return Err(CacheError::MissingConfiguration), + }, }; - let cache_base_path = env_provider(CACHE_ENV_VAR).or_else(|_| { - env_provider(ALTERNATE_CACHE_ENV_VAR).map(|path| path.join(".cache")) - })?; Ok(cache_base_path.join("dhall")) } + #[cfg(not(any(unix, windows)))] -fn load_cache_dir( - _provider: impl Fn(&str) -> Result<String, VarError> + Copy, -) -> Result<PathBuf, CacheError> { +fn default_cache_dir() -> Result<PathBuf, CacheError> { Err(CacheError::MissingConfiguration) } -#[derive(Debug, PartialEq)] +#[derive(Debug, Clone, PartialEq)] pub struct Cache { - cache_dir: Option<PathBuf>, + cache_dir: PathBuf, } impl Cache { - fn new_with_provider( - provider: impl Fn(&str) -> Result<String, VarError> + Copy, - ) -> Cache { - // Should warn that we can't initialize cache on error - let cache_dir = load_cache_dir(provider).and_then(|path| { - if !path.exists() { - std::fs::create_dir_all(path.as_path()) - .map(|_| path) - .map_err(|e| CacheError::InitialisationError { cause: e }) - } else { - Ok(path) - } - }); - Cache { - cache_dir: cache_dir.ok(), + pub fn new() -> Result<Cache, Error> { + let cache_dir = default_cache_dir()?; + if !cache_dir.exists() { + std::fs::create_dir_all(&cache_dir) + .map_err(|e| CacheError::InitialisationError { cause: e })?; } + Ok(Cache { cache_dir }) } - pub fn new() -> Cache { - Cache::new_with_provider(|name| env::var(OsStr::new(name))) - } -} - -impl Cache { - fn cache_file(&self, import: &Import) -> Option<PathBuf> { - self.cache_dir - .as_ref() - .and_then(|cache_dir| { - import.hash.as_ref().map(|hash| (cache_dir, hash)) - }) - .map(|(cache_dir, hash)| cache_dir.join(cache_filename(hash))) + fn entry_path(&self, hash: &Hash) -> PathBuf { + self.cache_dir.join(filename_for_hash(hash)) } - fn search_cache_file(&self, import: &Import) -> Option<PathBuf> { - self.cache_file(import) - .filter(|cache_file| cache_file.exists()) + pub fn get(&self, hash: &Hash) -> Result<Typed, Error> { + let path = self.entry_path(hash); + let res = read_cache_file(&path, hash); + if let Err(_) = res { + if path.exists() { + // Delete cache file since it's invalid. We ignore the error. + let _ = std::fs::remove_file(&path); + } + } + res } - fn search_cache(&self, import: &Import) -> Option<Result<Parsed, Error>> { - self.search_cache_file(import) - .map(|cache_file| parse_binary_file(cache_file.as_path())) + pub fn insert(&self, hash: &Hash, expr: &Typed) -> Result<(), Error> { + let path = self.entry_path(hash); + write_cache_file(&path, expr) } +} - // Side effect since we don't use the result - fn delete_cache(&self, import: &Import) { - self.search_cache_file(import) - .map(|cache_file| std::fs::remove_file(cache_file.as_path())); - } +/// Read a file from the cache, also checking that its hash is valid. +fn read_cache_file(path: &Path, hash: &Hash) -> Result<Typed, Error> { + let data = crate::utils::read_binary_file(path)?; - // Side effect since we don't use the result - fn save_expr(&self, import: &Import, expr: &Expr) { - self.cache_file(import) - .map(|cache_file| save_expr(cache_file.as_path(), expr)); + match hash { + Hash::SHA256(hash) => { + let actual_hash = crate::utils::sha256_hash(&data); + if hash[..] != actual_hash[..] { + return Err(CacheError::CacheHashInvalid.into()); + } + } } - pub fn caching_import<F, R>( - &self, - import: &Import, - fetcher: F, - mut resolver: R, - ) -> Result<TypedHir, Error> - where - F: FnOnce() -> Result<Parsed, Error>, - R: FnMut(Parsed) -> Result<TypedHir, Error>, - { - // Lookup the cache - self.search_cache(import) - // On cache found - .and_then(|cache_result| { - // Try to resolve the cache imported content - match cache_result.and_then(|parsed| resolver(parsed)).and_then( - |typed_hir| { - check_hash(import.hash.as_ref().unwrap(), typed_hir) - }, - ) { - // Cache content is invalid (can't be parsed / can't be resolved / content sha invalid ) - Err(_) => { - // Delete cache file since it's invalid - self.delete_cache(import); - // Result as there were no cache - None - } - // Cache valid - r => Some(r), - } - }) - .unwrap_or_else(|| { - // Fetch and resolve as provided - let imported = fetcher().and_then(resolver); - // Save in cache the result if ok - let _ = imported.as_ref().map(|(hir, _)| { - self.save_expr(import, &hir.to_expr_noopts()) - }); - imported - }) - } + Ok(parse_binary(&data)?.skip_resolve()?.typecheck()?) } -fn save_expr(file_path: &Path, expr: &Expr) -> Result<(), Error> { - File::create(file_path)?.write_all(binary::encode(expr)?.as_slice())?; +/// Write a file to the cache. +fn write_cache_file(path: &Path, expr: &Typed) -> Result<(), Error> { + let data = binary::encode(&expr.to_expr())?; + File::create(path)?.write_all(data.as_slice())?; Ok(()) } -fn check_hash(hash: &Hash, typed_hir: TypedHir) -> Result<TypedHir, Error> { - if hash.as_ref()[..] != typed_hir.0.to_expr_alpha().hash()?[..] { - Err(Error::new(ErrorKind::Cache(CacheError::CacheHashInvalid))) - } else { - Ok(typed_hir) - } -} - -fn cache_filename<A: AsRef<[u8]>>(v: A) -> String { - format!("1220{}", hex::encode(v.as_ref())) -} - -impl AsRef<[u8]> for Hash { - fn as_ref(&self) -> &[u8] { - match self { - Hash::SHA256(sha) => sha.as_slice(), - } +fn filename_for_hash(hash: &Hash) -> String { + match hash { + Hash::SHA256(sha) => format!("1220{}", hex::encode(&sha)), } } #[cfg(test)] mod test { use super::*; - use crate::semantics::parse::parse_str; - use crate::syntax::{ - parse_expr, ExprKind, ImportMode, ImportTarget, NumKind, Span, - }; - use rand::distributions::Alphanumeric; - use rand::Rng; - use std::env::temp_dir; - - #[cfg(any(unix, windows))] - #[test] - fn load_cache_dir_should_result_xdg_cache_first() { - let actual = load_cache_dir(|var| match var { - CACHE_ENV_VAR => Ok("/home/user/custom".to_string()), - _ => Err(VarError::NotPresent), - }); - assert_eq!(actual.unwrap(), PathBuf::from("/home/user/custom/dhall")); - } - - #[cfg(unix)] - #[test] - fn load_cache_dir_should_result_alternate() { - let actual = load_cache_dir(|var| match var { - ALTERNATE_CACHE_ENV_VAR => Ok("/home/user".to_string()), - _ => Err(VarError::NotPresent), - }); - assert_eq!(actual.unwrap(), PathBuf::from("/home/user/.cache/dhall")); - } - - #[test] - fn load_cache_dir_should_result_none() { - let actual = load_cache_dir(|_| Err(VarError::NotPresent)); - assert!(matches!( - actual.unwrap_err(), - CacheError::MissingConfiguration - )); - } - - #[test] - fn new_with_provider_should_create_cache_folder() { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path()).unwrap(); - - let actual = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - assert_eq!( - actual, - Cache { - cache_dir: Some(dir.join("dhall")) - } - ); - assert!(dir.join("dhall").exists()); - std::fs::remove_dir_all(dir.as_path()).unwrap(); - } - - #[test] - fn new_with_provider_should_return_cache_for_existing_folder() { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path()).unwrap(); - File::create(dir.join("dhall")).unwrap(); - - assert!(dir.join("dhall").exists()); - - let actual = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - assert_eq!( - actual, - Cache { - cache_dir: Some(dir.join("dhall")) - } - ); - std::fs::remove_dir_all(dir.as_path()).unwrap(); - } - - #[test] - fn caching_import_should_load_cache() -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - - // Create cache file - let expr = - Expr::new(ExprKind::Num(NumKind::Natural(1)), Span::Artificial); - File::create(dir.join("dhall").join("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15"))? - .write_all(binary::encode(&expr)?.as_ref())?; - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || panic!("Should not fetch import"), - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok()); - assert_eq!(resolve_counter, 1); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } - - #[test] - fn caching_import_should_skip_cache_if_missing_cache_folder( - ) -> Result<(), Error> { - let cache = Cache::new_with_provider(|_| Err(VarError::NotPresent)); - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut resolve_counter = 0; - let mut fetcher_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(resolve_counter, 1); - assert_eq!(fetcher_counter, 1); - Ok(()) - } - - #[test] - fn caching_import_should_skip_cache_on_no_hash_import() -> Result<(), Error> - { - let cache = Cache::new_with_provider(|_| Err(VarError::NotPresent)); - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: None, - }; - - let mut resolve_counter = 0; - let mut fetcher_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(resolve_counter, 1); - assert_eq!(fetcher_counter, 1); - Ok(()) - } - - #[test] - fn caching_import_should_fetch_import_if_no_cache() -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut fetcher_counter = 0; - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(resolve_counter, 1); - assert_eq!(fetcher_counter, 1); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } - - #[test] - fn caching_import_should_fetch_import_on_cache_parsed_error( - ) -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - - File::create(dir.join("dhall").join("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15"))? - .write_all("Invalid content".as_bytes())?; - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut fetcher_counter = 0; - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(fetcher_counter, 1, "Should fetch since cache is invalid"); - assert_eq!( - resolve_counter, 1, - "Should resolve only 1 time because cache can't be parsed" - ); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } - - #[test] - fn caching_import_should_fetch_import_on_cache_resolve_error( - ) -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - - let expr = - Expr::new(ExprKind::Num(NumKind::Natural(2)), Span::Artificial); - File::create(dir.join("dhall").join("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15"))? - .write_all(binary::encode(&expr)?.as_slice())?; - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut fetcher_counter = 0; - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - match resolve_counter { - 1 => Err(Error::new(ErrorKind::Cache( - CacheError::CacheHashInvalid, - ))), - _ => { - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - } - } - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(fetcher_counter, 1, "Should fetch since cache is invalid"); - assert_eq!( - resolve_counter, 2, - "Should resolve 2 time (one for cache that fail, one for fetch)" - ); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } - - #[test] - fn caching_import_should_fetch_import_on_invalid_hash_cache_content( - ) -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - - let expr = - Expr::new(ExprKind::Num(NumKind::Natural(2)), Span::Artificial); - File::create(dir.join("dhall").join("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15"))? - .write_all(binary::encode(&expr)?.as_slice())?; - - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut fetcher_counter = 0; - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(fetcher_counter, 1, "Should fetch since cache is invalid"); - assert_eq!( - resolve_counter, 2, - "Should resolve 2 time (one for cache, one for fetch)" - ); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } - - #[test] - fn caching_import_should_save_import_if_missing() -> Result<(), Error> { - let test_id = rand::thread_rng() - .sample_iter(Alphanumeric) - .take(36) - .collect::<String>(); - let dir = temp_dir().join(test_id); - - std::fs::create_dir_all(dir.as_path())?; - - let cache = Cache::new_with_provider(|_| { - Ok(dir.clone().to_str().map(String::from).unwrap()) - }); - let import = Import { - mode: ImportMode::Code, - location: ImportTarget::Missing, - hash: Some(Hash::SHA256(hex::decode("d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15").unwrap())), - }; - - let mut fetcher_counter = 0; - let mut resolve_counter = 0; - - let result = cache.caching_import( - &import, - || { - fetcher_counter += 1; - parse_str("1") - }, - |parsed| { - resolve_counter += 1; - let result = parsed.resolve()?.typecheck()?; - Ok((result.normalize().to_hir(), result.ty)) - }, - ); - - assert!(result.is_ok(), "caching_import Should be valid"); - assert_eq!(fetcher_counter, 1, "Should fetch since cache is mising"); - assert_eq!(resolve_counter, 1, "Should resolve 1 time"); - - let cache_file = dir.join("dhall").join("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15"); - assert!(cache_file.exists()); - - std::fs::remove_dir_all(dir.as_path()).unwrap(); - Ok(()) - } + use crate::syntax::parse_expr; #[test] - fn cache_filename_should_result_for_hash() { + fn filename_for_hash_should_work() { let hash = - Hash::SHA256(parse_expr("1").unwrap().hash().unwrap().into_vec()); - assert_eq!("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15".to_string(), cache_filename(hash)); - } - - #[test] - fn check_hash_should_be_ok_for_same_hash() -> Result<(), Error> { - let typed = parse_str("1")?.resolve()?.typecheck()?; - let hash = Hash::SHA256(parse_expr("1")?.hash()?.into_vec()); - - let expected = (typed.normalize().to_hir(), typed.ty); - let actual = check_hash(&hash, expected.clone()); - assert_eq!(actual.unwrap(), expected); - Ok(()) - } - - #[test] - fn check_hash_should_be_ok_for_unmatching_hash() -> Result<(), Error> { - let typed = parse_str("1")?.resolve()?.typecheck()?; - let hash = Hash::SHA256(parse_expr("2")?.hash()?.into_vec()); - - let expected = (typed.normalize().to_hir(), typed.ty); - let actual = check_hash(&hash, expected); - assert!(actual.is_err()); - Ok(()) + Hash::SHA256(parse_expr("1").unwrap().sha256_hash().unwrap()); + assert_eq!("1220d60d8415e36e86dae7f42933d3b0c4fe3ca238f057fba206c7e9fbf5d784fe15".to_string(), filename_for_hash(&hash)); } } diff --git a/dhall/src/semantics/resolve/env.rs b/dhall/src/semantics/resolve/env.rs index 6346a6d..29dd16b 100644 --- a/dhall/src/semantics/resolve/env.rs +++ b/dhall/src/semantics/resolve/env.rs @@ -1,8 +1,9 @@ use std::collections::HashMap; use crate::error::{Error, ImportError}; -use crate::semantics::{AlphaVar, ImportLocation, TypedHir, VarEnv}; -use crate::syntax::{Label, V}; +use crate::semantics::{AlphaVar, Cache, ImportLocation, VarEnv}; +use crate::syntax::{Hash, Label, V}; +use crate::Typed; /// Environment for resolving names. #[derive(Debug, Clone, Default)] @@ -10,14 +11,15 @@ pub struct NameEnv { names: Vec<Label>, } -pub type ImportCache = HashMap<ImportLocation, TypedHir>; -pub type ImportStack = Vec<ImportLocation>; +pub type MemCache = HashMap<ImportLocation, Typed>; +pub type CyclesStack = Vec<ImportLocation>; /// Environment for resolving imports -#[derive(Debug, Clone, Default)] +#[derive(Debug)] pub struct ImportEnv { - cache: ImportCache, - stack: ImportStack, + disk_cache: Option<Cache>, // Missing if it failed to initialize + mem_cache: MemCache, + stack: CyclesStack, } impl NameEnv { @@ -66,38 +68,62 @@ impl NameEnv { impl ImportEnv { pub fn new() -> Self { - ImportEnv::default() + ImportEnv { + disk_cache: Cache::new().ok(), + mem_cache: Default::default(), + stack: Default::default(), + } + } + + pub fn get_from_mem_cache( + &mut self, + location: &ImportLocation, + ) -> Option<Typed> { + Some(self.mem_cache.get(location)?.clone()) + } + + pub fn get_from_disk_cache( + &mut self, + hash: &Option<Hash>, + ) -> Option<Typed> { + let hash = hash.as_ref()?; + let expr = self.disk_cache.as_ref()?.get(hash).ok()?; + Some(expr) + } + + pub fn write_to_mem_cache( + &mut self, + location: ImportLocation, + expr: Typed, + ) { + self.mem_cache.insert(location, expr); } - pub fn handle_import( + pub fn write_to_disk_cache(&mut self, hash: &Option<Hash>, expr: &Typed) { + if let Some(disk_cache) = self.disk_cache.as_ref() { + if let Some(hash) = hash { + let _ = disk_cache.insert(hash, &expr); + } + } + } + + pub fn with_cycle_detection( &mut self, - mut location: ImportLocation, - do_resolve: impl FnOnce(&mut Self) -> Result<TypedHir, Error>, - ) -> Result<TypedHir, Error> { + location: ImportLocation, + do_resolve: impl FnOnce(&mut Self) -> Result<Typed, Error>, + ) -> Result<Typed, Error> { if self.stack.contains(&location) { return Err( ImportError::ImportCycle(self.stack.clone(), location).into() ); } - Ok(match self.cache.get(&location) { - Some(expr) => expr.clone(), - None => { - let expr = { - // Push the current location on the stack - self.stack.push(location); - // Resolve the import recursively - // WARNING: do not propagate errors here or the stack will get messed up. - let result = do_resolve(self); - // Remove location from the stack. - location = self.stack.pop().unwrap(); - result - }?; - - // Add the resolved import to the cache - self.cache.insert(location, expr.clone()); - - expr - } - }) + // Push the current location on the stack + self.stack.push(location); + // Resolve the import recursively + // WARNING: do not propagate errors here or the stack will get messed up. + let result = do_resolve(self); + // Remove location from the stack. + self.stack.pop().unwrap(); + result } } diff --git a/dhall/src/semantics/resolve/resolve.rs b/dhall/src/semantics/resolve/resolve.rs index 2b401dc..614ea22 100644 --- a/dhall/src/semantics/resolve/resolve.rs +++ b/dhall/src/semantics/resolve/resolve.rs @@ -10,20 +10,17 @@ use crate::builtins::Builtin; use crate::error::ErrorBuilder; use crate::error::{Error, ImportError}; use crate::operations::{BinOp, OpKind}; -use crate::semantics::{mkerr, Cache, Hir, HirKind, ImportEnv, NameEnv, Type}; +use crate::semantics::{mkerr, Hir, HirKind, ImportEnv, NameEnv, Type}; use crate::syntax; use crate::syntax::{ Expr, ExprKind, FilePath, FilePrefix, Hash, ImportMode, ImportTarget, Label, Span, UnspannedExpr, URL, }; -use crate::{Parsed, Resolved}; +use crate::{Parsed, Resolved, Typed}; // TODO: evaluate import headers pub type Import = syntax::Import<()>; -/// Owned Hir with a type. Different from Tir because the Hir is owned. -pub type TypedHir = (Hir, Type); - /// The location of some data, usually some dhall code. #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum ImportLocation { @@ -220,65 +217,55 @@ fn make_aslocation_uniontype() -> Expr { mkexpr(ExprKind::UnionType(union)) } +fn check_hash(import: &Import, typed: &Typed, span: Span) -> Result<(), Error> { + match (import.mode, &import.hash) { + (ImportMode::Code, Some(Hash::SHA256(hash))) => { + let actual_hash = typed.hir.to_expr_alpha().sha256_hash()?; + if hash[..] != actual_hash[..] { + mkerr( + ErrorBuilder::new("hash mismatch") + .span_err(span, "hash mismatch") + .note(format!("Expected sha256:{}", hex::encode(hash))) + .note(format!( + "Found sha256:{}", + hex::encode(actual_hash) + )) + .format(), + )? + } + } + _ => {} + } + Ok(()) +} + fn resolve_one_import( env: &mut ImportEnv, - cache: &Cache, import: &Import, - location: &ImportLocation, + location: ImportLocation, span: Span, -) -> Result<TypedHir, Error> { - let do_sanity_check = import.mode != ImportMode::Location; - let location = location.chain(&import.location, do_sanity_check)?; - env.handle_import(location.clone(), |env| match import.mode { +) -> Result<Typed, Error> { + let (hir, ty) = match import.mode { ImportMode::Code => { - let (hir, ty) = cache.caching_import( - import, - || location.fetch_dhall(), - |parsed| { - let typed = - resolve_with_env(env, cache, parsed)?.typecheck()?; - let hir = typed.normalize().to_hir(); - Ok((hir, typed.ty)) - }, - )?; - match &import.hash { - Some(Hash::SHA256(hash)) => { - let actual_hash = hir.to_expr_alpha().hash()?; - if hash[..] != actual_hash[..] { - mkerr( - ErrorBuilder::new("hash mismatch") - .span_err(span, "hash mismatch") - .note(format!( - "Expected sha256:{}", - hex::encode(hash) - )) - .note(format!( - "Found sha256:{}", - hex::encode(actual_hash) - )) - .format(), - )? - } - } - None => {} - } - Ok((hir, ty)) + let parsed = location.fetch_dhall()?; + let typed = resolve_with_env(env, parsed)?.typecheck()?; + let hir = typed.normalize().to_hir(); + (hir, typed.ty) } ImportMode::RawText => { let text = location.fetch_text()?; - let hir = Hir::new( - HirKind::Expr(ExprKind::TextLit(text.into())), - Span::Artificial, - ); - Ok((hir, Type::from_builtin(Builtin::Text))) + let hir = + Hir::new(HirKind::Expr(ExprKind::TextLit(text.into())), span); + (hir, Type::from_builtin(Builtin::Text)) } ImportMode::Location => { let expr = location.into_location(); let hir = skip_resolve_expr(&expr)?; let ty = hir.typecheck_noenv()?.ty().clone(); - Ok((hir, ty)) + (hir, ty) } - }) + }; + Ok(Typed { hir, ty }) } /// Desugar a `with` expression. @@ -342,7 +329,7 @@ fn desugar(expr: &Expr) -> Cow<'_, Expr> { fn traverse_resolve_expr( name_env: &mut NameEnv, expr: &Expr, - f: &mut impl FnMut(Import, Span) -> Result<TypedHir, Error>, + f: &mut impl FnMut(Import, Span) -> Result<Typed, Error>, ) -> Result<Hir, Error> { let expr = desugar(expr); Ok(match expr.kind() { @@ -382,7 +369,7 @@ fn traverse_resolve_expr( // TODO: evaluate import headers let import = import.traverse_ref(|_| Ok::<_, Error>(()))?; let imported = f(import, expr.span())?; - HirKind::Import(imported.0, imported.1) + HirKind::Import(imported.hir, imported.ty) } kind => HirKind::Expr(kind), }; @@ -393,23 +380,53 @@ fn traverse_resolve_expr( fn resolve_with_env( env: &mut ImportEnv, - cache: &Cache, parsed: Parsed, ) -> Result<Resolved, Error> { - let Parsed(expr, location) = parsed; + let Parsed(expr, base_location) = parsed; let resolved = traverse_resolve_expr( &mut NameEnv::new(), &expr, &mut |import, span| { - resolve_one_import(env, cache, &import, &location, span) + let do_sanity_check = import.mode != ImportMode::Location; + let location = + base_location.chain(&import.location, do_sanity_check)?; + + // If the import is in the in-memory cache, or the hash is in the on-disk cache, return + // the cached contents. + if let Some(typed) = env.get_from_mem_cache(&location) { + // The same location may be used with different or no hashes. Thus we need to check + // the hashes every time. + check_hash(&import, &typed, span)?; + env.write_to_disk_cache(&import.hash, &typed); + return Ok(typed); + } + if let Some(typed) = env.get_from_disk_cache(&import.hash) { + // No need to check the hash, it was checked before reading the file. We also don't + // write to the in-memory cache, because the location might be completely unrelated + // to the cached file (e.g. `missing sha256:...` is valid). + // This actually means that importing many times a same hashed import will take + // longer than importing many times a same non-hashed import. + return Ok(typed); + } + + // Resolve this import, making sure that recursive imports don't cycle back to the + // current one. + let typed = env.with_cycle_detection(location.clone(), |env| { + resolve_one_import(env, &import, location.clone(), span.clone()) + })?; + + // Add the resolved import to the caches + check_hash(&import, &typed, span)?; + env.write_to_disk_cache(&import.hash, &typed); + env.write_to_mem_cache(location, typed.clone()); + Ok(typed) }, )?; Ok(Resolved(resolved)) } pub fn resolve(parsed: Parsed) -> Result<Resolved, Error> { - let cache = Cache::new(); - resolve_with_env(&mut ImportEnv::new(), &cache, parsed) + resolve_with_env(&mut ImportEnv::new(), parsed) } pub fn skip_resolve_expr(expr: &Expr) -> Result<Hir, Error> { diff --git a/dhall/src/syntax/ast/expr.rs b/dhall/src/syntax/ast/expr.rs index b1a978f..eba2735 100644 --- a/dhall/src/syntax/ast/expr.rs +++ b/dhall/src/syntax/ast/expr.rs @@ -178,10 +178,9 @@ impl Expr { } // Compute the sha256 hash of the binary form of the expression. - pub fn hash(&self) -> Result<Box<[u8]>, Error> { - use sha2::Digest; + pub fn sha256_hash(&self) -> Result<Box<[u8]>, Error> { let data = binary::encode(self)?; - Ok(sha2::Sha256::digest(&data).as_slice().into()) + Ok(crate::utils::sha256_hash(&data)) } } diff --git a/dhall/src/syntax/ast/import.rs b/dhall/src/syntax/ast/import.rs index c45fe51..69f4021 100644 --- a/dhall/src/syntax/ast/import.rs +++ b/dhall/src/syntax/ast/import.rs @@ -52,7 +52,7 @@ pub enum ImportMode { #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub enum Hash { - SHA256(Vec<u8>), + SHA256(Box<[u8]>), } /// Reference to an external resource diff --git a/dhall/src/syntax/binary/decode.rs b/dhall/src/syntax/binary/decode.rs index f4747d3..4ea7d98 100644 --- a/dhall/src/syntax/binary/decode.rs +++ b/dhall/src/syntax/binary/decode.rs @@ -290,7 +290,7 @@ fn cbor_value_to_dhall(data: &Value) -> Result<DecodedExpr, DecodeError> { Null => None, Bytes(bytes) => match bytes.as_slice() { [18, 32, rest @ ..] => { - Some(Hash::SHA256(rest.to_vec())) + Some(Hash::SHA256(rest.to_vec().into())) } _ => { return Err(DecodeError::WrongFormatError(format!( diff --git a/dhall/src/syntax/text/parser.rs b/dhall/src/syntax/text/parser.rs index 37f28e5..07921b5 100644 --- a/dhall/src/syntax/text/parser.rs +++ b/dhall/src/syntax/text/parser.rs @@ -593,7 +593,7 @@ impl DhallParser { input.error(format!("Unknown hashing protocol '{}'", protocol)) ); } - Ok(Hash::SHA256(hex::decode(hash).unwrap())) + Ok(Hash::SHA256(hex::decode(hash).unwrap().into())) } fn import_hashed( diff --git a/dhall/src/utils.rs b/dhall/src/utils.rs new file mode 100644 index 0000000..d1e642a --- /dev/null +++ b/dhall/src/utils.rs @@ -0,0 +1,17 @@ +use std::fs::File; +use std::io::Read; +use std::path::Path; + +use crate::error::Error; + +// Compute the sha256 hash of a bitstring. +pub fn sha256_hash(data: &[u8]) -> Box<[u8]> { + use sha2::Digest; + sha2::Sha256::digest(data).as_slice().into() +} + +pub fn read_binary_file(path: impl AsRef<Path>) -> Result<Box<[u8]>, Error> { + let mut buffer = Vec::new(); + File::open(path)?.read_to_end(&mut buffer)?; + Ok(buffer.into()) +} diff --git a/dhall/tests/import/data/simple.dhall b/dhall/tests/import/data/simple.dhall new file mode 100644 index 0000000..00750ed --- /dev/null +++ b/dhall/tests/import/data/simple.dhall @@ -0,0 +1 @@ +3 diff --git a/dhall/tests/import/failure/unit/HashMismatch2.dhall b/dhall/tests/import/failure/unit/HashMismatch2.dhall new file mode 100644 index 0000000..5fae772 --- /dev/null +++ b/dhall/tests/import/failure/unit/HashMismatch2.dhall @@ -0,0 +1,2 @@ +-- This ensures that even if the file gets imported without hash first, the hash check is not skipped later +../../data/simple.dhall + ../../data/simple.dhall sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + ../../data/simple.dhall diff --git a/dhall/tests/import/failure/unit/HashMismatch2.txt b/dhall/tests/import/failure/unit/HashMismatch2.txt new file mode 100644 index 0000000..f03ab24 --- /dev/null +++ b/dhall/tests/import/failure/unit/HashMismatch2.txt @@ -0,0 +1,9 @@ +Type error: error: hash mismatch + --> <current file>:2:27 + | +1 | -- This ensures that even if the file gets imported without hash first, the hash check is not skipped later +2 | ../../data/simple.dhall + ../../data/simple.dhall sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + ../../data/simple.dhall + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ hash mismatch + | + = note: Expected sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa + = note: Found sha256:15f52ecf91c94c1baac02d5a4964b2ed8fa401641a2c8a95e8306ec7c1e3b8d2 diff --git a/dhall/tests/spec.rs b/dhall/tests/spec.rs index 8d67892..36cbd81 100644 --- a/dhall/tests/spec.rs +++ b/dhall/tests/spec.rs @@ -1,4 +1,6 @@ use anyhow::Result; +use rand::distributions::Alphanumeric; +use rand::Rng; use std::env; use std::ffi::OsString; use std::fmt::{Debug, Display}; @@ -593,26 +595,6 @@ fn unwrap_err<T: Debug, E>(x: Result<T, E>) -> Result<E, TestError> { fn run_test(test: &SpecTest) -> Result<()> { use self::SpecTestKind::*; - // Setup current directory to the root of the repository. Important for `as Location` tests. - let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) - .parent() - .unwrap() - .to_path_buf(); - env::set_current_dir(root_dir.as_path())?; - // Set environment variable for import tests. - env::set_var("DHALL_TEST_VAR", "6 * 7"); - - // Configure cache for import tests - env::set_var( - "XDG_CACHE_HOME", - root_dir - .join("dhall-lang") - .join("tests") - .join("import") - .join("cache") - .as_path(), - ); - let SpecTest { input: expr, output: expected, @@ -670,7 +652,7 @@ fn run_test(test: &SpecTest) -> Result<()> { } SemanticHash => { let expr = expr.normalize()?.to_expr_alpha(); - let hash = hex::encode(expr.hash()?); + let hash = hex::encode(expr.sha256_hash()?); expected.compare_ui(format!("sha256:{}", hash))?; } TypeInferenceSuccess => { @@ -690,7 +672,6 @@ fn run_test(test: &SpecTest) -> Result<()> { expected.compare(expr)?; } } - Ok(()) } @@ -700,17 +681,50 @@ fn main() { .flat_map(discover_tests_for_feature) .collect(); - libtest_mimic::run_tests(&Arguments::from_args(), tests, |test| { - let result = std::panic::catch_unwind(move || { - run_test_stringy_error(&test.data) + // Setup current directory to the root of the repository. Important for `as Location` tests. + let root_dir = PathBuf::from(env!("CARGO_MANIFEST_DIR")) + .parent() + .unwrap() + .to_path_buf(); + env::set_current_dir(root_dir.as_path()).unwrap(); + + // Set environment variable for import tests. + env::set_var("DHALL_TEST_VAR", "6 * 7"); + + // Configure cache for import tests + let dhall_cache_dir = root_dir + .join("dhall-lang") + .join("tests") + .join("import") + .join("cache") + .join("dhall"); + let random_id = rand::thread_rng() + .sample_iter(Alphanumeric) + .take(36) + .collect::<String>(); + let cache_dir = format!("dhall-tests-{}", random_id); + let cache_dir = env::temp_dir().join(cache_dir); + + std::fs::create_dir_all(&cache_dir).unwrap(); + fs_extra::dir::copy(&dhall_cache_dir, &cache_dir, &Default::default()) + .unwrap(); + env::set_var("XDG_CACHE_HOME", &cache_dir); + + let res = + libtest_mimic::run_tests(&Arguments::from_args(), tests, |test| { + let result = std::panic::catch_unwind(move || { + run_test_stringy_error(&test.data) + }); + match result { + Ok(Ok(_)) => Outcome::Passed, + Ok(Err(e)) => Outcome::Failed { msg: Some(e) }, + Err(_) => Outcome::Failed { + msg: Some("thread panicked".to_string()), + }, + } }); - match result { - Ok(Ok(_)) => Outcome::Passed, - Ok(Err(e)) => Outcome::Failed { msg: Some(e) }, - Err(_) => Outcome::Failed { - msg: Some("thread panicked".to_string()), - }, - } - }) - .exit(); + + std::fs::remove_dir_all(&cache_dir).unwrap(); + + res.exit(); } |