From 1f1cc8809bc639daea9090ea8de6eaa4e5e46b93 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Fri, 19 Apr 2024 16:29:24 +0200 Subject: [PATCH 1/2] fix: don't do case insensitive fs ops during graph building Previously users could import files relatively with the wrong file casing. So far only two package versions have run into this, both from the same package. --- api/src/analysis.rs | 4 +- api/src/ids.rs | 205 +++++++++++++----- api/src/publish.rs | 27 +++ api/src/tarball.rs | 25 ++- .../YouTube.tsx | 0 .../case_insensitive_dep_reference/jsr.json | 5 + .../case_insensitive_dep_reference/mod.ts | 1 + .../jsr.json | 5 + .../case_insensitive_export_reference/mod.ts | 1 + 9 files changed, 205 insertions(+), 68 deletions(-) create mode 100644 api/testdata/tarballs/case_insensitive_dep_reference/YouTube.tsx create mode 100644 api/testdata/tarballs/case_insensitive_dep_reference/jsr.json create mode 100644 api/testdata/tarballs/case_insensitive_dep_reference/mod.ts create mode 100644 api/testdata/tarballs/case_insensitive_export_reference/jsr.json create mode 100644 api/testdata/tarballs/case_insensitive_export_reference/mod.ts diff --git a/api/src/analysis.rs b/api/src/analysis.rs index 477f89b3..eb70b5dc 100644 --- a/api/src/analysis.rs +++ b/api/src/analysis.rs @@ -218,7 +218,9 @@ async fn analyze_package_inner( .map_err(PublishError::NpmTarballError)?; let (meta, readme_path) = { - let readme = files.iter().find(|file| file.0.is_readme()); + let readme = files + .iter() + .find(|file| file.0.case_insensitive().is_readme()); ( generate_score(main_entrypoint, &doc_nodes, &readme, all_fast_check), diff --git a/api/src/ids.rs b/api/src/ids.rs index 45e9bfc1..90b47816 100644 --- a/api/src/ids.rs +++ b/api/src/ids.rs @@ -1,3 +1,5 @@ +use std::borrow::Cow; + // Copyright 2024 the JSR authors. All rights reserved. MIT license. use deno_semver::VersionParseError; use sqlx::postgres::PgValueRef; @@ -457,13 +459,12 @@ pub enum VersionValidateError { /// The path must only contain ascii alphanumeric characters, and the characters /// '$', '(', ')', '+', '-', '.', '@', '[', ']', '_', '{', '}', '~'. /// -/// Path's are case sensitive, but comparisons and hashing are case insensitive. -/// This matches the behaviour of the Windows FS APIs. +/// Path's are case sensitive, and comparisons and hashing are also case +/// sensitive. However, to ensure no collisions based only on case-sensitivity, +/// one may use the `CaseInsensitivePackagePath` type to compare paths in a +/// case insensitive manner. #[derive(Clone, Default)] -pub struct PackagePath { - path: String, - lower: Option, -} +pub struct PackagePath(String); impl PackagePath { pub fn new(path: String) -> Result { @@ -485,14 +486,6 @@ impl PackagePath { return Err(PackagePathValidationError::MissingPrefix); }; - let mut has_upper = false; - let mut valid_char_mapper = |c: char| { - if c.is_ascii_uppercase() { - has_upper = true; - } - valid_char(c) - }; - let mut last = None; while let Some(component) = components.next() { @@ -508,7 +501,7 @@ impl PackagePath { return Err(PackagePathValidationError::DotSegment); } - if let Some(err) = component.chars().find_map(&mut valid_char_mapper) { + if let Some(err) = component.chars().find_map(&mut valid_char) { return Err(err); } @@ -544,38 +537,17 @@ impl PackagePath { )); } - let lower = has_upper.then(|| path.to_ascii_lowercase()); - - Ok(Self { path, lower }) + Ok(Self(path)) } - pub fn is_readme(&self) -> bool { - let path = - std::path::PathBuf::from(self.lower.as_ref().unwrap_or(&self.path)); - let name = path - .file_stem() - .and_then(|name| name.to_str()) - .unwrap_or_default(); - let extension = path - .extension() - .and_then(|ext| ext.to_str()) - .unwrap_or_default(); - let parent = path - .parent() - .and_then(|ext| ext.to_str()) - .unwrap_or_default(); - - parent == "/" - && name == "readme" - && matches!(extension, "md" | "txt" | "markdown") + pub fn case_insensitive(&self) -> CaseInsensitivePackagePath<'_> { + CaseInsensitivePackagePath::new(Cow::Borrowed(self)) } } impl PartialEq for PackagePath { fn eq(&self, other: &Self) -> bool { - let self_lower = self.lower.as_ref().unwrap_or(&self.path); - let other_lower = other.lower.as_ref().unwrap_or(&other.path); - self_lower == other_lower + self.0 == other.0 } } @@ -583,8 +555,7 @@ impl Eq for PackagePath {} impl std::hash::Hash for PackagePath { fn hash(&self, state: &mut H) { - let lower = self.lower.as_ref().unwrap_or(&self.path); - lower.hash(state); + self.0.hash(state); } } @@ -606,19 +577,19 @@ impl std::ops::Deref for PackagePath { type Target = str; fn deref(&self) -> &Self::Target { - &self.path + &self.0 } } impl std::fmt::Display for PackagePath { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{}", self.path) + write!(f, "{}", self.0) } } impl std::fmt::Debug for PackagePath { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self.path) + write!(f, "{:?}", self.0) } } @@ -637,7 +608,7 @@ impl serde::Serialize for PackagePath { where S: serde::Serializer, { - self.path.serialize(serializer) + self.0.serialize(serializer) } } @@ -656,7 +627,7 @@ impl<'q> sqlx::Encode<'q, Postgres> for PackagePath { buf: &mut >::ArgumentBuffer, ) -> sqlx::encode::IsNull { >::encode_by_ref( - &self.path, buf, + &self.0, buf, ) } } @@ -755,6 +726,85 @@ pub enum PackagePathValidationError { InvalidOtherChar(char), } +/// Case insensitive package path. This type is useful for comparing package +/// paths in a case insensitive manner. +/// +/// The hash and equality of this type are case insensitive. +#[derive(Clone, Default)] +pub struct CaseInsensitivePackagePath<'a> { + path: Cow<'a, PackagePath>, + lower: Option, +} + +impl<'a> CaseInsensitivePackagePath<'a> { + pub fn new(path: Cow<'a, PackagePath>) -> Self { + let has_uppercase = (*path).chars().any(|c| char::is_ascii_uppercase(&c)); + let lower = has_uppercase.then(|| path.to_ascii_lowercase()); + Self { path, lower } + } + + pub fn into_inner(self) -> Cow<'a, PackagePath> { + self.path + } + + pub fn is_readme(&self) -> bool { + let path = + std::path::PathBuf::from(self.lower.as_deref().unwrap_or(&self.path)); + let name = path + .file_stem() + .and_then(|name| name.to_str()) + .unwrap_or_default(); + let extension = path + .extension() + .and_then(|ext| ext.to_str()) + .unwrap_or_default(); + let parent = path + .parent() + .and_then(|ext| ext.to_str()) + .unwrap_or_default(); + + parent == "/" + && name == "readme" + && matches!(extension, "md" | "txt" | "markdown") + } + + pub fn to_owned(&self) -> CaseInsensitivePackagePath<'static> { + CaseInsensitivePackagePath { + path: Cow::Owned(PackagePath::clone(&self.path)), + lower: self.lower.clone(), + } + } +} + +impl PartialEq for CaseInsensitivePackagePath<'_> { + fn eq(&self, other: &Self) -> bool { + let self_lower = self.lower.as_deref().unwrap_or(&self.path); + let other_lower = other.lower.as_deref().unwrap_or(&other.path); + self_lower == other_lower + } +} + +impl Eq for CaseInsensitivePackagePath<'_> {} + +impl std::hash::Hash for CaseInsensitivePackagePath<'_> { + fn hash(&self, state: &mut H) { + let self_lower = self.lower.as_deref().unwrap_or(&self.path); + self_lower.hash(state); + } +} + +impl std::fmt::Display for CaseInsensitivePackagePath<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Display::fmt(&self.path, f) + } +} + +impl std::fmt::Debug for CaseInsensitivePackagePath<'_> { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.path, f) + } +} + #[cfg(test)] mod tests { use std::collections::HashSet; @@ -954,37 +1004,80 @@ mod tests { let a_capitalized = PackagePath::try_from("/Foo").unwrap(); let b = PackagePath::try_from("/bar").unwrap(); - assert_eq!(a, a_capitalized); + assert_eq!(a, a); + assert_ne!(a, a_capitalized); assert_ne!(a, b); assert_ne!(a_capitalized, b); let mut set = HashSet::new(); assert!(set.insert(a.clone())); - assert!(!set.insert(a_capitalized.clone())); + assert!(!set.insert(a.clone())); + assert!(set.insert(a_capitalized.clone())); assert!(set.insert(b.clone())); assert!(set.contains(&a)); assert!(set.contains(&a_capitalized)); assert!(set.contains(&b)); + + let a_case_insensitive = a.case_insensitive(); + let a_capitalized_case_insensitive = a_capitalized.case_insensitive(); + let b_case_insensitive = b.case_insensitive(); + + assert_eq!(a_case_insensitive, a_case_insensitive); + assert_eq!(a_case_insensitive, a_capitalized_case_insensitive); + assert_ne!(a_case_insensitive, b_case_insensitive); + + let mut set = HashSet::new(); + assert!(set.insert(a_case_insensitive.clone())); + assert!(!set.insert(a_case_insensitive.clone())); + assert!(!set.insert(a_capitalized_case_insensitive.clone())); + assert!(set.insert(b_case_insensitive.clone())); + + assert!(set.contains(&a_case_insensitive)); + assert!(set.contains(&a_capitalized_case_insensitive)); + assert!(set.contains(&b_case_insensitive)); } #[test] fn test_package_path_is_readme() { // Valid READMEs - assert!(PackagePath::try_from("/README.md").unwrap().is_readme()); - assert!(PackagePath::try_from("/README.txt").unwrap().is_readme()); + assert!(PackagePath::try_from("/README.md") + .unwrap() + .case_insensitive() + .is_readme()); + assert!(PackagePath::try_from("/README.txt") + .unwrap() + .case_insensitive() + .is_readme()); assert!(PackagePath::try_from("/README.markdown") .unwrap() + .case_insensitive() + .is_readme()); + assert!(PackagePath::try_from("/readme.md") + .unwrap() + .case_insensitive() + .is_readme()); + assert!(PackagePath::try_from("/readme.txt") + .unwrap() + .case_insensitive() .is_readme()); - assert!(PackagePath::try_from("/readme.md").unwrap().is_readme()); - assert!(PackagePath::try_from("/readme.txt").unwrap().is_readme()); assert!(PackagePath::try_from("/readme.markdown") .unwrap() + .case_insensitive() + .is_readme()); + assert!(PackagePath::try_from("/ReAdMe.md") + .unwrap() + .case_insensitive() .is_readme()); - assert!(PackagePath::try_from("/ReAdMe.md").unwrap().is_readme()); // Invalid READMEs - assert!(!PackagePath::try_from("/foo/README.md").unwrap().is_readme()); - assert!(!PackagePath::try_from("/foo.md").unwrap().is_readme()); + assert!(!PackagePath::try_from("/foo/README.md") + .unwrap() + .case_insensitive() + .is_readme()); + assert!(!PackagePath::try_from("/foo.md") + .unwrap() + .case_insensitive() + .is_readme()); } } diff --git a/api/src/publish.rs b/api/src/publish.rs index 2b033dde..e88475b1 100644 --- a/api/src/publish.rs +++ b/api/src/publish.rs @@ -905,6 +905,33 @@ pub mod tests { assert_eq!(error.code, "caseInsensitiveDuplicatePath"); } + #[tokio::test] + async fn case_insensitive_exports_reference() { + let t = TestSetup::new().await; + let task = process_tarball_setup( + &t, + create_mock_tarball("case_insensitive_exports_reference"), + ) + .await; + assert_eq!(task.status, PublishingTaskStatus::Failure, "{task:#?}"); + let error = task.error.unwrap(); + assert_eq!(error.code, "configFileExportsInvalid"); + } + + #[tokio::test] + async fn case_insensitive_dep_reference() { + let t = TestSetup::new().await; + let task = process_tarball_setup( + &t, + create_mock_tarball("case_insensitive_dep_reference"), + ) + .await; + assert_eq!(task.status, PublishingTaskStatus::Failure, "{task:#?}"); + let error = task.error.unwrap(); + assert_eq!(error.code, "graphError"); + assert_eq!(error.message, "failed to build module graph: Module not found \"file:///Youtube.tsx\".\n at file:///mod.ts:1:8"); + } + #[tokio::test] async fn no_exports() { let t = TestSetup::new().await; diff --git a/api/src/tarball.rs b/api/src/tarball.rs index d50c1c31..49e233fd 100644 --- a/api/src/tarball.rs +++ b/api/src/tarball.rs @@ -1,5 +1,4 @@ // Copyright 2024 the JSR authors. All rights reserved. MIT license. -use std::collections::hash_map::Entry; use std::collections::HashMap; use std::collections::HashSet; use std::io; @@ -43,6 +42,7 @@ use crate::gcp::CACHE_CONTROL_IMMUTABLE; use crate::gcs_paths::docs_v1_path; use crate::gcs_paths::file_path; use crate::gcs_paths::npm_tarball_path; +use crate::ids::CaseInsensitivePackagePath; use crate::ids::PackagePath; use crate::ids::PackagePathValidationError; use crate::ids::ScopedPackageName; @@ -102,6 +102,7 @@ pub async fn process_tarball( .map_err(PublishError::UntarError)?; let mut files = HashMap::new(); + let mut case_insensitive_paths = HashSet::::new(); let mut file_infos = Vec::new(); let mut total_file_size = 0; @@ -165,16 +166,18 @@ pub async fn process_tarball( let hash = sha2::Sha256::digest(&bytes); let hash = format!("sha256-{:x}", hash); - match files.entry(path.clone()) { - Entry::Occupied(entry) => { - return Err(PublishError::CaseInsensitiveDuplicatePath { - a: path, - b: entry.key().clone(), - }); - } - Entry::Vacant(entry) => { - entry.insert(bytes); - } + // check for case-insensitive duplicate paths + let case_insensitive_path = path.case_insensitive(); + if let Some(existing) = case_insensitive_paths.get(&case_insensitive_path) { + return Err(PublishError::CaseInsensitiveDuplicatePath { + a: path.clone(), + b: existing.clone().into_inner().into_owned(), + }); + } + case_insensitive_paths.insert(case_insensitive_path.to_owned()); + + if files.insert(path.clone(), bytes).is_some() { + unreachable!("duplicate path: {:?}", path); } let file_info = FileInfo { path, hash, size }; diff --git a/api/testdata/tarballs/case_insensitive_dep_reference/YouTube.tsx b/api/testdata/tarballs/case_insensitive_dep_reference/YouTube.tsx new file mode 100644 index 00000000..e69de29b diff --git a/api/testdata/tarballs/case_insensitive_dep_reference/jsr.json b/api/testdata/tarballs/case_insensitive_dep_reference/jsr.json new file mode 100644 index 00000000..94c22c0e --- /dev/null +++ b/api/testdata/tarballs/case_insensitive_dep_reference/jsr.json @@ -0,0 +1,5 @@ +{ + "name": "@scope/foo", + "version": "1.2.3", + "exports": "./mod.ts" +} diff --git a/api/testdata/tarballs/case_insensitive_dep_reference/mod.ts b/api/testdata/tarballs/case_insensitive_dep_reference/mod.ts new file mode 100644 index 00000000..51133350 --- /dev/null +++ b/api/testdata/tarballs/case_insensitive_dep_reference/mod.ts @@ -0,0 +1 @@ +import "./Youtube.tsx" \ No newline at end of file diff --git a/api/testdata/tarballs/case_insensitive_export_reference/jsr.json b/api/testdata/tarballs/case_insensitive_export_reference/jsr.json new file mode 100644 index 00000000..91eb756f --- /dev/null +++ b/api/testdata/tarballs/case_insensitive_export_reference/jsr.json @@ -0,0 +1,5 @@ +{ + "name": "@scope/foo", + "version": "1.2.3", + "exports": "./Mod.ts" +} diff --git a/api/testdata/tarballs/case_insensitive_export_reference/mod.ts b/api/testdata/tarballs/case_insensitive_export_reference/mod.ts new file mode 100644 index 00000000..8289e4ba --- /dev/null +++ b/api/testdata/tarballs/case_insensitive_export_reference/mod.ts @@ -0,0 +1 @@ +export const hello = "Hello, world!"; From 13701b2d3f8315320f7e98745cf9a902780c9870 Mon Sep 17 00:00:00 2001 From: Luca Casonato Date: Fri, 19 Apr 2024 16:45:53 +0200 Subject: [PATCH 2/2] fix --- .../jsr.json | 0 .../mod.ts | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename api/testdata/tarballs/{case_insensitive_export_reference => case_insensitive_exports_reference}/jsr.json (100%) rename api/testdata/tarballs/{case_insensitive_export_reference => case_insensitive_exports_reference}/mod.ts (100%) diff --git a/api/testdata/tarballs/case_insensitive_export_reference/jsr.json b/api/testdata/tarballs/case_insensitive_exports_reference/jsr.json similarity index 100% rename from api/testdata/tarballs/case_insensitive_export_reference/jsr.json rename to api/testdata/tarballs/case_insensitive_exports_reference/jsr.json diff --git a/api/testdata/tarballs/case_insensitive_export_reference/mod.ts b/api/testdata/tarballs/case_insensitive_exports_reference/mod.ts similarity index 100% rename from api/testdata/tarballs/case_insensitive_export_reference/mod.ts rename to api/testdata/tarballs/case_insensitive_exports_reference/mod.ts