lol

prefetch-npm-deps: look up hashes from cache when fixing up lockfiles

authored by winter.bsky.social and committed by

Lily Foster ac35d7ea 7efebca8

+329 -62
+6
pkgs/build-support/node/build-npm-package/hooks/npm-config-hook.sh
··· 56 56 exit 1 57 57 fi 58 58 59 + export CACHE_MAP_PATH="$TMP/MEOW" 60 + @prefetchNpmDeps@ --map-cache 61 + 59 62 @prefetchNpmDeps@ --fixup-lockfile "$srcLockfile" 60 63 61 64 local cachePath ··· 108 111 fi 109 112 110 113 patchShebangs node_modules 114 + 115 + rm "$CACHE_MAP_PATH" 116 + unset CACHE_MAP_PATH 111 117 112 118 echo "Finished npmConfigHook" 113 119 }
+30
pkgs/build-support/node/fetch-npm-deps/Cargo.lock
··· 305 305 "tempfile", 306 306 "ureq", 307 307 "url", 308 + "walkdir", 308 309 ] 309 310 310 311 [[package]] ··· 399 400 version = "1.0.11" 400 401 source = "registry+https://github.com/rust-lang/crates.io-index" 401 402 checksum = "4501abdff3ae82a1c1b477a17252eb69cee9e66eb915c1abaa4f44d873df9f09" 403 + 404 + [[package]] 405 + name = "same-file" 406 + version = "1.0.6" 407 + source = "registry+https://github.com/rust-lang/crates.io-index" 408 + checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502" 409 + dependencies = [ 410 + "winapi-util", 411 + ] 402 412 403 413 [[package]] 404 414 name = "scopeguard" ··· 584 594 checksum = "49874b5167b65d7193b8aba1567f5c7d93d001cafc34600cee003eda787e483f" 585 595 586 596 [[package]] 597 + name = "walkdir" 598 + version = "2.3.2" 599 + source = "registry+https://github.com/rust-lang/crates.io-index" 600 + checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56" 601 + dependencies = [ 602 + "same-file", 603 + "winapi", 604 + "winapi-util", 605 + ] 606 + 607 + [[package]] 587 608 name = "wasm-bindgen" 588 609 version = "0.2.82" 589 610 source = "registry+https://github.com/rust-lang/crates.io-index" ··· 681 702 version = "0.4.0" 682 703 source = "registry+https://github.com/rust-lang/crates.io-index" 683 704 checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" 705 + 706 + [[package]] 707 + name = "winapi-util" 708 + version = "0.1.5" 709 + source = "registry+https://github.com/rust-lang/crates.io-index" 710 + checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178" 711 + dependencies = [ 712 + "winapi", 713 + ] 684 714 685 715 [[package]] 686 716 name = "winapi-x86_64-pc-windows-gnu"
+1
pkgs/build-support/node/fetch-npm-deps/Cargo.toml
··· 17 17 tempfile = "3.3.0" 18 18 ureq = { version = "2.5.0" } 19 19 url = { version = "2.3.1", features = ["serde"] } 20 + walkdir = "2.3.2"
+15 -15
pkgs/build-support/node/fetch-npm-deps/src/cacache.rs
··· 1 1 use digest::{Digest, Update}; 2 - use serde::Serialize; 2 + use serde::{Deserialize, Serialize}; 3 3 use sha1::Sha1; 4 4 use sha2::{Sha256, Sha512}; 5 5 use std::{ ··· 9 9 }; 10 10 use url::Url; 11 11 12 - #[derive(Serialize)] 13 - struct Key { 14 - key: String, 15 - integrity: String, 16 - time: u8, 17 - size: usize, 18 - metadata: Metadata, 12 + #[derive(Serialize, Deserialize)] 13 + pub(super) struct Key { 14 + pub(super) key: String, 15 + pub(super) integrity: String, 16 + pub(super) time: u8, 17 + pub(super) size: usize, 18 + pub(super) metadata: Metadata, 19 19 } 20 20 21 - #[derive(Serialize)] 22 - struct Metadata { 23 - url: Url, 24 - options: Options, 21 + #[derive(Serialize, Deserialize)] 22 + pub(super) struct Metadata { 23 + pub(super) url: Url, 24 + pub(super) options: Options, 25 25 } 26 26 27 - #[derive(Serialize)] 28 - struct Options { 29 - compress: bool, 27 + #[derive(Serialize, Deserialize)] 28 + pub(super) struct Options { 29 + pub(super) compress: bool, 30 30 } 31 31 32 32 pub struct Cache(PathBuf);
+257 -35
pkgs/build-support/node/fetch-npm-deps/src/main.rs
··· 1 1 #![warn(clippy::pedantic)] 2 2 3 - use crate::cacache::Cache; 4 - use anyhow::anyhow; 3 + use crate::cacache::{Cache, Key}; 4 + use anyhow::{anyhow, bail}; 5 5 use rayon::prelude::*; 6 6 use serde_json::{Map, Value}; 7 7 use std::{ 8 + collections::HashMap, 8 9 env, fs, 9 - path::Path, 10 + path::{Path, PathBuf}, 10 11 process::{self, Command}, 11 12 }; 12 13 use tempfile::tempdir; 14 + use url::Url; 15 + use walkdir::WalkDir; 13 16 14 17 mod cacache; 15 18 mod parse; 16 19 17 - /// `fixup_lockfile` removes the `integrity` field from Git dependencies. 20 + fn cache_map_path() -> Option<PathBuf> { 21 + env::var_os("CACHE_MAP_PATH").map(PathBuf::from) 22 + } 23 + 24 + /// `fixup_lockfile` rewrites `integrity` hashes to match cache and removes the `integrity` field from Git dependencies. 25 + /// 26 + /// Sometimes npm has multiple instances of a given `resolved` URL that have different types of `integrity` hashes (e.g. SHA-1 27 + /// and SHA-512) in the lockfile. Given we only cache one version of these, the `integrity` field must be normalized to the hash 28 + /// we cache as (which is the strongest available one). 18 29 /// 19 30 /// Git dependencies from specific providers can be retrieved from those providers' automatic tarball features. 20 31 /// When these dependencies are specified with a commit identifier, npm generates a tarball, and inserts the integrity hash of that 21 32 /// tarball into the lockfile. 22 33 /// 23 34 /// Thus, we remove this hash, to replace it with our own determinstic copies of dependencies from hosted Git providers. 24 - fn fixup_lockfile(mut lock: Map<String, Value>) -> anyhow::Result<Option<Map<String, Value>>> { 25 - if lock 35 + /// 36 + /// If no fixups were performed, `None` is returned and the lockfile structure should be left as-is. If fixups were performed, the 37 + /// `dependencies` key in v2 lockfiles designed for backwards compatibility with v1 parsers is removed because of inconsistent data. 38 + fn fixup_lockfile( 39 + mut lock: Map<String, Value>, 40 + cache: &Option<HashMap<String, String>>, 41 + ) -> anyhow::Result<Option<Map<String, Value>>> { 42 + let mut fixed = false; 43 + 44 + match lock 26 45 .get("lockfileVersion") 27 46 .ok_or_else(|| anyhow!("couldn't get lockfile version"))? 28 47 .as_i64() 29 48 .ok_or_else(|| anyhow!("lockfile version isn't an int"))? 30 - < 2 31 49 { 32 - return Ok(None); 33 - } 50 + 1 => fixup_v1_deps( 51 + lock.get_mut("dependencies") 52 + .unwrap() 53 + .as_object_mut() 54 + .unwrap(), 55 + cache, 56 + &mut fixed, 57 + ), 58 + 2 | 3 => { 59 + for package in lock 60 + .get_mut("packages") 61 + .ok_or_else(|| anyhow!("couldn't get packages"))? 62 + .as_object_mut() 63 + .ok_or_else(|| anyhow!("packages isn't a map"))? 64 + .values_mut() 65 + { 66 + if let Some(Value::String(resolved)) = package.get("resolved") { 67 + if let Some(Value::String(integrity)) = package.get("integrity") { 68 + if resolved.starts_with("git+ssh://") { 69 + fixed = true; 34 70 35 - let mut fixed = false; 71 + package 72 + .as_object_mut() 73 + .ok_or_else(|| anyhow!("package isn't a map"))? 74 + .remove("integrity"); 75 + } else if let Some(cache_hashes) = cache { 76 + let cache_hash = cache_hashes 77 + .get(resolved) 78 + .expect("dependency should have a hash"); 36 79 37 - for package in lock 38 - .get_mut("packages") 39 - .ok_or_else(|| anyhow!("couldn't get packages"))? 40 - .as_object_mut() 41 - .ok_or_else(|| anyhow!("packages isn't a map"))? 42 - .values_mut() 43 - { 44 - if let Some(Value::String(resolved)) = package.get("resolved") { 45 - if resolved.starts_with("git+ssh://") && package.get("integrity").is_some() { 46 - fixed = true; 80 + if integrity != cache_hash { 81 + fixed = true; 47 82 48 - package 49 - .as_object_mut() 50 - .ok_or_else(|| anyhow!("package isn't a map"))? 51 - .remove("integrity"); 83 + *package 84 + .as_object_mut() 85 + .ok_or_else(|| anyhow!("package isn't a map"))? 86 + .get_mut("integrity") 87 + .unwrap() = Value::String(cache_hash.clone()); 88 + } 89 + } 90 + } 91 + } 92 + } 93 + 94 + if fixed { 95 + lock.remove("dependencies"); 52 96 } 53 97 } 98 + v => bail!("unsupported lockfile version {v}"), 54 99 } 55 100 56 101 if fixed { 57 - lock.remove("dependencies"); 58 - 59 102 Ok(Some(lock)) 60 103 } else { 61 104 Ok(None) 62 105 } 63 106 } 64 107 108 + // Recursive helper to fixup v1 lockfile deps 109 + fn fixup_v1_deps( 110 + dependencies: &mut serde_json::Map<String, Value>, 111 + cache: &Option<HashMap<String, String>>, 112 + fixed: &mut bool, 113 + ) { 114 + for dep in dependencies.values_mut() { 115 + if let Some(Value::String(resolved)) = dep 116 + .as_object() 117 + .expect("v1 dep must be object") 118 + .get("resolved") 119 + { 120 + if let Some(Value::String(integrity)) = dep 121 + .as_object() 122 + .expect("v1 dep must be object") 123 + .get("integrity") 124 + { 125 + if resolved.starts_with("git+ssh://") { 126 + *fixed = true; 127 + 128 + dep.as_object_mut() 129 + .expect("v1 dep must be object") 130 + .remove("integrity"); 131 + } else if let Some(cache_hashes) = cache { 132 + let cache_hash = cache_hashes 133 + .get(resolved) 134 + .expect("dependency should have a hash"); 135 + 136 + if integrity != cache_hash { 137 + *fixed = true; 138 + 139 + *dep.as_object_mut() 140 + .expect("v1 dep must be object") 141 + .get_mut("integrity") 142 + .unwrap() = Value::String(cache_hash.clone()); 143 + } 144 + } 145 + } 146 + } 147 + 148 + if let Some(Value::Object(more_deps)) = dep.as_object_mut().unwrap().get_mut("dependencies") 149 + { 150 + fixup_v1_deps(more_deps, cache, fixed); 151 + } 152 + } 153 + } 154 + 155 + fn map_cache() -> anyhow::Result<HashMap<Url, String>> { 156 + let mut hashes = HashMap::new(); 157 + 158 + let content_path = Path::new(&env::var_os("npmDeps").unwrap()).join("_cacache/index-v5"); 159 + 160 + for entry in WalkDir::new(content_path) { 161 + let entry = entry?; 162 + 163 + if entry.file_type().is_file() { 164 + let content = fs::read_to_string(entry.path())?; 165 + let key: Key = serde_json::from_str(content.split_ascii_whitespace().nth(1).unwrap())?; 166 + 167 + hashes.insert(key.metadata.url, key.integrity); 168 + } 169 + } 170 + 171 + Ok(hashes) 172 + } 173 + 65 174 fn main() -> anyhow::Result<()> { 66 175 let args = env::args().collect::<Vec<_>>(); 67 176 ··· 76 185 if args[1] == "--fixup-lockfile" { 77 186 let lock = serde_json::from_str(&fs::read_to_string(&args[2])?)?; 78 187 79 - if let Some(fixed) = fixup_lockfile(lock)? { 188 + let cache = cache_map_path() 189 + .map(|map_path| Ok::<_, anyhow::Error>(serde_json::from_slice(&fs::read(map_path)?)?)) 190 + .transpose()?; 191 + 192 + if let Some(fixed) = fixup_lockfile(lock, &cache)? { 80 193 println!("Fixing lockfile"); 81 194 82 195 fs::write(&args[2], serde_json::to_string(&fixed)?)?; 83 196 } 197 + 198 + return Ok(()); 199 + } else if args[1] == "--map-cache" { 200 + let map = map_cache()?; 201 + 202 + fs::write( 203 + cache_map_path().expect("CACHE_MAP_PATH environment variable must be set"), 204 + serde_json::to_string(&map)?, 205 + )?; 84 206 85 207 return Ok(()); 86 208 } ··· 133 255 134 256 #[cfg(test)] 135 257 mod tests { 258 + use std::collections::HashMap; 259 + 136 260 use super::fixup_lockfile; 137 261 use serde_json::json; 138 262 ··· 147 271 }, 148 272 "foo": { 149 273 "resolved": "https://github.com/NixOS/nixpkgs", 150 - "integrity": "aaa" 274 + "integrity": "sha1-aaa" 151 275 }, 152 276 "bar": { 153 277 "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", 154 - "integrity": "bbb" 155 - } 278 + "integrity": "sha512-aaa" 279 + }, 280 + "foo-bad": { 281 + "resolved": "foo", 282 + "integrity": "sha1-foo" 283 + }, 284 + "foo-good": { 285 + "resolved": "foo", 286 + "integrity": "sha512-foo" 287 + }, 156 288 } 157 289 }); 158 290 ··· 165 297 }, 166 298 "foo": { 167 299 "resolved": "https://github.com/NixOS/nixpkgs", 168 - "integrity": "aaa" 300 + "integrity": "" 169 301 }, 170 302 "bar": { 171 303 "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", 172 - } 304 + }, 305 + "foo-bad": { 306 + "resolved": "foo", 307 + "integrity": "sha512-foo" 308 + }, 309 + "foo-good": { 310 + "resolved": "foo", 311 + "integrity": "sha512-foo" 312 + }, 173 313 } 174 314 }); 175 315 316 + let mut hashes = HashMap::new(); 317 + 318 + hashes.insert( 319 + String::from("https://github.com/NixOS/nixpkgs"), 320 + String::new(), 321 + ); 322 + 323 + hashes.insert( 324 + String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"), 325 + String::new(), 326 + ); 327 + 328 + hashes.insert(String::from("foo"), String::from("sha512-foo")); 329 + 176 330 assert_eq!( 177 - fixup_lockfile(input.as_object().unwrap().clone())?, 331 + fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?, 178 332 Some(expected.as_object().unwrap().clone()) 179 333 ); 180 334 335 + Ok(()) 336 + } 337 + 338 + #[test] 339 + fn lockfile_v1_fixup() -> anyhow::Result<()> { 340 + let input = json!({ 341 + "lockfileVersion": 1, 342 + "name": "foo", 343 + "dependencies": { 344 + "foo": { 345 + "resolved": "https://github.com/NixOS/nixpkgs", 346 + "integrity": "sha512-aaa" 347 + }, 348 + "foo-good": { 349 + "resolved": "foo", 350 + "integrity": "sha512-foo" 351 + }, 352 + "bar": { 353 + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", 354 + "integrity": "sha512-bbb", 355 + "dependencies": { 356 + "foo-bad": { 357 + "resolved": "foo", 358 + "integrity": "sha1-foo" 359 + }, 360 + }, 361 + }, 362 + } 363 + }); 364 + 365 + let expected = json!({ 366 + "lockfileVersion": 1, 367 + "name": "foo", 368 + "dependencies": { 369 + "foo": { 370 + "resolved": "https://github.com/NixOS/nixpkgs", 371 + "integrity": "" 372 + }, 373 + "foo-good": { 374 + "resolved": "foo", 375 + "integrity": "sha512-foo" 376 + }, 377 + "bar": { 378 + "resolved": "git+ssh://git@github.com/NixOS/nixpkgs.git", 379 + "dependencies": { 380 + "foo-bad": { 381 + "resolved": "foo", 382 + "integrity": "sha512-foo" 383 + }, 384 + }, 385 + }, 386 + } 387 + }); 388 + 389 + let mut hashes = HashMap::new(); 390 + 391 + hashes.insert( 392 + String::from("https://github.com/NixOS/nixpkgs"), 393 + String::new(), 394 + ); 395 + 396 + hashes.insert( 397 + String::from("git+ssh://git@github.com/NixOS/nixpkgs.git"), 398 + String::new(), 399 + ); 400 + 401 + hashes.insert(String::from("foo"), String::from("sha512-foo")); 402 + 181 403 assert_eq!( 182 - fixup_lockfile(json!({"lockfileVersion": 1}).as_object().unwrap().clone())?, 183 - None 404 + fixup_lockfile(input.as_object().unwrap().clone(), &Some(hashes))?, 405 + Some(expected.as_object().unwrap().clone()) 184 406 ); 185 407 186 408 Ok(())
+19 -11
pkgs/build-support/node/fetch-npm-deps/src/parse/lock.rs
··· 97 97 } 98 98 99 99 #[derive(Debug, PartialEq, Eq)] 100 - pub(super) struct HashCollection(HashSet<Hash>); 100 + pub struct HashCollection(HashSet<Hash>); 101 101 102 102 impl HashCollection { 103 - pub(super) fn into_best(self) -> Option<Hash> { 103 + pub fn from_str(s: impl AsRef<str>) -> anyhow::Result<HashCollection> { 104 + let hashes = s 105 + .as_ref() 106 + .split_ascii_whitespace() 107 + .map(Hash::new) 108 + .collect::<anyhow::Result<_>>()?; 109 + 110 + Ok(HashCollection(hashes)) 111 + } 112 + 113 + pub fn into_best(self) -> Option<Hash> { 104 114 self.0.into_iter().max() 105 115 } 106 116 } ··· 136 146 where 137 147 E: de::Error, 138 148 { 139 - let hashes = value 140 - .split_ascii_whitespace() 141 - .map(Hash::new) 142 - .collect::<anyhow::Result<_>>() 143 - .map_err(E::custom)?; 144 - 145 - Ok(HashCollection(hashes)) 149 + HashCollection::from_str(value).map_err(E::custom) 146 150 } 147 151 } 148 152 149 - #[derive(Debug, Deserialize, PartialEq, Eq, Hash)] 153 + #[derive(Clone, Debug, Deserialize, PartialEq, Eq, Hash)] 150 154 pub struct Hash(String); 151 155 152 156 // Hash algorithms, in ascending preference. ··· 166 170 Err(anyhow!("unknown hash algorithm {algo:?}")) 167 171 } 168 172 } 173 + 174 + pub fn as_str(&self) -> &str { 175 + &self.0 176 + } 169 177 } 170 178 171 179 impl fmt::Display for Hash { 172 180 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { 173 - self.0.fmt(f) 181 + self.as_str().fmt(f) 174 182 } 175 183 } 176 184
+1 -1
pkgs/build-support/node/fetch-npm-deps/src/parse/mod.rs
··· 9 9 use tempfile::{tempdir, TempDir}; 10 10 use url::Url; 11 11 12 - mod lock; 12 + pub mod lock; 13 13 14 14 pub fn lockfile(content: &str, force_git_deps: bool) -> anyhow::Result<Vec<Package>> { 15 15 let mut packages = lock::packages(content)