nixpkgs mirror (for testing)
github.com/NixOS/nixpkgs
nix
1use anyhow::{anyhow, bail, Context};
2use lock::UrlOrString;
3use log::{debug, info};
4use rayon::prelude::*;
5use serde_json::{Map, Value};
6use std::{
7 fs,
8 io::Write,
9 process::{Command, Stdio},
10};
11use tempfile::{tempdir, TempDir};
12use url::Url;
13
14use crate::util;
15
16pub mod lock;
17
18pub fn lockfile(
19 content: &str,
20 force_git_deps: bool,
21 force_empty_cache: bool,
22) -> anyhow::Result<Vec<Package>> {
23 debug!("parsing lockfile with contents:\n{content}");
24
25 let mut packages = lock::packages(content)
26 .context("failed to extract packages from lockfile")?
27 .into_par_iter()
28 .map(|p| {
29 let n = p.name.clone().unwrap();
30
31 Package::from_lock(p).with_context(|| format!("failed to parse data for {n}"))
32 })
33 .collect::<anyhow::Result<Vec<_>>>()?;
34
35 if packages.is_empty() && !force_empty_cache {
36 bail!("No cacheable dependencies were found. Please inspect the upstream `package-lock.json` file and ensure that remote dependencies have `resolved` URLs and `integrity` hashes. If the lockfile is missing this data, attempt to get upstream to fix it via a tool like <https://github.com/jeslie0/npm-lockfile-fix>. If generating an empty cache is intentional and you would like to do it anyways, set `forceEmptyCache = true`.");
37 }
38
39 let mut new = Vec::new();
40
41 for pkg in packages
42 .iter()
43 .filter(|p| matches!(p.specifics, Specifics::Git { .. }))
44 {
45 let dir = match &pkg.specifics {
46 Specifics::Git { workdir } => workdir,
47 Specifics::Registry { .. } => unimplemented!(),
48 };
49
50 let path = dir.path().join("package");
51
52 info!("recursively parsing lockfile for {} at {path:?}", pkg.name);
53
54 let lockfile_contents = fs::read_to_string(path.join("package-lock.json"));
55
56 let package_json_path = path.join("package.json");
57 let mut package_json: Map<String, Value> =
58 serde_json::from_str(&fs::read_to_string(package_json_path)?)?;
59
60 if let Some(scripts) = package_json
61 .get_mut("scripts")
62 .and_then(Value::as_object_mut)
63 {
64 // https://github.com/npm/pacote/blob/272edc1bac06991fc5f95d06342334bbacfbaa4b/lib/git.js#L166-L172
65 for typ in [
66 "postinstall",
67 "build",
68 "preinstall",
69 "install",
70 "prepack",
71 "prepare",
72 ] {
73 if scripts.contains_key(typ) && lockfile_contents.is_err() && !force_git_deps {
74 bail!("Git dependency {} contains install scripts, but has no lockfile, which is something that will probably break. Open an issue if you can't feasibly patch this dependency out, and we'll come up with a workaround.\nIf you'd like to attempt to try to use this dependency anyways, set `forceGitDeps = true`.", pkg.name);
75 }
76 }
77 }
78
79 if let Ok(lockfile_contents) = lockfile_contents {
80 new.append(&mut lockfile(
81 &lockfile_contents,
82 force_git_deps,
83 // force_empty_cache is turned on here since recursively parsed lockfiles should be
84 // allowed to have an empty cache without erroring by default
85 true,
86 )?);
87 }
88 }
89
90 packages.append(&mut new);
91
92 packages.par_sort_by(|x, y| {
93 x.url
94 .partial_cmp(&y.url)
95 .expect("resolved should be comparable")
96 });
97
98 packages.dedup_by(|x, y| x.url == y.url);
99
100 Ok(packages)
101}
102
/// A single remote dependency extracted from a lockfile entry.
#[derive(Debug)]
pub struct Package {
    /// Package name as it appears in the lockfile.
    pub name: String,
    /// URL the contents are fetched from — either the registry tarball URL
    /// or, for Git dependencies, the rewritten hosted-git archive URL.
    pub url: Url,
    /// Registry- or Git-specific data; see [`Specifics`].
    specifics: Specifics,
}
109
/// Data that differs between registry tarballs and Git dependencies.
#[derive(Debug)]
enum Specifics {
    /// Fetched from an npm registry; carries the lockfile's integrity hash.
    Registry { integrity: lock::Hash },
    /// Fetched from a hosted Git provider; the downloaded archive has been
    /// extracted into `workdir` (under a `package/` subdirectory).
    Git { workdir: TempDir },
}
115
116impl Package {
117 fn from_lock(pkg: lock::Package) -> anyhow::Result<Package> {
118 let mut resolved = match pkg
119 .resolved
120 .expect("at this point, packages should have URLs")
121 {
122 UrlOrString::Url(u) => u,
123 UrlOrString::String(_) => panic!("at this point, all packages should have URLs"),
124 };
125
126 let specifics = match get_hosted_git_url(&resolved)? {
127 Some(hosted) => {
128 let body = util::get_url_body_with_retry(&hosted)?;
129
130 let workdir = tempdir()?;
131
132 let tar_path = workdir.path().join("package");
133
134 fs::create_dir(&tar_path)?;
135
136 let mut cmd = Command::new("tar")
137 .args(["--extract", "--gzip", "--strip-components=1", "-C"])
138 .arg(&tar_path)
139 .stdin(Stdio::piped())
140 .spawn()?;
141
142 cmd.stdin.take().unwrap().write_all(&body)?;
143
144 let exit = cmd.wait()?;
145
146 if !exit.success() {
147 bail!(
148 "failed to extract tarball for {}: tar exited with status code {}",
149 pkg.name.unwrap(),
150 exit.code().unwrap()
151 );
152 }
153
154 resolved = hosted;
155
156 Specifics::Git { workdir }
157 }
158 None => Specifics::Registry {
159 integrity: pkg
160 .integrity
161 .expect("non-git dependencies should have associated integrity")
162 .into_best()
163 .expect("non-git dependencies should have non-empty associated integrity"),
164 },
165 };
166
167 Ok(Package {
168 name: pkg.name.unwrap(),
169 url: resolved,
170 specifics,
171 })
172 }
173
174 pub fn tarball(&self) -> anyhow::Result<Vec<u8>> {
175 match &self.specifics {
176 Specifics::Registry { .. } => Ok(util::get_url_body_with_retry(&self.url)?),
177 Specifics::Git { workdir } => Ok(Command::new("tar")
178 .args([
179 "--sort=name",
180 "--mtime=@0",
181 "--owner=0",
182 "--group=0",
183 "--numeric-owner",
184 "--format=gnu",
185 "-I",
186 "gzip -n -9",
187 "--create",
188 "-C",
189 ])
190 .arg(workdir.path())
191 .arg("package")
192 .output()?
193 .stdout),
194 }
195 }
196
197 pub fn integrity(&self) -> Option<&lock::Hash> {
198 match &self.specifics {
199 Specifics::Registry { integrity } => Some(integrity),
200 Specifics::Git { .. } => None,
201 }
202 }
203}
204
205#[allow(clippy::case_sensitive_file_extension_comparisons)]
206fn get_hosted_git_url(url: &Url) -> anyhow::Result<Option<Url>> {
207 if ["git", "git+ssh", "git+https", "ssh"].contains(&url.scheme()) {
208 let mut s = url
209 .path_segments()
210 .ok_or_else(|| anyhow!("bad URL: {url}"))?;
211
212 let mut get_url = || match url.host_str()? {
213 "github.com" => {
214 let user = s.next()?;
215 let mut project = s.next()?;
216 let typ = s.next();
217 let mut commit = s.next();
218
219 if typ.is_none() {
220 commit = url.fragment();
221 } else if typ.is_some() && typ != Some("tree") {
222 return None;
223 }
224
225 if project.ends_with(".git") {
226 project = project.strip_suffix(".git")?;
227 }
228
229 let commit = commit.unwrap();
230
231 Some(
232 Url::parse(&format!(
233 "https://codeload.github.com/{user}/{project}/tar.gz/{commit}"
234 ))
235 .ok()?,
236 )
237 }
238 "bitbucket.org" => {
239 let user = s.next()?;
240 let mut project = s.next()?;
241 let aux = s.next();
242
243 if aux == Some("get") {
244 return None;
245 }
246
247 if project.ends_with(".git") {
248 project = project.strip_suffix(".git")?;
249 }
250
251 let commit = url.fragment()?;
252
253 Some(
254 Url::parse(&format!(
255 "https://bitbucket.org/{user}/{project}/get/{commit}.tar.gz"
256 ))
257 .ok()?,
258 )
259 }
260 "gitlab.com" => {
261 /* let path = &url.path()[1..];
262
263 if path.contains("/~/") || path.contains("/archive.tar.gz") {
264 return None;
265 }
266
267 let user = s.next()?;
268 let mut project = s.next()?;
269
270 if project.ends_with(".git") {
271 project = project.strip_suffix(".git")?;
272 }
273
274 let commit = url.fragment()?;
275
276 Some(
277 Url::parse(&format!(
278 "https://gitlab.com/{user}/{project}/repository/archive.tar.gz?ref={commit}"
279 ))
280 .ok()?,
281 ) */
282
283 // lmao: https://github.com/npm/hosted-git-info/pull/109
284 None
285 }
286 "git.sr.ht" => {
287 let user = s.next()?;
288 let mut project = s.next()?;
289 let aux = s.next();
290
291 if aux == Some("archive") {
292 return None;
293 }
294
295 if project.ends_with(".git") {
296 project = project.strip_suffix(".git")?;
297 }
298
299 let commit = url.fragment()?;
300
301 Some(
302 Url::parse(&format!(
303 "https://git.sr.ht/{user}/{project}/archive/{commit}.tar.gz"
304 ))
305 .ok()?,
306 )
307 }
308 _ => None,
309 };
310
311 match get_url() {
312 Some(u) => Ok(Some(u)),
313 None => Err(anyhow!("This lockfile either contains a Git dependency with an unsupported host, or a malformed URL in the lockfile: {url}"))
314 }
315 } else {
316 Ok(None)
317 }
318}
319
#[cfg(test)]
mod tests {
    use super::get_hosted_git_url;
    use url::Url;

    /// Asserts that `input` maps to `expected` via `get_hosted_git_url`.
    fn check(input: &str, expected: Option<&str>) {
        let actual = get_hosted_git_url(&Url::parse(input).unwrap()).unwrap();

        assert_eq!(actual, expected.map(|u| Url::parse(u).unwrap()));
    }

    #[test]
    fn hosted_git_urls() {
        check(
            "git+ssh://git@github.com/castlabs/electron-releases.git#fc5f78d046e8d7cdeb66345a2633c383ab41f525",
            Some("https://codeload.github.com/castlabs/electron-releases/tar.gz/fc5f78d046e8d7cdeb66345a2633c383ab41f525"),
        );
        check(
            "git+ssh://bitbucket.org/foo/bar#branch",
            Some("https://bitbucket.org/foo/bar/get/branch.tar.gz"),
        );
        check(
            "git+ssh://git.sr.ht/~foo/bar#branch",
            Some("https://git.sr.ht/~foo/bar/archive/branch.tar.gz"),
        );

        assert!(
            get_hosted_git_url(&Url::parse("ssh://git@gitlab.com/foo/bar.git#fix/bug").unwrap())
                .is_err(),
            "GitLab URLs should be marked as invalid (lol)"
        );
    }
}