Clone of https://github.com/NixOS/nixpkgs.git (to stress-test knotserver)
at github-to-sqlite-beautifulsoup4 559 lines 18 kB view raw
# Functions for working with paths, see ./path.md
{ lib }:
let

  inherit (builtins)
    isString
    isPath
    split
    match
    typeOf
    ;

  inherit (lib.lists)
    length
    head
    last
    genList
    elemAt
    all
    concatMap
    foldl'
    take
    drop
    ;

  inherit (lib.strings)
    concatStringsSep
    substring
    ;

  inherit (lib.asserts)
    assertMsg
    ;

  inherit (lib.path.subpath)
    isValid
    ;

  # Return the reason why a subpath is invalid, or `null` if it's valid
  subpathInvalidReason = value:
    if ! isString value then
      "The given value is of type ${typeOf value}, but a string was expected"
    else if value == "" then
      "The given string is empty"
    else if substring 0 1 value == "/" then
      "The given string \"${value}\" starts with a `/`, representing an absolute path"
    # We don't support ".." components, see ./path.md#parent-directory
    else if match "(.*/)?\\.\\.(/.*)?" value != null then
      "The given string \"${value}\" contains a `..` component, which is not allowed in subpaths"
    else null;

  # Split and normalise a relative path string into its components.
  # The result doesn't include "." components.
  # This function performs no validation itself: ".." components are not
  # rejected here, so callers check the string with `isValid` first
  splitRelPath = path:
    let
      # Split the string into its parts using regex for efficiency. This regex
      # matches patterns like "/", "/./", "/././", with arbitrarily many "/"s
      # together. These are the main special cases:
      # - Leading "./" gets split into a leading "." part
      # - Trailing "/." or "/" get split into a trailing "." or ""
      #   part respectively
      #
      # These are the only cases where "." and "" parts can occur
      parts = split "/+(\\./+)*" path;

      # `split` creates a list of 2 * k + 1 elements, containing the k +
      # 1 parts, interleaved with k matches where k is the number of
      # (non-overlapping) matches. This calculation here gets the number of parts
      # back from the list length
      # floor( (2 * k + 1) / 2 ) + 1 == floor( k + 1/2 ) + 1 == k + 1
      partCount = length parts / 2 + 1;

      # To assemble the final list of components we want to:
      # - Skip a potential leading ".", normalising "./foo" to "foo"
      # - Skip a potential trailing "." or "", normalising "foo/" and "foo/." to
      #   "foo". See ./path.md#trailing-slashes
      skipStart = if head parts == "." then 1 else 0;
      skipEnd = if last parts == "." || last parts == "" then 1 else 0;

      # We can now know the length of the result by removing the number of
      # skipped parts from the total number
      componentCount = partCount - skipEnd - skipStart;

    in
      # Special case of a single "." path component. Such a case leaves a
      # componentCount of -1 due to the skipStart/skipEnd not verifying that
      # they don't refer to the same character
      if path == "." then []

      # Generate the result list directly. This is more efficient than a
      # combination of `filter`, `init` and `tail`, because here we don't
      # allocate any intermediate lists
      else genList (index:
        # To get to the element we need to add the number of parts we skip and
        # multiply by two due to the interleaved layout of `parts`
        elemAt parts ((skipStart + index) * 2)
      ) componentCount;

  # Join relative path components together
  joinRelPath = components:
    # Always return relative paths with `./` as a prefix (./path.md#leading-dots-for-relative-paths)
    "./" +
    # An empty string is not a valid relative path, so we need to return a `.` when we have no components
    (if components == [] then "." else concatStringsSep "/" components);

  # Type: Path -> { root :: Path, components :: [ String ] }
  #
  # Deconstruct a path value type into:
  # - root: The filesystem root of the path, generally `/`
  # - components: All the path's components
  #
  # This is similar to `splitString "/" (toString path)` but safer
  # because it can distinguish different filesystem roots
  deconstructPath =
    let
      # Walk up the path with `dirOf`, prepending each `baseNameOf` to the
      # accumulated component list, until the root is reached
      recurse = components: base:
        # If the parent of a path is the path itself, then it's a filesystem root
        if base == dirOf base then { root = base; inherit components; }
        else recurse ([ (baseNameOf base) ] ++ components) (dirOf base);
    in recurse [];

in /* No rec! Add dependencies on this file at the top. */ {

  /*
    Append a subpath string to a path.

    Like `path + ("/" + string)` but safer, because it errors instead of returning potentially surprising results.
    More specifically, it checks that the first argument is a [path value type](https://nixos.org/manual/nix/stable/language/values.html#type-path),
    and that the second argument is a [valid subpath string](#function-library-lib.path.subpath.isValid).

    Laws:

    - Not influenced by subpath [normalisation](#function-library-lib.path.subpath.normalise):

          append p s == append p (subpath.normalise s)

    Type:
      append :: Path -> String -> Path

    Example:
      append /foo "bar/baz"
      => /foo/bar/baz

      # subpaths don't need to be normalised
      append /foo "./bar//baz/./"
      => /foo/bar/baz

      # can append to root directory
      append /. "foo/bar"
      => /foo/bar

      # first argument needs to be a path value type
      append "/foo" "bar"
      => <error>

      # second argument needs to be a valid subpath string
      append /foo /bar
      => <error>
      append /foo ""
      => <error>
      append /foo "/bar"
      => <error>
      append /foo "../bar"
      => <error>
  */
  append =
    # The absolute path to append to
    path:
    # The subpath string to append
    subpath:
    assert assertMsg (isPath path) ''
      lib.path.append: The first argument is of type ${typeOf path}, but a path was expected'';
    assert assertMsg (isValid subpath) ''
      lib.path.append: Second argument is not a valid subpath string:
          ${subpathInvalidReason subpath}'';
    path + ("/" + subpath);

  /*
    Whether the first path is a component-wise prefix of the second path.

    Laws:

    - `hasPrefix p q` is only true if [`q == append p s`](#function-library-lib.path.append) for some [subpath](#function-library-lib.path.subpath.isValid) `s`.

    - `hasPrefix` is a [non-strict partial order](https://en.wikipedia.org/wiki/Partially_ordered_set#Non-strict_partial_order) over the set of all path values.

    Type:
      hasPrefix :: Path -> Path -> Bool

    Example:
      hasPrefix /foo /foo/bar
      => true
      hasPrefix /foo /foo
      => true
      hasPrefix /foo/bar /foo
      => false
      hasPrefix /. /foo
      => true
  */
  hasPrefix =
    # The path that is tested for being a prefix
    path1:
    assert assertMsg
      (isPath path1)
      "lib.path.hasPrefix: First argument is of type ${typeOf path1}, but a path was expected";
    let
      # Bound outside of the `path2:` lambda, so a partial application
      # `hasPrefix path1` can reuse the deconstruction for every second argument
      path1Deconstructed = deconstructPath path1;
    in
      # The path that is tested for having the prefix
      path2:
      assert assertMsg
        (isPath path2)
        "lib.path.hasPrefix: Second argument is of type ${typeOf path2}, but a path was expected";
      let
        path2Deconstructed = deconstructPath path2;
      in
        assert assertMsg
        (path1Deconstructed.root == path2Deconstructed.root) ''
          lib.path.hasPrefix: Filesystem roots must be the same for both paths, but paths with different roots were given:
              first argument: "${toString path1}" with root "${toString path1Deconstructed.root}"
              second argument: "${toString path2}" with root "${toString path2Deconstructed.root}"'';
        # Compare the leading components of path2 with all components of path1
        take (length path1Deconstructed.components) path2Deconstructed.components == path1Deconstructed.components;

  /*
    Remove the first path as a component-wise prefix from the second path.
    The result is a [normalised subpath string](#function-library-lib.path.subpath.normalise).

    Laws:

    - Inverts [`append`](#function-library-lib.path.append) for [normalised subpath strings](#function-library-lib.path.subpath.normalise):

          removePrefix p (append p s) == subpath.normalise s

    Type:
      removePrefix :: Path -> Path -> String

    Example:
      removePrefix /foo /foo/bar/baz
      => "./bar/baz"
      removePrefix /foo /foo
      => "./."
      removePrefix /foo/bar /foo
      => <error>
      removePrefix /. /foo
      => "./foo"
  */
  removePrefix =
    # The prefix path to remove
    path1:
    assert assertMsg
      (isPath path1)
      "lib.path.removePrefix: First argument is of type ${typeOf path1}, but a path was expected.";
    let
      # Bound outside of the `path2:` lambda, so a partial application
      # `removePrefix path1` can reuse these for every second argument
      path1Deconstructed = deconstructPath path1;
      path1Length = length path1Deconstructed.components;
    in
      # The path to remove the prefix from
      path2:
      assert assertMsg
        (isPath path2)
        "lib.path.removePrefix: Second argument is of type ${typeOf path2}, but a path was expected.";
      let
        path2Deconstructed = deconstructPath path2;
        success = take path1Length path2Deconstructed.components == path1Deconstructed.components;
        # Only evaluated by `joinRelPath` below, after the filesystem root
        # assertion, so a root mismatch is reported before a prefix mismatch
        components =
          if success then
            drop path1Length path2Deconstructed.components
          else
            throw ''
              lib.path.removePrefix: The first path argument "${toString path1}" is not a component-wise prefix of the second path argument "${toString path2}".'';
      in
        assert assertMsg
        (path1Deconstructed.root == path2Deconstructed.root) ''
          lib.path.removePrefix: Filesystem roots must be the same for both paths, but paths with different roots were given:
              first argument: "${toString path1}" with root "${toString path1Deconstructed.root}"
              second argument: "${toString path2}" with root "${toString path2Deconstructed.root}"'';
        joinRelPath components;

  /*
    Split the filesystem root from a [path](https://nixos.org/manual/nix/stable/language/values.html#type-path).
    The result is an attribute set with these attributes:
    - `root`: The filesystem root of the path, meaning that this directory has no parent directory.
    - `subpath`: The [normalised subpath string](#function-library-lib.path.subpath.normalise) that when [appended](#function-library-lib.path.append) to `root` returns the original path.

    Laws:
    - [Appending](#function-library-lib.path.append) the `root` and `subpath` gives the original path:

          p ==
            append
              (splitRoot p).root
              (splitRoot p).subpath

    - Trying to get the parent directory of `root` using [`dirOf`](https://nixos.org/manual/nix/stable/language/builtins.html#builtins-dirOf) returns `root` itself:

          dirOf (splitRoot p).root == (splitRoot p).root

    Type:
      splitRoot :: Path -> { root :: Path, subpath :: String }

    Example:
      splitRoot /foo/bar
      => { root = /.; subpath = "./foo/bar"; }

      splitRoot /.
      => { root = /.; subpath = "./."; }

      # Nix neutralises `..` path components for all path values automatically
      splitRoot /foo/../bar
      => { root = /.; subpath = "./bar"; }

      splitRoot "/foo/bar"
      => <error>
  */
  splitRoot =
    # The path to split the root off of
    path:
    assert assertMsg
      (isPath path)
      "lib.path.splitRoot: Argument is of type ${typeOf path}, but a path was expected";
    let
      deconstructed = deconstructPath path;
    in {
      root = deconstructed.root;
      subpath = joinRelPath deconstructed.components;
    };

  /*
    Whether a value is a valid subpath string.

    A subpath string points to a specific file or directory within an absolute base directory.
    It is a stricter form of a relative path that excludes `..` components, since those could escape the base directory.

    - The value is a string.

    - The string is not empty.

    - The string doesn't start with a `/`.

    - The string doesn't contain any `..` path components.

    Type:
      subpath.isValid :: String -> Bool

    Example:
      # Not a string
      subpath.isValid null
      => false

      # Empty string
      subpath.isValid ""
      => false

      # Absolute path
      subpath.isValid "/foo"
      => false

      # Contains a `..` path component
      subpath.isValid "../foo"
      => false

      # Valid subpath
      subpath.isValid "foo/bar"
      => true

      # Doesn't need to be normalised
      subpath.isValid "./foo//bar/"
      => true
  */
  subpath.isValid =
    # The value to check
    value:
    subpathInvalidReason value == null;


  /*
    Join subpath strings together using `/`, returning a normalised subpath string.

    Like `concatStringsSep "/"` but safer, specifically:

    - All elements must be [valid subpath strings](#function-library-lib.path.subpath.isValid).

    - The result gets [normalised](#function-library-lib.path.subpath.normalise).

    - The edge case of an empty list gets properly handled by returning the neutral subpath `"./."`.

    Laws:

    - Associativity:

          subpath.join [ x (subpath.join [ y z ]) ] == subpath.join [ (subpath.join [ x y ]) z ]

    - Identity - `"./."` is the neutral element for normalised paths:

          subpath.join [ ] == "./."
          subpath.join [ (subpath.normalise p) "./." ] == subpath.normalise p
          subpath.join [ "./." (subpath.normalise p) ] == subpath.normalise p

    - Normalisation - the result is [normalised](#function-library-lib.path.subpath.normalise):

          subpath.join ps == subpath.normalise (subpath.join ps)

    - For non-empty lists, the implementation is equivalent to [normalising](#function-library-lib.path.subpath.normalise) the result of `concatStringsSep "/"`.
      Note that the above laws can be derived from this one:

          ps != [] -> subpath.join ps == subpath.normalise (concatStringsSep "/" ps)

    Type:
      subpath.join :: [ String ] -> String

    Example:
      subpath.join [ "foo" "bar/baz" ]
      => "./foo/bar/baz"

      # normalise the result
      subpath.join [ "./foo" "." "bar//./baz/" ]
      => "./foo/bar/baz"

      # passing an empty list results in the current directory
      subpath.join [ ]
      => "./."

      # elements must be valid subpath strings
      subpath.join [ /foo ]
      => <error>
      subpath.join [ "" ]
      => <error>
      subpath.join [ "/foo" ]
      => <error>
      subpath.join [ "../foo" ]
      => <error>
  */
  subpath.join =
    # The list of subpaths to join together
    subpaths:
    # Fast in case all paths are valid
    if all isValid subpaths
    then joinRelPath (concatMap splitRelPath subpaths)
    else
      # Otherwise we take our time to gather more info for a better error message
      # Strictly go through each path, throwing on the first invalid one
      # Tracks the list index in the fold accumulator
      # This branch is only reached when at least one element is invalid, so
      # the fold always ends in the `throw` and its number is never returned
      foldl' (i: path:
        if isValid path
        then i + 1
        else throw ''
          lib.path.subpath.join: Element at index ${toString i} is not a valid subpath string:
              ${subpathInvalidReason path}''
      ) 0 subpaths;

  /*
    Split [a subpath](#function-library-lib.path.subpath.isValid) into its path component strings.
    Throw an error if the subpath isn't valid.
    Note that the returned path components are also [valid subpath strings](#function-library-lib.path.subpath.isValid), though they are intentionally not [normalised](#function-library-lib.path.subpath.normalise).

    Laws:

    - Splitting a subpath into components and [joining](#function-library-lib.path.subpath.join) the components gives the same subpath but [normalised](#function-library-lib.path.subpath.normalise):

          subpath.join (subpath.components s) == subpath.normalise s

    Type:
      subpath.components :: String -> [ String ]

    Example:
      subpath.components "."
      => [ ]

      subpath.components "./foo//bar/./baz/"
      => [ "foo" "bar" "baz" ]

      subpath.components "/foo"
      => <error>
  */
  subpath.components =
    # The subpath string to split into components
    subpath:
    assert assertMsg (isValid subpath) ''
      lib.path.subpath.components: Argument is not a valid subpath string:
          ${subpathInvalidReason subpath}'';
    splitRelPath subpath;

  /*
    Normalise a subpath. Throw an error if the subpath isn't [valid](#function-library-lib.path.subpath.isValid).

    - Limit repeating `/` to a single one.

    - Remove redundant `.` components.

    - Remove trailing `/` and `/.`.

    - Add leading `./`.

    Laws:

    - Idempotency - normalising multiple times gives the same result:

          subpath.normalise (subpath.normalise p) == subpath.normalise p

    - Uniqueness - there's only a single normalisation for the paths that lead to the same file system node:

          subpath.normalise p != subpath.normalise q -> $(realpath ${p}) != $(realpath ${q})

    - Don't change the result when [appended](#function-library-lib.path.append) to a Nix path value:

          append base p == append base (subpath.normalise p)

    - Don't change the path according to `realpath`:

          $(realpath ${p}) == $(realpath ${subpath.normalise p})

    - Only error on [invalid subpaths](#function-library-lib.path.subpath.isValid):

          (builtins.tryEval (subpath.normalise p)).success == subpath.isValid p

    Type:
      subpath.normalise :: String -> String

    Example:
      # limit repeating `/` to a single one
      subpath.normalise "foo//bar"
      => "./foo/bar"

      # remove redundant `.` components
      subpath.normalise "foo/./bar"
      => "./foo/bar"

      # add leading `./`
      subpath.normalise "foo/bar"
      => "./foo/bar"

      # remove trailing `/`
      subpath.normalise "foo/bar/"
      => "./foo/bar"

      # remove trailing `/.`
      subpath.normalise "foo/bar/."
      => "./foo/bar"

      # Return the current directory as `./.`
      subpath.normalise "."
      => "./."

      # error on `..` path components
      subpath.normalise "foo/../bar"
      => <error>

      # error on empty string
      subpath.normalise ""
      => <error>

      # error on absolute path
      subpath.normalise "/foo"
      => <error>
  */
  subpath.normalise =
    # The subpath string to normalise
    subpath:
    assert assertMsg (isValid subpath) ''
      lib.path.subpath.normalise: Argument is not a valid subpath string:
          ${subpathInvalidReason subpath}'';
    joinRelPath (splitRelPath subpath);

}