// Snapshot of this file as of v6.3-rc2.
// SPDX-License-Identifier: GPL-2.0-only
//
// Traverse the source tree, parsing all .gitignore files, and print file paths
// that are ignored by git.
// The output is suitable to the --exclude-from option of tar.
// This is useful until the --exclude-vcs-ignores option gets working correctly.
//
// Copyright (C) 2023 Masahiro Yamada <masahiroy@kernel.org>
// (a lot of code imported from GIT)

#include <assert.h>
#include <dirent.h>
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <strings.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <unistd.h>

// PATH_MAX and O_NOFOLLOW are POSIX names that a strict ISO C build may not
// expose. Provide fallbacks so the file still compiles in that configuration;
// on a normal POSIX/GNU build the system definitions are used unchanged.
#ifndef PATH_MAX
#define PATH_MAX 4096
#endif
#ifndef O_NOFOLLOW
#define O_NOFOLLOW 0
#endif

// Imported from commit 23c56f7bd5f1667f8b793d796bf30e39545920f6 in GIT
//
//---------------------------(IMPORT FROM GIT BEGIN)---------------------------

// Copied from environment.c

static bool ignore_case;

// Copied from git-compat-util.h

/* Sane ctype - no locale, and works with signed chars */
#undef isascii
#undef isspace
#undef isdigit
#undef isalpha
#undef isalnum
#undef isprint
#undef islower
#undef isupper
#undef tolower
#undef toupper
#undef iscntrl
#undef ispunct
#undef isxdigit

static const unsigned char sane_ctype[256];
#define GIT_SPACE 0x01
#define GIT_DIGIT 0x02
#define GIT_ALPHA 0x04
#define GIT_GLOB_SPECIAL 0x08
#define GIT_REGEX_SPECIAL 0x10
#define GIT_PATHSPEC_MAGIC 0x20
#define GIT_CNTRL 0x40
#define GIT_PUNCT 0x80
#define sane_istest(x,mask) ((sane_ctype[(unsigned char)(x)] & (mask)) != 0)
#define isascii(x) (((x) & ~0x7f) == 0)
#define isspace(x) sane_istest(x,GIT_SPACE)
#define isdigit(x) sane_istest(x,GIT_DIGIT)
#define isalpha(x) sane_istest(x,GIT_ALPHA)
#define isalnum(x) sane_istest(x,GIT_ALPHA | GIT_DIGIT)
#define isprint(x) ((x) >= 0x20 && (x) <= 0x7e)
#define islower(x) sane_iscase(x, 1)
#define isupper(x) sane_iscase(x, 0)
#define is_glob_special(x) sane_istest(x,GIT_GLOB_SPECIAL)
#define iscntrl(x) (sane_istest(x,GIT_CNTRL))
#define ispunct(x) sane_istest(x, GIT_PUNCT | GIT_REGEX_SPECIAL | \
		GIT_GLOB_SPECIAL | GIT_PATHSPEC_MAGIC)
#define isxdigit(x) (hexval_table[(unsigned char)(x)] != -1)
#define tolower(x) sane_case((unsigned char)(x), 0x20)
#define toupper(x) sane_case((unsigned char)(x), 0)

static inline int sane_case(int x, int high)
{
	if (sane_istest(x, GIT_ALPHA))
		x = (x & ~0x20) | high;
	return x;
}

static inline int sane_iscase(int x, int is_lower)
{
	if (!sane_istest(x, GIT_ALPHA))
		return 0;

	if (is_lower)
		return (x & 0x20) != 0;
	else
		return (x & 0x20) == 0;
}

// Copied from ctype.c

enum {
	S = GIT_SPACE,
	A = GIT_ALPHA,
	D = GIT_DIGIT,
	G = GIT_GLOB_SPECIAL,	/* *, ?, [, \\ */
	R = GIT_REGEX_SPECIAL,	/* $, (, ), +, ., ^, {, | */
	P = GIT_PATHSPEC_MAGIC,	/* other non-alnum, except for ] and } */
	X = GIT_CNTRL,
	U = GIT_PUNCT,
	Z = GIT_CNTRL | GIT_SPACE
};

static const unsigned char sane_ctype[256] = {
	X, X, X, X, X, X, X, X, X, Z, Z, X, X, Z, X, X,		/*   0.. 15 */
	X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,		/*  16.. 31 */
	S, P, P, P, R, P, P, P, R, R, G, R, P, P, R, P,		/*  32.. 47 */
	D, D, D, D, D, D, D, D, D, D, P, P, P, P, P, G,		/*  48.. 63 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  64.. 79 */
	A, A, A, A, A, A, A, A, A, A, A, G, G, U, R, P,		/*  80.. 95 */
	P, A, A, A, A, A, A, A, A, A, A, A, A, A, A, A,		/*  96..111 */
	A, A, A, A, A, A, A, A, A, A, A, R, R, U, P, X,		/* 112..127 */
	/* Nothing in the 128.. range */
};

// Copied from hex.c

static const signed char hexval_table[256] = {
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 00-07 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 08-0f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 10-17 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 18-1f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 20-27 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 28-2f */
	  0,  1,  2,  3,  4,  5,  6,  7,		/* 30-37 */
	  8,  9, -1, -1, -1, -1, -1, -1,		/* 38-3f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 40-47 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 48-4f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 50-57 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 58-5f */
	 -1, 10, 11, 12, 13, 14, 15, -1,		/* 60-67 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 68-6f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 70-77 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 78-7f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 80-87 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 88-8f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 90-97 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* 98-9f */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a0-a7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* a8-af */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b0-b7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* b8-bf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c0-c7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* c8-cf */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d0-d7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* d8-df */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e0-e7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* e8-ef */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f0-f7 */
	 -1, -1, -1, -1, -1, -1, -1, -1,		/* f8-ff */
};

// Copied from wildmatch.h

#define WM_CASEFOLD 1
#define WM_PATHNAME 2

#define WM_NOMATCH 1
#define WM_MATCH 0
#define WM_ABORT_ALL -1
#define WM_ABORT_TO_STARSTAR -2

// Copied from wildmatch.c

typedef unsigned char uchar;

// local modification: remove NEGATE_CLASS(2)

#define CC_EQ(class, len, litmatch) ((len) == sizeof (litmatch)-1 \
				    && *(class) == *(litmatch) \
				    && strncmp((const char *)(class), litmatch, len) == 0)

// local modification: simplify macros
#define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#define ISGRAPH(c) (isprint(c) && !isspace(c))
#define ISPRINT(c) isprint(c)
#define ISDIGIT(c) isdigit(c)
#define ISALNUM(c) isalnum(c)
#define ISALPHA(c) isalpha(c)
#define ISCNTRL(c) iscntrl(c)
#define ISLOWER(c) islower(c)
#define ISPUNCT(c) ispunct(c)
#define ISSPACE(c) isspace(c)
#define ISUPPER(c) isupper(c)
#define ISXDIGIT(c) isxdigit(c)

/*
 * Match pattern "p" against "text".
 *
 * Returns WM_MATCH, WM_NOMATCH, or one of the WM_ABORT_* codes that tell an
 * enclosing '*' / '**' loop to stop retrying.
 */
static int dowild(const uchar *p, const uchar *text, unsigned int flags)
{
	uchar p_ch;
	const uchar *pattern = p;

	for ( ; (p_ch = *p) != '\0'; text++, p++) {
		int matched, match_slash, negated;
		uchar t_ch, prev_ch;
		if ((t_ch = *text) == '\0' && p_ch != '*')
			return WM_ABORT_ALL;
		if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
			t_ch = tolower(t_ch);
		if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
			p_ch = tolower(p_ch);
		switch (p_ch) {
		case '\\':
			/* Literal match with following character.  Note that the test
			 * in "default" handles the p[1] == '\0' failure case. */
			p_ch = *++p;
			/* FALLTHROUGH */
		default:
			if (t_ch != p_ch)
				return WM_NOMATCH;
			continue;
		case '?':
			/* Match anything but '/'. */
			if ((flags & WM_PATHNAME) && t_ch == '/')
				return WM_NOMATCH;
			continue;
		case '*':
			if (*++p == '*') {
				/*
				 * local modification: decide whether the run of
				 * stars starts the pattern or follows a '/'
				 * without forming a pointer before the start of
				 * the pattern (the original computed p - 2).
				 */
				const uchar *first_star = p - 1;
				bool at_boundary = first_star == pattern ||
						   first_star[-1] == '/';

				while (*++p == '*') {}
				if (!(flags & WM_PATHNAME))
					/* without WM_PATHNAME, '*' == '**' */
					match_slash = 1;
				else if (at_boundary &&
				    (*p == '\0' || *p == '/' ||
				     (p[0] == '\\' && p[1] == '/'))) {
					/*
					 * Assuming we already match 'foo/' and are at
					 * <star star slash>, just assume it matches
					 * nothing and go ahead match the rest of the
					 * pattern with the remaining string. This
					 * helps make foo/<*><*>/bar (<> because
					 * otherwise it breaks C comment syntax) match
					 * both foo/bar and foo/a/bar.
					 */
					if (p[0] == '/' &&
					    dowild(p + 1, text, flags) == WM_MATCH)
						return WM_MATCH;
					match_slash = 1;
				} else /* WM_PATHNAME is set */
					match_slash = 0;
			} else
				/* without WM_PATHNAME, '*' == '**' */
				match_slash = flags & WM_PATHNAME ? 0 : 1;
			if (*p == '\0') {
				/* Trailing "**" matches everything.  Trailing "*" matches
				 * only if there are no more slash characters. */
				if (!match_slash) {
					if (strchr((const char *)text, '/'))
						return WM_NOMATCH;
				}
				return WM_MATCH;
			} else if (!match_slash && *p == '/') {
				/*
				 * _one_ asterisk followed by a slash
				 * with WM_PATHNAME matches the next
				 * directory
				 */
				const char *slash = strchr((const char *)text, '/');
				if (!slash)
					return WM_NOMATCH;
				text = (const uchar*)slash;
				/* the slash is consumed by the top-level for loop */
				break;
			}
			while (1) {
				if (t_ch == '\0')
					break;
				/*
				 * Try to advance faster when an asterisk is
				 * followed by a literal. We know in this case
				 * that the string before the literal
				 * must belong to "*".
				 * If match_slash is false, do not look past
				 * the first slash as it cannot belong to '*'.
				 */
				if (!is_glob_special(*p)) {
					p_ch = *p;
					if ((flags & WM_CASEFOLD) && ISUPPER(p_ch))
						p_ch = tolower(p_ch);
					while ((t_ch = *text) != '\0' &&
					       (match_slash || t_ch != '/')) {
						if ((flags & WM_CASEFOLD) && ISUPPER(t_ch))
							t_ch = tolower(t_ch);
						if (t_ch == p_ch)
							break;
						text++;
					}
					if (t_ch != p_ch)
						return WM_NOMATCH;
				}
				if ((matched = dowild(p, text, flags)) != WM_NOMATCH) {
					if (!match_slash || matched != WM_ABORT_TO_STARSTAR)
						return matched;
				} else if (!match_slash && t_ch == '/')
					return WM_ABORT_TO_STARSTAR;
				t_ch = *++text;
			}
			return WM_ABORT_ALL;
		case '[':
			p_ch = *++p;
			if (p_ch == '^')
				p_ch = '!';
			/* Assign literal 1/0 because of "matched" comparison. */
			negated = p_ch == '!' ? 1 : 0;
			if (negated) {
				/* Inverted character class. */
				p_ch = *++p;
			}
			prev_ch = 0;
			matched = 0;
			do {
				if (!p_ch)
					return WM_ABORT_ALL;
				if (p_ch == '\\') {
					p_ch = *++p;
					if (!p_ch)
						return WM_ABORT_ALL;
					if (t_ch == p_ch)
						matched = 1;
				} else if (p_ch == '-' && prev_ch && p[1] && p[1] != ']') {
					p_ch = *++p;
					if (p_ch == '\\') {
						p_ch = *++p;
						if (!p_ch)
							return WM_ABORT_ALL;
					}
					if (t_ch <= p_ch && t_ch >= prev_ch)
						matched = 1;
					else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch)) {
						uchar t_ch_upper = toupper(t_ch);
						if (t_ch_upper <= p_ch && t_ch_upper >= prev_ch)
							matched = 1;
					}
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (p_ch == '[' && p[1] == ':') {
					const uchar *s;
					int i;
					for (s = p += 2; (p_ch = *p) && p_ch != ']'; p++) {} /*SHARED ITERATOR*/
					if (!p_ch)
						return WM_ABORT_ALL;
					i = p - s - 1;
					if (i < 0 || p[-1] != ':') {
						/* Didn't find ":]", so treat like a normal set. */
						p = s - 2;
						p_ch = '[';
						if (t_ch == p_ch)
							matched = 1;
						continue;
					}
					if (CC_EQ(s,i, "alnum")) {
						if (ISALNUM(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "alpha")) {
						if (ISALPHA(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "blank")) {
						if (ISBLANK(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "cntrl")) {
						if (ISCNTRL(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "digit")) {
						if (ISDIGIT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "graph")) {
						if (ISGRAPH(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "lower")) {
						if (ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "print")) {
						if (ISPRINT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "punct")) {
						if (ISPUNCT(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "space")) {
						if (ISSPACE(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "upper")) {
						if (ISUPPER(t_ch))
							matched = 1;
						else if ((flags & WM_CASEFOLD) && ISLOWER(t_ch))
							matched = 1;
					} else if (CC_EQ(s,i, "xdigit")) {
						if (ISXDIGIT(t_ch))
							matched = 1;
					} else /* malformed [:class:] string */
						return WM_ABORT_ALL;
					p_ch = 0; /* This makes "prev_ch" get set to 0. */
				} else if (t_ch == p_ch)
					matched = 1;
			} while (prev_ch = p_ch, (p_ch = *++p) != ']');
			if (matched == negated ||
			    ((flags & WM_PATHNAME) && t_ch == '/'))
				return WM_NOMATCH;
			continue;
		}
	}

	return *text ? WM_NOMATCH : WM_MATCH;
}

/*
 * Match the "pattern" against the "text" string.
 *
 * "flags" is a mask of WM_* bits (not PATTERN_FLAG_* bits). WM_CASEFOLD is
 * added automatically when the global ignore_case is set.
 */
static int wildmatch(const char *pattern, const char *text, unsigned int flags)
{
	// local modification: move WM_CASEFOLD here
	if (ignore_case)
		flags |= WM_CASEFOLD;

	return dowild((const uchar*)pattern, (const uchar*)text, flags);
}

// Copied from dir.h

#define PATTERN_FLAG_NODIR 1
#define PATTERN_FLAG_ENDSWITH 4
#define PATTERN_FLAG_MUSTBEDIR 8
#define PATTERN_FLAG_NEGATIVE 16

// Copied from dir.c

/* Compare at most "count" bytes of two paths, honoring ignore_case. */
static int fspathncmp(const char *a, const char *b, size_t count)
{
	return ignore_case ? strncasecmp(a, b, count) : strncmp(a, b, count);
}

/* Length of the leading part of "match" that contains no glob special. */
static int simple_length(const char *match)
{
	int len = -1;

	for (;;) {
		unsigned char c = *match++;
		len++;
		if (c == '\0' || is_glob_special(c))
			return len;
	}
}

/* True when "string" contains no glob special character at all. */
static int no_wildcard(const char *string)
{
	return string[simple_length(string)] == '\0';
}

/*
 * Split one .gitignore entry into its pattern text and PATTERN_FLAG_* bits.
 *
 * On return *pattern points past a leading '!', *patternlen excludes a
 * trailing '/', and *nowildcardlen is the length of the literal prefix
 * (clamped to *patternlen).
 */
static void parse_path_pattern(const char **pattern,
			       int *patternlen,
			       unsigned *flags,
			       int *nowildcardlen)
{
	const char *p = *pattern;
	size_t i, len;

	*flags = 0;
	if (*p == '!') {
		*flags |= PATTERN_FLAG_NEGATIVE;
		p++;
	}
	len = strlen(p);
	if (len && p[len - 1] == '/') {
		len--;
		*flags |= PATTERN_FLAG_MUSTBEDIR;
	}
	for (i = 0; i < len; i++) {
		if (p[i] == '/')
			break;
	}
	if (i == len)
		*flags |= PATTERN_FLAG_NODIR;
	*nowildcardlen = simple_length(p);
	/*
	 * we should have excluded the trailing slash from 'p' too,
	 * but that's one more allocation. Instead just make sure
	 * nowildcardlen does not exceed real patternlen
	 */
	if ((size_t)*nowildcardlen > len)
		*nowildcardlen = (int)len;
	if (*p == '*' && no_wildcard(p + 1))
		*flags |= PATTERN_FLAG_ENDSWITH;
	*pattern = p;
	*patternlen = (int)len;
}

/*
 * Strip unescaped trailing spaces from "buf" in place. A space preceded by
 * a backslash is kept.
 */
static void trim_trailing_spaces(char *buf)
{
	char *p, *last_space = NULL;

	for (p = buf; *p; p++)
		switch (*p) {
		case ' ':
			if (!last_space)
				last_space = p;
			break;
		case '\\':
			p++;
			if (!*p)
				return;
			/* fallthrough */
		default:
			last_space = NULL;
		}

	if (last_space)
		*last_space = '\0';
}

/*
 * Match a pattern without '/' against the last path component.
 *
 * "prefix" is the pattern's literal-prefix length and "flags" holds
 * PATTERN_FLAG_* bits. Returns 1 on match, 0 otherwise.
 */
static int match_basename(const char *basename, int basenamelen,
			  const char *pattern, int prefix, int patternlen,
			  unsigned flags)
{
	if (prefix == patternlen) {
		if (patternlen == basenamelen &&
		    !fspathncmp(pattern, basename, basenamelen))
			return 1;
	} else if (flags & PATTERN_FLAG_ENDSWITH) {
		/* "*literal" matching against "fooliteral" */
		if (patternlen - 1 <= basenamelen &&
		    !fspathncmp(pattern + 1,
				basename + basenamelen - (patternlen - 1),
				patternlen - 1))
			return 1;
	} else {
		// local modification: call wildmatch() directly
		//
		// Pass 0, not "flags": "flags" carries PATTERN_FLAG_* bits, and
		// PATTERN_FLAG_NODIR has the same value as WM_CASEFOLD, which
		// would force case-insensitive matching for every basename
		// pattern. Case folding is driven by ignore_case in wildmatch().
		if (!wildmatch(pattern, basename, 0))
			return 1;
	}
	return 0;
}

/*
 * Match a pattern containing '/' against "pathname", relative to the
 * directory "base" that holds the .gitignore. Returns 1 on match.
 */
static int match_pathname(const char *pathname, int pathlen,
			  const char *base, int baselen,
			  const char *pattern, int prefix, int patternlen)
{
	// local modification: remove local variables

	/*
	 * match with FNM_PATHNAME; the pattern has base implicitly
	 * in front of it.
	 */
	if (*pattern == '/') {
		pattern++;
		patternlen--;
		prefix--;
	}

	/*
	 * baselen does not count the trailing slash. base[] may or
	 * may not end with a trailing slash though.
	 */
	if (pathlen < baselen + 1 ||
	    (baselen && pathname[baselen] != '/') ||
	    fspathncmp(pathname, base, baselen))
		return 0;

	// local modification: simplified because always baselen > 0
	pathname += baselen + 1;
	pathlen -= baselen + 1;

	if (prefix) {
		/*
		 * if the non-wildcard part is longer than the
		 * remaining pathname, surely it cannot match.
		 */
		if (prefix > pathlen)
			return 0;

		if (fspathncmp(pattern, pathname, prefix))
			return 0;
		pattern += prefix;
		patternlen -= prefix;
		pathname += prefix;
		pathlen -= prefix;

		/*
		 * If the whole pattern did not have a wildcard,
		 * then our prefix match is all we need; we
		 * do not need to call fnmatch at all.
		 */
		if (!patternlen && !pathlen)
			return 1;
	}

	// local modification: call wildmatch() directly
	return !wildmatch(pattern, pathname, WM_PATHNAME);
}

// Copied from git/utf8.c

static const char utf8_bom[] = "\357\273\277";

//----------------------------(IMPORT FROM GIT END)----------------------------

// One parsed .gitignore entry. pattern[] stores two NUL-terminated strings
// back to back: the pattern text, then the directory of its .gitignore.
struct pattern {
	unsigned int flags;
	int nowildcardlen;
	int patternlen;
	int dirlen;
	char pattern[];
};

static struct pattern **pattern_list;
static int nr_patterns, alloced_patterns;

// Remember the number of patterns at each directory level
static int *nr_patterns_at;
// Track the current/max directory level;
static int depth, max_depth;
static bool debug_on;
static FILE *out_fp, *stat_fp;
static char *prefix = "";
static char *progname;

// Print "s: <strerror(errno)>" and terminate with a failure status.
static void __attribute__((noreturn)) perror_exit(const char *s)
{
	perror(s);

	exit(EXIT_FAILURE);
}

// Print a formatted error prefixed with the program name and terminate.
static void __attribute__((noreturn, format(printf, 1, 2)))
error_exit(const char *fmt, ...)
{
	va_list args;

	fprintf(stderr, "%s: error: ", progname);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);

	exit(EXIT_FAILURE);
}

// Print a debug message to stderr, indented by the current directory depth.
// Does nothing unless debug_on is set.
static void __attribute__((format(printf, 1, 2))) debug(const char *fmt, ...)
{
	va_list args;
	int i;

	if (!debug_on)
		return;

	fprintf(stderr, "[DEBUG] ");

	for (i = 0; i < depth * 2; i++)
		fputc(' ', stderr);

	va_start(args, fmt);
	vfprintf(stderr, fmt, args);
	va_end(args);
}

// realloc() that terminates the program on allocation failure.
static void *xrealloc(void *ptr, size_t size)
{
	ptr = realloc(ptr, size);
	if (!ptr)
		perror_exit(progname);

	return ptr;
}

// malloc() that terminates the program on allocation failure.
static void *xmalloc(size_t size)
{
	return xrealloc(NULL, size);
}

// similar to last_matching_pattern_from_list() in GIT
//
// Return true if "path" is ignored by the currently loaded patterns.
// "dirlen" is the length of the directory part of "path" (without the
// separating slash); "is_dir" tells whether "path" is a directory.
static bool is_ignored(const char *path, int pathlen, int dirlen, bool is_dir)
{
	int i;

	// Search in the reverse order because the last matching pattern wins.
	for (i = nr_patterns - 1; i >= 0; i--) {
		struct pattern *p = pattern_list[i];
		unsigned int flags = p->flags;
		const char *gitignore_dir = p->pattern + p->patternlen + 1;
		bool ignored;

		if ((flags & PATTERN_FLAG_MUSTBEDIR) && !is_dir)
			continue;

		if (flags & PATTERN_FLAG_NODIR) {
			if (!match_basename(path + dirlen + 1,
					    pathlen - dirlen - 1,
					    p->pattern,
					    p->nowildcardlen,
					    p->patternlen,
					    p->flags))
				continue;
		} else {
			if (!match_pathname(path, pathlen,
					    gitignore_dir, p->dirlen,
					    p->pattern,
					    p->nowildcardlen,
					    p->patternlen))
				continue;
		}

		debug("%s: matches %s%s%s (%s/.gitignore)\n", path,
		      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
		      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "",
		      gitignore_dir);

		ignored = (flags & PATTERN_FLAG_NEGATIVE) == 0;
		if (ignored)
			debug("Ignore: %s\n", path);

		return ignored;
	}

	debug("%s: no match\n", path);

	return false;
}

// Parse "string" as one gitignore entry found in directory "dir" and append
// it to pattern_list. Entries that are empty after parsing are dropped.
static void add_pattern(const char *string, const char *dir, int dirlen)
{
	struct pattern *p;
	int patternlen, nowildcardlen;
	unsigned int flags;

	parse_path_pattern(&string, &patternlen, &flags, &nowildcardlen);

	if (patternlen == 0)
		return;

	// Room for the pattern, the directory, and their two terminators.
	p = xmalloc(sizeof(*p) + patternlen + dirlen + 2);

	memcpy(p->pattern, string, patternlen);
	p->pattern[patternlen] = 0;
	memcpy(p->pattern + patternlen + 1, dir, dirlen);
	p->pattern[patternlen + 1 + dirlen] = 0;

	p->patternlen = patternlen;
	p->nowildcardlen = nowildcardlen;
	p->dirlen = dirlen;
	p->flags = flags;

	debug("Add pattern: %s%s%s\n",
	      flags & PATTERN_FLAG_NEGATIVE ? "!" : "", p->pattern,
	      flags & PATTERN_FLAG_MUSTBEDIR ? "/" : "");

	if (nr_patterns >= alloced_patterns) {
		alloced_patterns += 128;
		pattern_list = xrealloc(pattern_list,
					sizeof(*pattern_list) * alloced_patterns);
	}

	pattern_list[nr_patterns++] = p;
}

// similar to add_patterns_from_buffer() in GIT
//
// Parse the gitignore text in buf[0 .. size-1] and add every entry.
// The caller must provide one extra writable byte at buf[size]; it is
// overwritten with a newline sentinel so that a final line without a trailing
// newline is still parsed. The buffer is modified in place.
static void add_patterns_from_buffer(char *buf, size_t size,
				     const char *dir, int dirlen)
{
	char *entry = buf;
	size_t i;

	buf[size] = '\n';
	size++;		// the sentinel is part of the scanned range

	// skip utf8 bom
	if (!strncmp(entry, utf8_bom, strlen(utf8_bom)))
		entry += strlen(utf8_bom);

	for (i = entry - buf; i < size; i++) {
		if (buf[i] != '\n')
			continue;

		if (entry != buf + i && entry[0] != '#') {
			// Terminate the line, also dropping a CR before the LF.
			buf[i - (i && buf[i - 1] == '\r')] = 0;
			trim_trailing_spaces(entry);
			add_pattern(entry, dir, dirlen);
		}
		entry = buf + i + 1;
	}
}

// Load "<dir>/.gitignore" if it exists and add its patterns.
// A missing file is not an error; any other failure terminates the program.
static void add_patterns_from_gitignore(const char *dir, int dirlen)
{
	struct stat st;
	char path[PATH_MAX], *buf;
	size_t size, nread = 0;
	int fd, pathlen;

	pathlen = snprintf(path, sizeof(path), "%s/.gitignore", dir);
	if (pathlen < 0 || (size_t)pathlen >= sizeof(path))
		error_exit("%s: too long path was truncated\n", path);

	fd = open(path, O_RDONLY | O_NOFOLLOW);
	if (fd < 0) {
		if (errno != ENOENT)
			perror_exit(path);
		return;
	}

	if (fstat(fd, &st) < 0)
		perror_exit(path);

	size = st.st_size;

	// One extra byte for the newline sentinel.
	buf = xmalloc(size + 1);

	// read() may legitimately return fewer bytes than requested, so keep
	// going until the whole file is in or EOF is reached.
	while (nread < size) {
		ssize_t ret = read(fd, buf + nread, size - nread);

		if (ret < 0) {
			if (errno == EINTR)
				continue;
			perror_exit(path);
		}
		if (ret == 0)
			break;
		nread += ret;
	}

	if (close(fd))
		perror_exit(path);

	debug("Parse %s\n", path);

	add_patterns_from_buffer(buf, nread, dir, dirlen);

	free(buf);
}

// Save the current number of patterns and increment the depth
static void increment_depth(void)
{
	if (depth >= max_depth) {
		max_depth += 1;
		nr_patterns_at = xrealloc(nr_patterns_at,
					  sizeof(*nr_patterns_at) * max_depth);
	}

	nr_patterns_at[depth] = nr_patterns;
	depth++;
}

// Decrement the depth, and free up the patterns of this directory level.
822static void decrement_depth(void) 823{ 824 depth--; 825 assert(depth >= 0); 826 827 while (nr_patterns > nr_patterns_at[depth]) 828 free(pattern_list[--nr_patterns]); 829} 830 831static void print_path(const char *path) 832{ 833 // The path always starts with "./" 834 assert(strlen(path) >= 2); 835 836 // Replace the root directory with a preferred prefix. 837 // This is useful for the tar command. 838 fprintf(out_fp, "%s%s\n", prefix, path + 2); 839} 840 841static void print_stat(const char *path, struct stat *st) 842{ 843 if (!stat_fp) 844 return; 845 846 if (!S_ISREG(st->st_mode) && !S_ISLNK(st->st_mode)) 847 return; 848 849 assert(strlen(path) >= 2); 850 851 fprintf(stat_fp, "%c %9ld %10ld %s\n", 852 S_ISLNK(st->st_mode) ? 'l' : '-', 853 st->st_size, st->st_mtim.tv_sec, path + 2); 854} 855 856// Traverse the entire directory tree, parsing .gitignore files. 857// Print file paths that are not tracked by git. 858// 859// Return true if all files under the directory are ignored, false otherwise. 
860static bool traverse_directory(const char *dir, int dirlen) 861{ 862 bool all_ignored = true; 863 DIR *dirp; 864 865 debug("Enter[%d]: %s\n", depth, dir); 866 increment_depth(); 867 868 add_patterns_from_gitignore(dir, dirlen); 869 870 dirp = opendir(dir); 871 if (!dirp) 872 perror_exit(dir); 873 874 while (1) { 875 struct dirent *d; 876 struct stat st; 877 char path[PATH_MAX]; 878 int pathlen; 879 bool ignored; 880 881 errno = 0; 882 d = readdir(dirp); 883 if (!d) { 884 if (errno) 885 perror_exit(dir); 886 break; 887 } 888 889 if (!strcmp(d->d_name, "..") || !strcmp(d->d_name, ".")) 890 continue; 891 892 pathlen = snprintf(path, sizeof(path), "%s/%s", dir, d->d_name); 893 if (pathlen >= sizeof(path)) 894 error_exit("%s: too long path was truncated\n", path); 895 896 if (lstat(path, &st) < 0) 897 perror_exit(path); 898 899 if ((!S_ISREG(st.st_mode) && !S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) || 900 is_ignored(path, pathlen, dirlen, S_ISDIR(st.st_mode))) { 901 ignored = true; 902 } else { 903 if (S_ISDIR(st.st_mode) && !S_ISLNK(st.st_mode)) 904 // If all the files in a directory are ignored, 905 // let's ignore that directory as well. This 906 // will avoid empty directories in the tarball. 
907 ignored = traverse_directory(path, pathlen); 908 else 909 ignored = false; 910 } 911 912 if (ignored) { 913 print_path(path); 914 } else { 915 print_stat(path, &st); 916 all_ignored = false; 917 } 918 } 919 920 if (closedir(dirp)) 921 perror_exit(dir); 922 923 decrement_depth(); 924 debug("Leave[%d]: %s\n", depth, dir); 925 926 return all_ignored; 927} 928 929static void usage(void) 930{ 931 fprintf(stderr, 932 "usage: %s [options]\n" 933 "\n" 934 "Show files that are ignored by git\n" 935 "\n" 936 "options:\n" 937 " -d, --debug print debug messages to stderr\n" 938 " -e, --exclude PATTERN add the given exclude pattern\n" 939 " -h, --help show this help message and exit\n" 940 " -i, --ignore-case Ignore case differences between the patterns and the files\n" 941 " -o, --output FILE output the ignored files to a file (default: '-', i.e. stdout)\n" 942 " -p, --prefix PREFIX prefix added to each path (default: empty string)\n" 943 " -r, --rootdir DIR root of the source tree (default: current working directory)\n" 944 " -s, --stat FILE output the file stat of non-ignored files to a file\n", 945 progname); 946} 947 948static void open_output(const char *pathname, FILE **fp) 949{ 950 if (strcmp(pathname, "-")) { 951 *fp = fopen(pathname, "w"); 952 if (!*fp) 953 perror_exit(pathname); 954 } else { 955 *fp = stdout; 956 } 957} 958 959static void close_output(const char *pathname, FILE *fp) 960{ 961 fflush(fp); 962 963 if (ferror(fp)) 964 error_exit("not all data was written to the output\n"); 965 966 if (fclose(fp)) 967 perror_exit(pathname); 968} 969 970int main(int argc, char *argv[]) 971{ 972 const char *output = "-"; 973 const char *rootdir = "."; 974 const char *stat = NULL; 975 976 progname = strrchr(argv[0], '/'); 977 if (progname) 978 progname++; 979 else 980 progname = argv[0]; 981 982 while (1) { 983 static struct option long_options[] = { 984 {"debug", no_argument, NULL, 'd'}, 985 {"help", no_argument, NULL, 'h'}, 986 {"ignore-case", no_argument, NULL, 'i'}, 
987 {"output", required_argument, NULL, 'o'}, 988 {"prefix", required_argument, NULL, 'p'}, 989 {"rootdir", required_argument, NULL, 'r'}, 990 {"stat", required_argument, NULL, 's'}, 991 {"exclude", required_argument, NULL, 'x'}, 992 {}, 993 }; 994 995 int c = getopt_long(argc, argv, "dhino:p:r:s:x:", long_options, NULL); 996 997 if (c == -1) 998 break; 999 1000 switch (c) { 1001 case 'd': 1002 debug_on = true; 1003 break; 1004 case 'h': 1005 usage(); 1006 exit(0); 1007 case 'i': 1008 ignore_case = true; 1009 break; 1010 case 'o': 1011 output = optarg; 1012 break; 1013 case 'p': 1014 prefix = optarg; 1015 break; 1016 case 'r': 1017 rootdir = optarg; 1018 break; 1019 case 's': 1020 stat = optarg; 1021 break; 1022 case 'x': 1023 add_pattern(optarg, ".", strlen(".")); 1024 break; 1025 case '?': 1026 usage(); 1027 /* fallthrough */ 1028 default: 1029 exit(EXIT_FAILURE); 1030 } 1031 } 1032 1033 open_output(output, &out_fp); 1034 if (stat && stat[0]) 1035 open_output(stat, &stat_fp); 1036 1037 if (chdir(rootdir)) 1038 perror_exit(rootdir); 1039 1040 add_pattern(".git/", ".", strlen(".")); 1041 1042 if (traverse_directory(".", strlen("."))) 1043 print_path("./"); 1044 1045 assert(depth == 0); 1046 1047 while (nr_patterns > 0) 1048 free(pattern_list[--nr_patterns]); 1049 free(pattern_list); 1050 free(nr_patterns_at); 1051 1052 close_output(output, out_fp); 1053 if (stat_fp) 1054 close_output(stat, stat_fp); 1055 1056 return 0; 1057}