Git fork
at reftables-rust 313 lines 9.6 kB view raw
1/* 2 * Copyright (C) 2005 Junio C Hamano 3 */ 4 5#define USE_THE_REPOSITORY_VARIABLE 6 7#include "git-compat-util.h" 8#include "diffcore.h" 9#include "hash.h" 10#include "object.h" 11#include "promisor-remote.h" 12 13static int should_break(struct repository *r, 14 struct diff_filespec *src, 15 struct diff_filespec *dst, 16 int break_score, 17 int *merge_score_p) 18{ 19 /* dst is recorded as a modification of src. Are they so 20 * different that we are better off recording this as a pair 21 * of delete and create? 22 * 23 * There are two criteria used in this algorithm. For the 24 * purposes of helping later rename/copy, we take both delete 25 * and insert into account and estimate the amount of "edit". 26 * If the edit is very large, we break this pair so that 27 * rename/copy can pick the pieces up to match with other 28 * files. 29 * 30 * On the other hand, we would want to ignore inserts for the 31 * pure "complete rewrite" detection. As long as most of the 32 * existing contents were removed from the file, it is a 33 * complete rewrite, and if sizable chunk from the original 34 * still remains in the result, it is not a rewrite. It does 35 * not matter how much or how little new material is added to 36 * the file. 37 * 38 * The score we leave for such a broken filepair uses the 39 * latter definition so that later clean-up stage can find the 40 * pieces that should not have been broken according to the 41 * latter definition after rename/copy runs, and merge the 42 * broken pair that have a score lower than given criteria 43 * back together. The break operation itself happens 44 * according to the former definition. 45 * 46 * The minimum_edit parameter tells us when to break (the 47 * amount of "edit" required for us to consider breaking the 48 * pair). We leave the amount of deletion in *merge_score_p 49 * when we return. 50 * 51 * The value we return is 1 if we want the pair to be broken, 52 * or 0 if we do not. 53 */ 54 unsigned long delta_size, max_size; 55 unsigned long src_copied, literal_added, src_removed; 56 57 struct diff_populate_filespec_options options = { 0 }; 58 59 *merge_score_p = 0; /* assume no deletion --- "do not break" 60 * is the default. 61 */ 62 63 if (S_ISREG(src->mode) != S_ISREG(dst->mode)) { 64 *merge_score_p = (int)MAX_SCORE; 65 return 1; /* even their types are different */ 66 } 67 68 if (src->oid_valid && dst->oid_valid && 69 oideq(&src->oid, &dst->oid)) 70 return 0; /* they are the same */ 71 72 if (r == the_repository && repo_has_promisor_remote(the_repository)) { 73 options.missing_object_cb = diff_queued_diff_prefetch; 74 options.missing_object_data = r; 75 } 76 77 if (diff_populate_filespec(r, src, &options) || 78 diff_populate_filespec(r, dst, &options)) 79 return 0; /* error but caught downstream */ 80 81 max_size = ((src->size > dst->size) ? src->size : dst->size); 82 if (max_size < MINIMUM_BREAK_SIZE) 83 return 0; /* we do not break too small filepair */ 84 85 if (!src->size) 86 return 0; /* we do not let empty files get renamed */ 87 88 if (diffcore_count_changes(r, src, dst, 89 &src->cnt_data, &dst->cnt_data, 90 &src_copied, &literal_added)) 91 return 0; 92 93 /* sanity */ 94 if (src->size < src_copied) 95 src_copied = src->size; 96 if (dst->size < literal_added + src_copied) { 97 if (src_copied < dst->size) 98 literal_added = dst->size - src_copied; 99 else 100 literal_added = 0; 101 } 102 src_removed = src->size - src_copied; 103 104 /* Compute merge-score, which is "how much is removed 105 * from the source material". The clean-up stage will 106 * merge the surviving pair together if the score is 107 * less than the minimum, after rename/copy runs. 108 */ 109 *merge_score_p = (int)(src_removed * MAX_SCORE / src->size); 110 if (*merge_score_p > break_score) 111 return 1; 112 113 /* Extent of damage, which counts both inserts and 114 * deletes. 115 */ 116 delta_size = src_removed + literal_added; 117 if (delta_size * MAX_SCORE / max_size < break_score) 118 return 0; 119 120 /* If you removed a lot without adding new material, that is 121 * not really a rewrite. 122 */ 123 if ((src->size * break_score < src_removed * MAX_SCORE) && 124 (literal_added * 20 < src_removed) && 125 (literal_added * 20 < src_copied)) 126 return 0; 127 128 return 1; 129} 130 131void diffcore_break(struct repository *r, int break_score) 132{ 133 struct diff_queue_struct *q = &diff_queued_diff; 134 struct diff_queue_struct outq = DIFF_QUEUE_INIT; 135 136 /* When the filepair has this much edit (insert and delete), 137 * it is first considered to be a rewrite and broken into a 138 * create and delete filepair. This is to help breaking a 139 * file that had too much new stuff added, possibly from 140 * moving contents from another file, so that rename/copy can 141 * match it with the other file. 142 * 143 * int break_score; we reuse incoming parameter for this. 144 */ 145 146 /* After a pair is broken according to break_score and 147 * subjected to rename/copy, both of them may survive intact, 148 * due to lack of suitable rename/copy peer. Or, the caller 149 * may be calling us without using rename/copy. When that 150 * happens, we merge the broken pieces back into one 151 * modification together if the pair did not have more than 152 * this much delete. For this computation, we do not take 153 * insert into account at all. If you start from a 100-line 154 * file and delete 97 lines of it, it does not matter if you 155 * add 27 lines to it to make a new 30-line file or if you add 156 * 997 lines to it to make a 1000-line file. Either way what 157 * you did was a rewrite of 97%. On the other hand, if you 158 * delete 3 lines, keeping 97 lines intact, it does not matter 159 * if you add 3 lines to it to make a new 100-line file or if 160 * you add 903 lines to it to make a new 1000-line file. 161 * Either way you did a lot of additions and not a rewrite. 162 * This merge happens to catch the latter case. A merge_score 163 * of 80% would be a good default value (a broken pair that 164 * has score lower than merge_score will be merged back 165 * together). 166 */ 167 int merge_score; 168 int i; 169 170 /* See comment on DEFAULT_BREAK_SCORE and 171 * DEFAULT_MERGE_SCORE in diffcore.h 172 */ 173 merge_score = (break_score >> 16) & 0xFFFF; 174 break_score = (break_score & 0xFFFF); 175 176 if (!break_score) 177 break_score = DEFAULT_BREAK_SCORE; 178 if (!merge_score) 179 merge_score = DEFAULT_MERGE_SCORE; 180 181 for (i = 0; i < q->nr; i++) { 182 struct diff_filepair *p = q->queue[i]; 183 int score; 184 185 /* 186 * We deal only with in-place edit of blobs. 187 * We do not break anything else. 188 */ 189 if (DIFF_FILE_VALID(p->one) && DIFF_FILE_VALID(p->two) && 190 object_type(p->one->mode) == OBJ_BLOB && 191 object_type(p->two->mode) == OBJ_BLOB && 192 !strcmp(p->one->path, p->two->path)) { 193 if (should_break(r, p->one, p->two, 194 break_score, &score)) { 195 /* Split this into delete and create */ 196 struct diff_filespec *null_one, *null_two; 197 struct diff_filepair *dp; 198 199 /* Set score to 0 for the pair that 200 * needs to be merged back together 201 * should they survive rename/copy. 202 * Also we do not want to break very 203 * small files. 204 */ 205 if (score < merge_score) 206 score = 0; 207 208 /* deletion of one */ 209 null_one = alloc_filespec(p->one->path); 210 dp = diff_queue(&outq, p->one, null_one); 211 dp->score = score; 212 dp->broken_pair = 1; 213 214 /* creation of two */ 215 null_two = alloc_filespec(p->two->path); 216 dp = diff_queue(&outq, null_two, p->two); 217 dp->score = score; 218 dp->broken_pair = 1; 219 220 diff_free_filespec_blob(p->one); 221 diff_free_filespec_blob(p->two); 222 free(p); /* not diff_free_filepair(), we are 223 * reusing one and two here. 224 */ 225 continue; 226 } 227 } 228 diff_free_filespec_data(p->one); 229 diff_free_filespec_data(p->two); 230 diff_q(&outq, p); 231 } 232 free(q->queue); 233 *q = outq; 234 235 return; 236} 237 238static void merge_broken(struct diff_filepair *p, 239 struct diff_filepair *pp, 240 struct diff_queue_struct *outq) 241{ 242 /* p and pp are broken pairs we want to merge */ 243 struct diff_filepair *c = p, *d = pp, *dp; 244 if (DIFF_FILE_VALID(p->one)) { 245 /* this must be a delete half */ 246 d = p; c = pp; 247 } 248 /* Sanity check */ 249 if (!DIFF_FILE_VALID(d->one)) 250 die("internal error in merge #1"); 251 if (DIFF_FILE_VALID(d->two)) 252 die("internal error in merge #2"); 253 if (DIFF_FILE_VALID(c->one)) 254 die("internal error in merge #3"); 255 if (!DIFF_FILE_VALID(c->two)) 256 die("internal error in merge #4"); 257 258 dp = diff_queue(outq, d->one, c->two); 259 dp->score = p->score; 260 /* 261 * We will be one extra user of the same src side of the 262 * broken pair, if it was used as the rename source for other 263 * paths elsewhere. Increment to mark that the path stays 264 * in the resulting tree. 265 */ 266 d->one->rename_used++; 267 free_filespec(d->two); 268 free_filespec(c->one); 269 free(d); 270 free(c); 271} 272 273void diffcore_merge_broken(void) 274{ 275 struct diff_queue_struct *q = &diff_queued_diff; 276 struct diff_queue_struct outq = DIFF_QUEUE_INIT; 277 int i, j; 278 279 for (i = 0; i < q->nr; i++) { 280 struct diff_filepair *p = q->queue[i]; 281 if (!p) 282 /* we already merged this with its peer */ 283 continue; 284 else if (p->broken_pair && 285 !strcmp(p->one->path, p->two->path)) { 286 /* If the peer also survived rename/copy, then 287 * we merge them back together. 288 */ 289 for (j = i + 1; j < q->nr; j++) { 290 struct diff_filepair *pp = q->queue[j]; 291 if (pp->broken_pair && 292 !strcmp(pp->one->path, pp->two->path) && 293 !strcmp(p->one->path, pp->two->path)) { 294 /* Peer survived. Merge them */ 295 merge_broken(p, pp, &outq); 296 q->queue[j] = NULL; 297 goto next; 298 } 299 } 300 /* The peer did not survive, so we keep 301 * it in the output. 302 */ 303 diff_q(&outq, p); 304 } 305 else 306 diff_q(&outq, p); 307next:; 308 } 309 free(q->queue); 310 *q = outq; 311 312 return; 313}