Reactos
1/*
2 * PROJECT: ReactOS FC Command
3 * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4 * PURPOSE: Comparing text files
5 * COPYRIGHT: Copyright 2021 Katayama Hirofumi MZ (katayama.hirofumi.mz@gmail.com)
6 */
7
8#include "fc.h"
9
10#define IS_SPACE(ch) ((ch) == TEXT(' ') || (ch) == TEXT('\t'))
11
12#ifdef UNICODE
13 #define NODE NODE_W
14 #define PrintLine PrintLineW
15 #define TextCompare TextCompareW
16#else
17 #define NODE NODE_A
18 #define PrintLine PrintLineA
19 #define TextCompare TextCompareA
20#endif
21
22static LPTSTR AllocLine(LPCTSTR pch, DWORD cch)
23{
24 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
25 if (!pszNew)
26 return NULL;
27 memcpy(pszNew, pch, cch * sizeof(TCHAR));
28 pszNew[cch] = 0;
29 return pszNew;
30}
31
32static NODE *AllocNode(LPTSTR psz, DWORD lineno)
33{
34 NODE *node;
35 if (!psz)
36 return NULL;
37 node = calloc(1, sizeof(NODE));
38 if (!node)
39 {
40 free(psz);
41 return NULL;
42 }
43 node->pszLine = psz;
44 node->lineno = lineno;
45 return node;
46}
47
48static __inline VOID DeleteNode(NODE *node)
49{
50 if (node)
51 {
52 free(node->pszLine);
53 free(node->pszComp);
54 free(node);
55 }
56}
57
58static VOID DeleteList(struct list *list)
59{
60 struct list *ptr;
61 NODE *node;
62 while ((ptr = list_head(list)) != NULL)
63 {
64 list_remove(ptr);
65 node = LIST_ENTRY(ptr, NODE, entry);
66 DeleteNode(node);
67 }
68}
69
70static __inline LPCTSTR SkipSpace(LPCTSTR pch)
71{
72 while (IS_SPACE(*pch))
73 ++pch;
74 return pch;
75}
76
77static __inline LPCTSTR FindLastNonSpace(LPCTSTR pch)
78{
79 LPCTSTR pchLast = NULL;
80 while (*pch)
81 {
82 if (!IS_SPACE(*pch))
83 pchLast = pch;
84 ++pch;
85 }
86 return pchLast;
87}
88
89static VOID DeleteDuplicateSpaces(LPTSTR psz)
90{
91 LPTSTR pch0, pch1;
92 for (pch0 = pch1 = psz; *pch0; ++pch0)
93 {
94 *pch1++ = *pch0;
95 if (IS_SPACE(*pch0))
96 {
97 do
98 {
99 ++pch0;
100 } while (IS_SPACE(*pch0));
101 --pch0;
102 }
103 }
104 *pch1 = 0;
105}
106
107static LPTSTR CompressSpace(LPCTSTR line)
108{
109 LPTSTR pszNew;
110 LPCTSTR pchLast;
111
112 line = SkipSpace(line);
113 pchLast = FindLastNonSpace(line);
114 if (pchLast == NULL)
115 return AllocLine(NULL, 0);
116
117 pszNew = AllocLine(line, (DWORD)(pchLast - line) + 1);
118 if (!pszNew)
119 return NULL;
120
121 DeleteDuplicateSpaces(pszNew);
122 return pszNew;
123}
124
125#define TAB_WIDTH 8
126
127static INT ExpandTabLength(LPCTSTR line)
128{
129 LPCTSTR pch;
130 INT cch = 0;
131 for (pch = line; *pch; ++pch)
132 {
133 if (*pch == TEXT('\t'))
134 cch += TAB_WIDTH - (cch % TAB_WIDTH);
135 else
136 ++cch;
137 }
138 return cch;
139}
140
141static LPTSTR ExpandTab(LPCTSTR line)
142{
143 INT spaces, cch = ExpandTabLength(line), ich;
144 LPTSTR pszNew = malloc((cch + 1) * sizeof(TCHAR));
145 LPCTSTR pch;
146 if (!pszNew)
147 return NULL;
148 ich = 0;
149 for (pch = line; *pch; ++pch)
150 {
151 if (*pch == TEXT('\t'))
152 {
153 spaces = TAB_WIDTH - (ich % TAB_WIDTH);
154 while (spaces-- > 0)
155 {
156 pszNew[ich++] = TEXT(' ');
157 }
158 }
159 else
160 {
161 pszNew[ich++] = *pch;
162 }
163 }
164 pszNew[ich] = 0;
165 return pszNew;
166}
167
168#define HASH_EOF 0xFFFFFFFF
169#define HASH_MASK 0x7FFFFFFF
170
171static DWORD GetHash(LPCTSTR psz, BOOL bIgnoreCase)
172{
173 DWORD ret = 0xDEADFACE;
174 while (*psz)
175 {
176 ret += (bIgnoreCase ? towupper(*psz) : *psz);
177 ret <<= 2;
178 ++psz;
179 }
180 return (ret & HASH_MASK);
181}
182
183static NODE *AllocEOFNode(DWORD lineno)
184{
185 NODE *node = AllocNode(AllocLine(NULL, 0), 0);
186 if (node == NULL)
187 return NULL;
188 node->pszComp = AllocLine(NULL, 0);
189 if (node->pszComp == NULL)
190 {
191 DeleteNode(node);
192 return NULL;
193 }
194 node->lineno = lineno;
195 node->hash = HASH_EOF;
196 return node;
197}
198
199static __inline BOOL IsEOFNode(NODE *node)
200{
201 return !node || node->hash == HASH_EOF;
202}
203
204static BOOL ConvertNode(const FILECOMPARE *pFC, NODE *node)
205{
206 if (!(pFC->dwFlags & FLAG_T))
207 {
208 LPTSTR tmp = ExpandTab(node->pszLine);
209 if (!tmp)
210 return FALSE;
211 free(node->pszLine);
212 node->pszLine = tmp;
213 if (!(pFC->dwFlags & FLAG_W))
214 node->hash = GetHash(node->pszLine, !!(pFC->dwFlags & FLAG_C));
215 }
216 if (pFC->dwFlags & FLAG_W)
217 {
218 node->pszComp = CompressSpace(node->pszLine);
219 if (!node->pszComp)
220 return FALSE;
221 node->hash = GetHash(node->pszComp, !!(pFC->dwFlags & FLAG_C));
222 }
223 return TRUE;
224}
225
226static FCRET CompareNode(const FILECOMPARE *pFC, const NODE *node0, const NODE *node1)
227{
228 DWORD dwCmpFlags;
229 LPTSTR psz0, psz1;
230 INT ret;
231 if (node0->hash != node1->hash)
232 return FCRET_DIFFERENT;
233
234 psz0 = (pFC->dwFlags & FLAG_W) ? node0->pszComp : node0->pszLine;
235 psz1 = (pFC->dwFlags & FLAG_W) ? node1->pszComp : node1->pszLine;
236 dwCmpFlags = ((pFC->dwFlags & FLAG_C) ? NORM_IGNORECASE : 0);
237 ret = CompareString(LOCALE_USER_DEFAULT, dwCmpFlags, psz0, -1, psz1, -1);
238 return (ret == CSTR_EQUAL) ? FCRET_IDENTICAL : FCRET_DIFFERENT;
239}
240
241static BOOL FindNextLine(LPCTSTR pch, DWORD ich, DWORD cch, LPDWORD pich)
242{
243 while (ich < cch)
244 {
245 if (pch[ich] == TEXT('\n') || pch[ich] == TEXT('\0'))
246 {
247 *pich = ich;
248 return TRUE;
249 }
250 ++ich;
251 }
252 *pich = cch;
253 return FALSE;
254}
255
256static FCRET
257ParseLines(const FILECOMPARE *pFC, HANDLE *phMapping,
258 LARGE_INTEGER *pib, const LARGE_INTEGER *pcb, struct list *list)
259{
260 DWORD lineno = 1, ich, cch, ichNext, cbView, cchNode;
261 LPTSTR psz, pszLine;
262 BOOL fLast, bCR;
263 NODE *node;
264
265 if (*phMapping == NULL)
266 return FCRET_NO_MORE_DATA;
267
268 if (pib->QuadPart >= pcb->QuadPart)
269 {
270 CloseHandle(*phMapping);
271 *phMapping = NULL;
272 return FCRET_NO_MORE_DATA;
273 }
274
275 cbView = (DWORD)min(pcb->QuadPart - pib->QuadPart, MAX_VIEW_SIZE);
276 psz = MapViewOfFile(*phMapping, FILE_MAP_READ, pib->HighPart, pib->LowPart, cbView);
277 if (!psz)
278 {
279 return OutOfMemory();
280 }
281
282 ich = 0;
283 cch = cbView / sizeof(TCHAR);
284 fLast = (pib->QuadPart + cbView >= pcb->QuadPart);
285 while (ich < cch &&
286 (FindNextLine(psz, ich, cch, &ichNext) ||
287 (ichNext == cch && (fLast || ich == 0))))
288 {
289 bCR = (ichNext > 0) && (psz[ichNext - 1] == TEXT('\r'));
290 cchNode = ichNext - ich - bCR;
291 pszLine = AllocLine(&psz[ich], cchNode);
292 node = AllocNode(pszLine, lineno++);
293 if (!node || !ConvertNode(pFC, node))
294 {
295 DeleteNode(node);
296 UnmapViewOfFile(psz);
297 return OutOfMemory();
298 }
299 list_add_tail(list, &node->entry);
300 ich = ichNext + 1;
301 }
302
303 UnmapViewOfFile(psz);
304
305 pib->QuadPart += ichNext * sizeof(WCHAR);
306
307 if (pib->QuadPart < pcb->QuadPart)
308 return FCRET_IDENTICAL;
309
310 // append EOF node
311 node = AllocEOFNode(lineno);
312 if (!node)
313 return OutOfMemory();
314 list_add_tail(list, &node->entry);
315
316 return FCRET_NO_MORE_DATA;
317}
318
319static VOID
320ShowDiff(FILECOMPARE *pFC, INT i, struct list *begin, struct list *end)
321{
322 NODE* node;
323 struct list *list = &pFC->list[i];
324 struct list *first = NULL, *last = NULL;
325 PrintCaption(pFC->file[i]);
326 if (begin && end && list_prev(list, begin))
327 begin = list_prev(list, begin);
328 while (begin != end)
329 {
330 node = LIST_ENTRY(begin, NODE, entry);
331 if (IsEOFNode(node))
332 break;
333 if (!first)
334 first = begin;
335 last = begin;
336 if (!(pFC->dwFlags & FLAG_A))
337 PrintLine(pFC, node->lineno, node->pszLine);
338 begin = list_next(list, begin);
339 }
340 if ((pFC->dwFlags & FLAG_A) && first)
341 {
342 node = LIST_ENTRY(first, NODE, entry);
343 PrintLine(pFC, node->lineno, node->pszLine);
344 first = list_next(list, first);
345 if (first != last)
346 {
347 if (list_next(list, first) == last)
348 {
349 node = LIST_ENTRY(first, NODE, entry);
350 PrintLine(pFC, node->lineno, node->pszLine);
351 }
352 else
353 {
354 PrintDots();
355 }
356 }
357 node = LIST_ENTRY(last, NODE, entry);
358 PrintLine(pFC, node->lineno, node->pszLine);
359 }
360}
361
362static VOID
363SkipIdentical(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
364{
365 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
366 while (ptr0 && ptr1)
367 {
368 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
369 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
370 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
371 break;
372 ptr0 = list_next(&pFC->list[0], ptr0);
373 ptr1 = list_next(&pFC->list[1], ptr1);
374 }
375 *pptr0 = ptr0;
376 *pptr1 = ptr1;
377}
378
379static DWORD
380SkipIdenticalN(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
381 DWORD nnnn, DWORD lineno0, DWORD lineno1)
382{
383 struct list *ptr0 = *pptr0, *ptr1 = *pptr1;
384 DWORD count = 0;
385 while (ptr0 && ptr1)
386 {
387 NODE *node0 = LIST_ENTRY(ptr0, NODE, entry);
388 NODE *node1 = LIST_ENTRY(ptr1, NODE, entry);
389 if (node0->lineno >= lineno0)
390 break;
391 if (node1->lineno >= lineno1)
392 break;
393 if (CompareNode(pFC, node0, node1) != FCRET_IDENTICAL)
394 break;
395 ptr0 = list_next(&pFC->list[0], ptr0);
396 ptr1 = list_next(&pFC->list[1], ptr1);
397 ++count;
398 if (count >= nnnn)
399 break;
400 }
401 *pptr0 = ptr0;
402 *pptr1 = ptr1;
403 return count;
404}
405
406static FCRET
407ScanDiff(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1,
408 DWORD lineno0, DWORD lineno1)
409{
410 struct list *ptr0 = *pptr0, *ptr1 = *pptr1, *tmp0, *tmp1;
411 NODE *node0, *node1;
412 INT count;
413 while (ptr0 && ptr1)
414 {
415 node0 = LIST_ENTRY(ptr0, NODE, entry);
416 node1 = LIST_ENTRY(ptr1, NODE, entry);
417 if (node0->lineno >= lineno0)
418 return FCRET_DIFFERENT;
419 if (node1->lineno >= lineno1)
420 return FCRET_DIFFERENT;
421 tmp0 = ptr0;
422 tmp1 = ptr1;
423 count = SkipIdenticalN(pFC, &tmp0, &tmp1, pFC->nnnn, lineno0, lineno1);
424 if (count >= pFC->nnnn)
425 break;
426 if (count > 0)
427 {
428 ptr0 = tmp0;
429 ptr1 = tmp1;
430 }
431 else
432 {
433 ptr0 = list_next(&pFC->list[0], ptr0);
434 ptr1 = list_next(&pFC->list[1], ptr1);
435 }
436 }
437 *pptr0 = ptr0;
438 *pptr1 = ptr1;
439 return FCRET_IDENTICAL;
440}
441
442static FCRET
443Resync(FILECOMPARE *pFC, struct list **pptr0, struct list **pptr1)
444{
445 FCRET ret;
446 struct list *ptr0, *ptr1, *save0 = NULL, *save1 = NULL;
447 NODE *node0, *node1;
448 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
449 DWORD lineno0, lineno1;
450 INT penalty, i0, i1, min_penalty = MAXLONG;
451
452 node0 = LIST_ENTRY(*pptr0, NODE, entry);
453 node1 = LIST_ENTRY(*pptr1, NODE, entry);
454 lineno0 = node0->lineno + pFC->n;
455 lineno1 = node1->lineno + pFC->n;
456
457 // ``If the files that you are comparing have more than pFC->n consecutive
458 // differing lines, FC cancels the comparison,,
459 // ``If the number of matching lines in the files is less than pFC->nnnn,
460 // FC displays the matching lines as differences,,
461 for (ptr1 = list_next(list1, *pptr1), i1 = 0; ptr1; ptr1 = list_next(list1, ptr1), ++i1)
462 {
463 node1 = LIST_ENTRY(ptr1, NODE, entry);
464 if (node1->lineno >= lineno1)
465 break;
466 for (ptr0 = list_next(list0, *pptr0), i0 = 0; ptr0; ptr0 = list_next(list0, ptr0), ++i0)
467 {
468 node0 = LIST_ENTRY(ptr0, NODE, entry);
469 if (node0->lineno >= lineno0)
470 break;
471 if (CompareNode(pFC, node0, node1) == FCRET_IDENTICAL)
472 {
473 penalty = min(i0, i1) + abs(i1 - i0);
474 if (min_penalty > penalty)
475 {
476 min_penalty = penalty;
477 save0 = ptr0;
478 save1 = ptr1;
479 }
480 }
481 }
482 }
483
484 if (save0 && save1)
485 {
486 *pptr0 = save0;
487 *pptr1 = save1;
488 ret = ScanDiff(pFC, &save0, &save1, lineno0, lineno1);
489 if (save0 && save1)
490 {
491 *pptr0 = save0;
492 *pptr1 = save1;
493 }
494 return ret;
495 }
496
497 for (ptr0 = *pptr0; ptr0; ptr0 = list_next(list0, ptr0))
498 {
499 node0 = LIST_ENTRY(ptr0, NODE, entry);
500 if (node0->lineno == lineno0)
501 break;
502 }
503 for (ptr1 = *pptr1; ptr1; ptr1 = list_next(list1, ptr1))
504 {
505 node1 = LIST_ENTRY(ptr1, NODE, entry);
506 if (node1->lineno == lineno1)
507 break;
508 }
509 *pptr0 = ptr0;
510 *pptr1 = ptr1;
511 return FCRET_DIFFERENT;
512}
513
514static FCRET
515Finalize(FILECOMPARE* pFC, struct list *ptr0, struct list* ptr1, BOOL fDifferent)
516{
517 if (!ptr0 && !ptr1)
518 {
519 if (fDifferent)
520 return Different(pFC->file[0], pFC->file[1]);
521 return NoDifference();
522 }
523 else
524 {
525 ShowDiff(pFC, 0, ptr0, NULL);
526 ShowDiff(pFC, 1, ptr1, NULL);
527 PrintEndOfDiff();
528 return FCRET_DIFFERENT;
529 }
530}
531
532// FIXME: "cmd_apitest fc" has some failures.
533FCRET TextCompare(FILECOMPARE *pFC, HANDLE *phMapping0, const LARGE_INTEGER *pcb0,
534 HANDLE *phMapping1, const LARGE_INTEGER *pcb1)
535{
536 FCRET ret, ret0, ret1;
537 struct list *ptr0, *ptr1, *save0, *save1, *next0, *next1;
538 NODE* node0, * node1;
539 BOOL fDifferent = FALSE;
540 LARGE_INTEGER ib0 = { .QuadPart = 0 }, ib1 = { .QuadPart = 0 };
541 struct list *list0 = &pFC->list[0], *list1 = &pFC->list[1];
542 list_init(list0);
543 list_init(list1);
544
545 do
546 {
547 ret0 = ParseLines(pFC, phMapping0, &ib0, pcb0, list0);
548 if (ret0 == FCRET_INVALID)
549 {
550 ret = ret0;
551 goto cleanup;
552 }
553 ret1 = ParseLines(pFC, phMapping1, &ib1, pcb1, list1);
554 if (ret1 == FCRET_INVALID)
555 {
556 ret = ret1;
557 goto cleanup;
558 }
559
560 ptr0 = list_head(list0);
561 ptr1 = list_head(list1);
562 for (;;)
563 {
564 if (!ptr0 || !ptr1)
565 goto quit;
566
567 // skip identical (sync'ed)
568 SkipIdentical(pFC, &ptr0, &ptr1);
569 if (ptr0 || ptr1)
570 fDifferent = TRUE;
571 node0 = LIST_ENTRY(ptr0, NODE, entry);
572 node1 = LIST_ENTRY(ptr1, NODE, entry);
573 if (IsEOFNode(node0) || IsEOFNode(node1))
574 goto quit;
575
576 // try to resync
577 save0 = ptr0;
578 save1 = ptr1;
579 ret = Resync(pFC, &ptr0, &ptr1);
580 if (ret == FCRET_INVALID)
581 goto cleanup;
582 if (ret == FCRET_DIFFERENT)
583 {
584 // resync failed
585 ret = ResyncFailed();
586 // show the difference
587 ShowDiff(pFC, 0, save0, ptr0);
588 ShowDiff(pFC, 1, save1, ptr1);
589 PrintEndOfDiff();
590 goto cleanup;
591 }
592
593 // show the difference
594 fDifferent = TRUE;
595 next0 = ptr0 ? list_next(list0, ptr0) : ptr0;
596 next1 = ptr1 ? list_next(list1, ptr1) : ptr1;
597 ShowDiff(pFC, 0, save0, (next0 ? next0 : ptr0));
598 ShowDiff(pFC, 1, save1, (next1 ? next1 : ptr1));
599 PrintEndOfDiff();
600
601 // now resync'ed
602 }
603 } while (ret0 != FCRET_NO_MORE_DATA || ret1 != FCRET_NO_MORE_DATA);
604
605quit:
606 ret = Finalize(pFC, ptr0, ptr1, fDifferent);
607cleanup:
608 DeleteList(list0);
609 DeleteList(list1);
610 return ret;
611}