jcs's openbsd hax
openbsd
1/* $OpenBSD: coll.c,v 1.13 2024/09/20 02:00:46 jsg Exp $ */
2
3/*-
4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org>
5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com>
6 * All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <sys/types.h>
31
32#include <errno.h>
33#include <err.h>
34#include <langinfo.h>
35#include <limits.h>
36#include <math.h>
37#include <md5.h>
38#include <stdlib.h>
39#include <string.h>
40#include <wchar.h>
41#include <wctype.h>
42
43#include "coll.h"
44#include "vsort.h"
45
46struct key_specs *keys;
47size_t keys_num = 0;
48
49static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset);
50static int gnumcoll(struct key_value*, struct key_value *, size_t offset);
51static int monthcoll(struct key_value*, struct key_value *, size_t offset);
52static int numcoll(struct key_value*, struct key_value *, size_t offset);
53static int hnumcoll(struct key_value*, struct key_value *, size_t offset);
54static int randomcoll(struct key_value*, struct key_value *, size_t offset);
55static int versioncoll(struct key_value*, struct key_value *, size_t offset);
56
57/*
58 * Allocate keys array
59 */
60struct keys_array *
61keys_array_alloc(void)
62{
63 struct keys_array *ka;
64 size_t sz;
65
66 sz = keys_array_size();
67 ka = sort_calloc(1, sz);
68
69 return ka;
70}
71
72/*
73 * Calculate whether we need key hint space
74 */
75static size_t
76key_hint_size(void)
77{
78 return need_hint ? sizeof(struct key_hint) : 0;
79}
80
81/*
82 * Calculate keys array size
83 */
84size_t
85keys_array_size(void)
86{
87 return keys_num * (sizeof(struct key_value) + key_hint_size());
88}
89
90/*
91 * Clean data of keys array
92 */
93void
94clean_keys_array(const struct bwstring *s, struct keys_array *ka)
95{
96 if (ka) {
97 size_t i;
98
99 for (i = 0; i < keys_num; ++i)
100 if (ka->key[i].k && ka->key[i].k != s)
101 bwsfree(ka->key[i].k);
102 memset(ka, 0, keys_array_size());
103 }
104}
105
106/*
107 * Set value of a key in the keys set
108 */
109void
110set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind)
111{
112 if (ka && keys_num > ind) {
113 struct key_value *kv;
114
115 kv = &(ka->key[ind]);
116
117 if (kv->k != s)
118 bwsfree(kv->k);
119 kv->k = s;
120 }
121}
122
123/*
124 * Initialize a sort list item
125 */
126struct sort_list_item *
127sort_list_item_alloc(void)
128{
129 struct sort_list_item *si;
130 size_t sz;
131
132 sz = sizeof(struct sort_list_item) + keys_array_size();
133 si = sort_calloc(1, sz);
134
135 return si;
136}
137
138size_t
139sort_list_item_size(struct sort_list_item *si)
140{
141 size_t i, ret = 0;
142
143 if (si) {
144 ret = sizeof(struct sort_list_item) + keys_array_size();
145 if (si->str)
146 ret += bws_memsize(si->str);
147 for (i = 0; i < keys_num; ++i) {
148 struct key_value *kv;
149
150 kv = &(si->ka.key[i]);
151
152 if (kv->k != si->str)
153 ret += bws_memsize(kv->k);
154 }
155 }
156 return ret;
157}
158
159/*
160 * Calculate key for a sort list item
161 */
162static void
163sort_list_item_make_key(struct sort_list_item *si)
164{
165 preproc(si->str, &(si->ka));
166}
167
168/*
169 * Set value of a sort list item.
170 * Return combined string and keys memory size.
171 */
172void
173sort_list_item_set(struct sort_list_item *si, struct bwstring *str)
174{
175 if (si) {
176 clean_keys_array(si->str, &(si->ka));
177 if (si->str) {
178 if (si->str == str) {
179 /* we are trying to reset the same string */
180 return;
181 } else {
182 bwsfree(si->str);
183 si->str = NULL;
184 }
185 }
186 si->str = str;
187 sort_list_item_make_key(si);
188 }
189}
190
191/*
192 * De-allocate a sort list item object memory
193 */
194void
195sort_list_item_clean(struct sort_list_item *si)
196{
197 if (si) {
198 clean_keys_array(si->str, &(si->ka));
199 if (si->str) {
200 bwsfree(si->str);
201 si->str = NULL;
202 }
203 }
204}
205
206/*
207 * Skip columns according to specs
208 */
209static size_t
210skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start,
211 bool skip_blanks, bool *empty_key)
212{
213 if (cols < 1)
214 return BWSLEN(s) + 1;
215
216 if (skip_blanks)
217 while (start < BWSLEN(s) && iswblank(BWS_GET(s, start)))
218 ++start;
219
220 while (start < BWSLEN(s) && cols > 1) {
221 --cols;
222 ++start;
223 }
224
225 if (start >= BWSLEN(s))
226 *empty_key = true;
227
228 return start;
229}
230
231/*
232 * Skip fields according to specs
233 */
234static size_t
235skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field)
236{
237 if (fields < 2) {
238 if (BWSLEN(s) == 0)
239 *empty_field = true;
240 return 0;
241 } else if (!(sort_opts_vals.tflag)) {
242 size_t cpos = 0;
243 bool pb = true;
244
245 while (cpos < BWSLEN(s)) {
246 bool isblank;
247
248 isblank = iswblank(BWS_GET(s, cpos));
249
250 if (isblank && !pb) {
251 --fields;
252 if (fields <= 1)
253 return cpos;
254 }
255 pb = isblank;
256 ++cpos;
257 }
258 if (fields > 1)
259 *empty_field = true;
260 return cpos;
261 } else {
262 size_t cpos = 0;
263
264 while (cpos < BWSLEN(s)) {
265 if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) {
266 --fields;
267 if (fields <= 1)
268 return cpos + 1;
269 }
270 ++cpos;
271 }
272 if (fields > 1)
273 *empty_field = true;
274 return cpos;
275 }
276}
277
278/*
279 * Find fields start
280 */
281static void
282find_field_start(const struct bwstring *s, struct key_specs *ks,
283 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key)
284{
285 *field_start = skip_fields_to_start(s, ks->f1, empty_field);
286 if (!*empty_field)
287 *key_start = skip_cols_to_start(s, ks->c1, *field_start,
288 ks->pos1b, empty_key);
289 else
290 *empty_key = true;
291}
292
293/*
294 * Find end key position
295 */
296static size_t
297find_field_end(const struct bwstring *s, struct key_specs *ks)
298{
299 size_t f2, next_field_start, pos_end;
300 bool empty_field, empty_key;
301
302 empty_field = false;
303 empty_key = false;
304 f2 = ks->f2;
305
306 if (f2 == 0)
307 return BWSLEN(s) + 1;
308 else {
309 if (ks->c2 == 0) {
310 next_field_start = skip_fields_to_start(s, f2 + 1,
311 &empty_field);
312 if ((next_field_start > 0) && sort_opts_vals.tflag &&
313 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s,
314 next_field_start - 1)))
315 --next_field_start;
316 } else
317 next_field_start = skip_fields_to_start(s, f2,
318 &empty_field);
319 }
320
321 if (empty_field || (next_field_start >= BWSLEN(s)))
322 return BWSLEN(s) + 1;
323
324 if (ks->c2) {
325 pos_end = skip_cols_to_start(s, ks->c2, next_field_start,
326 ks->pos2b, &empty_key);
327 if (pos_end < BWSLEN(s))
328 ++pos_end;
329 } else
330 pos_end = next_field_start;
331
332 return pos_end;
333}
334
335/*
336 * Cut a field according to the key specs
337 */
338static struct bwstring *
339cut_field(const struct bwstring *s, struct key_specs *ks)
340{
341 struct bwstring *ret = NULL;
342
343 if (s && ks) {
344 size_t field_start, key_end, key_start, sz;
345 bool empty_field, empty_key;
346
347 field_start = 0;
348 key_start = 0;
349 empty_field = false;
350 empty_key = false;
351
352 find_field_start(s, ks, &field_start, &key_start,
353 &empty_field, &empty_key);
354
355 if (empty_key)
356 sz = 0;
357 else {
358 key_end = find_field_end(s, ks);
359 sz = (key_end < key_start) ? 0 : (key_end - key_start);
360 }
361
362 ret = bwsalloc(sz);
363 if (sz)
364 bwsnocpy(ret, s, key_start, sz);
365 } else
366 ret = bwsalloc(0);
367
368 return ret;
369}
370
371/*
372 * Preprocesses a line applying the necessary transformations
373 * specified by command line options and returns the preprocessed
374 * string, which can be used to compare.
375 */
376int
377preproc(struct bwstring *s, struct keys_array *ka)
378{
379 if (sort_opts_vals.kflag) {
380 size_t i;
381 for (i = 0; i < keys_num; i++) {
382 struct bwstring *key;
383 struct key_specs *kspecs;
384 struct sort_mods *sm;
385
386 kspecs = &(keys[i]);
387 key = cut_field(s, kspecs);
388
389 sm = &(kspecs->sm);
390 if (sm->dflag)
391 key = dictionary_order(key);
392 else if (sm->iflag)
393 key = ignore_nonprinting(key);
394 if (sm->fflag || sm->Mflag)
395 key = ignore_case(key);
396
397 set_key_on_keys_array(ka, key, i);
398 }
399 } else {
400 struct bwstring *ret = NULL;
401 struct sort_mods *sm = default_sort_mods;
402
403#ifdef GNUSORT_COMPATIBILITY
404 if (sm->bflag) {
405 if (ret == NULL)
406 ret = bwsdup(s);
407 ret = ignore_leading_blanks(ret);
408 }
409#endif
410 if (sm->dflag) {
411 if (ret == NULL)
412 ret = bwsdup(s);
413 ret = dictionary_order(ret);
414 } else if (sm->iflag) {
415 if (ret == NULL)
416 ret = bwsdup(s);
417 ret = ignore_nonprinting(ret);
418 }
419 if (sm->fflag || sm->Mflag) {
420 if (ret == NULL)
421 ret = bwsdup(s);
422 ret = ignore_case(ret);
423 }
424 if (ret == NULL)
425 set_key_on_keys_array(ka, s, 0);
426 else
427 set_key_on_keys_array(ka, ret, 0);
428 }
429
430 return 0;
431}
432
433cmpcoll_t
434get_sort_func(struct sort_mods *sm)
435{
436 if (sm->nflag)
437 return numcoll;
438 else if (sm->hflag)
439 return hnumcoll;
440 else if (sm->gflag)
441 return gnumcoll;
442 else if (sm->Mflag)
443 return monthcoll;
444 else if (sm->Rflag)
445 return randomcoll;
446 else if (sm->Vflag)
447 return versioncoll;
448 else
449 return wstrcoll;
450}
451
452/*
453 * Compares the given strings. Returns a positive number if
454 * the first precedes the second, a negative number if the second is
455 * the preceding one, and zero if they are equal. This function calls
456 * the underlying collate functions, which done the actual comparison.
457 */
458int
459key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset)
460{
461 struct sort_mods *sm;
462 int res = 0;
463 size_t i;
464
465 for (i = 0; i < keys_num; ++i) {
466 sm = &(keys[i].sm);
467
468 if (sm->rflag)
469 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset);
470 else
471 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset);
472
473 if (res)
474 break;
475
476 /* offset applies to only the first key */
477 offset = 0;
478 }
479 return res;
480}
481
482/*
483 * Compare two strings.
484 * Plain symbol-by-symbol comparison.
485 */
486int
487top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2)
488{
489 if (default_sort_mods->rflag) {
490 const struct bwstring *tmp;
491
492 tmp = s1;
493 s1 = s2;
494 s2 = tmp;
495 }
496
497 return bwscoll(s1, s2, 0);
498}
499
500/*
501 * Compare a string and a sort list item, according to the sort specs.
502 */
503int
504str_list_coll(struct bwstring *str1, struct sort_list_item **ss2)
505{
506 struct keys_array *ka1;
507 int ret = 0;
508
509 ka1 = keys_array_alloc();
510
511 preproc(str1, ka1);
512
513 sort_list_item_make_key(*ss2);
514
515 if (debug_sort) {
516 bwsprintf(stdout, str1, "; s1=<", ">");
517 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">");
518 }
519
520 ret = key_coll(ka1, &((*ss2)->ka), 0);
521
522 if (debug_sort)
523 printf("; cmp1=%d", ret);
524
525 clean_keys_array(str1, ka1);
526 sort_free(ka1);
527
528 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
529 ret = top_level_str_coll(str1, ((*ss2)->str));
530 if (debug_sort)
531 printf("; cmp2=%d", ret);
532 }
533
534 if (debug_sort)
535 putchar('\n');
536
537 return ret;
538}
539
540/*
541 * Compare two sort list items, according to the sort specs.
542 */
543int
544list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2,
545 size_t offset)
546{
547 int ret;
548
549 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset);
550
551 if (debug_sort) {
552 if (offset)
553 printf("; offset=%zu", offset);
554 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">");
555 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">");
556 printf("; cmp1=%d\n", ret);
557 }
558
559 if (ret)
560 return ret;
561
562 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) {
563 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str));
564 if (debug_sort)
565 printf("; cmp2=%d\n", ret);
566 }
567
568 return ret;
569}
570
571/*
572 * Compare two sort list items, according to the sort specs.
573 */
574int
575list_coll(const void *ss1, const void *ss2)
576{
577 return list_coll_offset((struct sort_list_item **)ss1,
578 (struct sort_list_item **)ss2, 0);
579}
580
581#define LSCDEF(N) \
582static int \
583list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \
584{ \
585 \
586 return list_coll_offset(ss1, ss2, N); \
587}
588
589LSCDEF(0)
590LSCDEF(1)
591LSCDEF(2)
592LSCDEF(3)
593LSCDEF(4)
594LSCDEF(5)
595LSCDEF(6)
596LSCDEF(7)
597LSCDEF(8)
598LSCDEF(9)
599LSCDEF(10)
600LSCDEF(11)
601LSCDEF(12)
602LSCDEF(13)
603LSCDEF(14)
604LSCDEF(15)
605LSCDEF(16)
606LSCDEF(17)
607LSCDEF(18)
608LSCDEF(19)
609LSCDEF(20)
610
611listcoll_t
612get_list_call_func(size_t offset)
613{
614 static const listcoll_t lsarray[] = { list_coll_0, list_coll_1,
615 list_coll_2, list_coll_3, list_coll_4, list_coll_5,
616 list_coll_6, list_coll_7, list_coll_8, list_coll_9,
617 list_coll_10, list_coll_11, list_coll_12, list_coll_13,
618 list_coll_14, list_coll_15, list_coll_16, list_coll_17,
619 list_coll_18, list_coll_19, list_coll_20 };
620
621 if (offset <= 20)
622 return lsarray[offset];
623
624 return list_coll_0;
625}
626
627/*
628 * Compare two sort list items, only by their original string.
629 */
630int
631list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2)
632{
633 return top_level_str_coll(((*ss1)->str), ((*ss2)->str));
634}
635
636/*
637 * Maximum size of a number in the string (before or after decimal point)
638 */
639#define MAX_NUM_SIZE (128)
640
641/*
642 * Set suffix value
643 */
644static void
645setsuffix(wchar_t c, unsigned char *si)
646{
647 switch (c){
648 case L'k':
649 case L'K':
650 *si = 1;
651 break;
652 case L'M':
653 *si = 2;
654 break;
655 case L'G':
656 *si = 3;
657 break;
658 case L'T':
659 *si = 4;
660 break;
661 case L'P':
662 *si = 5;
663 break;
664 case L'E':
665 *si = 6;
666 break;
667 case L'Z':
668 *si = 7;
669 break;
670 case L'Y':
671 *si = 8;
672 break;
673 default:
674 *si = 0;
675 }
676}
677
678/*
679 * Read string s and parse the string into a fixed-decimal-point number.
680 * sign equals -1 if the number is negative (explicit plus is not allowed,
681 * according to GNU sort's "info sort".
682 * The number part before decimal point is in the smain, after the decimal
683 * point is in sfrac, tail is the pointer to the remainder of the string.
684 */
685static int
686read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si)
687{
688 bwstring_iterator s;
689
690 s = bws_begin(s0);
691
692 /* always end the fraction with zero, even if we have no fraction */
693 sfrac[0] = 0;
694
695 while (iswblank(bws_get_iter_value(s)))
696 s = bws_iterator_inc(s, 1);
697
698 if (bws_get_iter_value(s) == L'-') {
699 *sign = -1;
700 s = bws_iterator_inc(s, 1);
701 }
702
703 // This is '0', not '\0', do not change this
704 while (iswdigit(bws_get_iter_value(s)) &&
705 (bws_get_iter_value(s) == L'0'))
706 s = bws_iterator_inc(s, 1);
707
708 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) {
709 if (iswdigit(bws_get_iter_value(s))) {
710 smain[*main_len] = bws_get_iter_value(s);
711 s = bws_iterator_inc(s, 1);
712 *main_len += 1;
713 } else
714 break;
715 }
716
717 smain[*main_len] = 0;
718
719 if (bws_get_iter_value(s) == L'.') {
720 s = bws_iterator_inc(s, 1);
721 while (iswdigit(bws_get_iter_value(s)) &&
722 *frac_len < MAX_NUM_SIZE) {
723 sfrac[*frac_len] = bws_get_iter_value(s);
724 s = bws_iterator_inc(s, 1);
725 *frac_len += 1;
726 }
727 sfrac[*frac_len] = 0;
728
729 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') {
730 --(*frac_len);
731 sfrac[*frac_len] = L'\0';
732 }
733 }
734
735 setsuffix(bws_get_iter_value(s), si);
736
737 if ((*main_len + *frac_len) == 0)
738 *sign = 0;
739
740 return 0;
741}
742
743/*
744 * Implements string sort.
745 */
746static int
747wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
748{
749
750 if (debug_sort) {
751 if (offset)
752 printf("; offset=%zu\n", offset);
753 bwsprintf(stdout, kv1->k, "; k1=<", ">");
754 printf("(%zu)", BWSLEN(kv1->k));
755 bwsprintf(stdout, kv2->k, ", k2=<", ">");
756 printf("(%zu)", BWSLEN(kv2->k));
757 }
758
759 return bwscoll(kv1->k, kv2->k, offset);
760}
761
762/*
763 * Compare two suffixes
764 */
765static inline int
766cmpsuffix(unsigned char si1, unsigned char si2)
767{
768 return (char)si1 - (char)si2;
769}
770
771/*
772 * Implements numeric sort for -n and -h.
773 */
774static int
775numcoll_impl(struct key_value *kv1, struct key_value *kv2,
776 size_t offset __unused, bool use_suffix)
777{
778 struct bwstring *s1, *s2;
779 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1];
780 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1];
781 int cmp_res, sign1, sign2;
782 size_t frac1, frac2, main1, main2;
783 unsigned char SI1, SI2;
784 bool e1, e2, key1_read, key2_read;
785
786 s1 = kv1->k;
787 s2 = kv2->k;
788 sign1 = sign2 = 0;
789 main1 = main2 = 0;
790 frac1 = frac2 = 0;
791
792 key1_read = key2_read = false;
793
794 if (debug_sort) {
795 bwsprintf(stdout, s1, "; k1=<", ">");
796 bwsprintf(stdout, s2, ", k2=<", ">");
797 }
798
799 if (s1 == s2)
800 return 0;
801
802 if (kv1->hint->status == HS_UNINITIALIZED) {
803 /* read the number from the string */
804 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
805 key1_read = true;
806 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10);
807 if (main1 < 1 && frac1 < 1)
808 kv1->hint->v.nh.empty=true;
809 kv1->hint->v.nh.si = SI1;
810 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ?
811 HS_INITIALIZED : HS_ERROR;
812 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false;
813 }
814
815 if (kv2->hint->status == HS_UNINITIALIZED) {
816 /* read the number from the string */
817 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
818 key2_read = true;
819 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10);
820 if (main2 < 1 && frac2 < 1)
821 kv2->hint->v.nh.empty=true;
822 kv2->hint->v.nh.si = SI2;
823 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ?
824 HS_INITIALIZED : HS_ERROR;
825 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false;
826 }
827
828 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status ==
829 HS_INITIALIZED) {
830 unsigned long long n1, n2;
831 bool neg1, neg2;
832
833 e1 = kv1->hint->v.nh.empty;
834 e2 = kv2->hint->v.nh.empty;
835
836 if (e1 && e2)
837 return 0;
838
839 neg1 = kv1->hint->v.nh.neg;
840 neg2 = kv2->hint->v.nh.neg;
841
842 if (neg1 && !neg2)
843 return -1;
844 if (neg2 && !neg1)
845 return 1;
846
847 if (e1)
848 return neg2 ? 1 : -1;
849 else if (e2)
850 return neg1 ? -1 : 1;
851
852
853 if (use_suffix) {
854 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si);
855 if (cmp_res)
856 return neg1 ? -cmp_res : cmp_res;
857 }
858
859 n1 = kv1->hint->v.nh.n1;
860 n2 = kv2->hint->v.nh.n1;
861 if (n1 < n2)
862 return neg1 ? 1 : -1;
863 else if (n1 > n2)
864 return neg1 ? -1 : 1;
865 }
866
867 /* read the numbers from the strings */
868 if (!key1_read)
869 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1);
870 if (!key2_read)
871 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2);
872
873 e1 = ((main1 + frac1) == 0);
874 e2 = ((main2 + frac2) == 0);
875
876 if (e1 && e2)
877 return 0;
878
879 /* we know the result if the signs are different */
880 if (sign1 < 0 && sign2 >= 0)
881 return -1;
882 if (sign1 >= 0 && sign2 < 0)
883 return 1;
884
885 if (e1)
886 return (sign2 < 0) ? +1 : -1;
887 else if (e2)
888 return (sign1 < 0) ? -1 : 1;
889
890 if (use_suffix) {
891 cmp_res = cmpsuffix(SI1, SI2);
892 if (cmp_res)
893 return (sign1 < 0) ? -cmp_res : cmp_res;
894 }
895
896 /* if both numbers are empty assume that the strings are equal */
897 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1)
898 return 0;
899
900 /*
901 * if the main part is of different size, we know the result
902 * (because the leading zeros are removed)
903 */
904 if (main1 < main2)
905 cmp_res = -1;
906 else if (main1 > main2)
907 cmp_res = +1;
908 /* if the sizes are equal then simple non-collate string compare gives the correct result */
909 else
910 cmp_res = wcscmp(smain1, smain2);
911
912 /* check fraction */
913 if (!cmp_res)
914 cmp_res = wcscmp(sfrac1, sfrac2);
915
916 if (!cmp_res)
917 return 0;
918
919 /* reverse result if the signs are negative */
920 if (sign1 < 0 && sign2 < 0)
921 cmp_res = -cmp_res;
922
923 return cmp_res;
924}
925
926/*
927 * Implements numeric sort (-n).
928 */
929static int
930numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
931{
932 return numcoll_impl(kv1, kv2, offset, false);
933}
934
935/*
936 * Implements 'human' numeric sort (-h).
937 */
938static int
939hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset)
940{
941 return numcoll_impl(kv1, kv2, offset, true);
942}
943
944/*
945 * Implements random sort (-R).
946 */
947static int
948randomcoll(struct key_value *kv1, struct key_value *kv2,
949 size_t offset __unused)
950{
951 struct bwstring *s1, *s2;
952 MD5_CTX ctx1, ctx2;
953 char *b1, *b2;
954
955 s1 = kv1->k;
956 s2 = kv2->k;
957
958 if (debug_sort) {
959 bwsprintf(stdout, s1, "; k1=<", ">");
960 bwsprintf(stdout, s2, ", k2=<", ">");
961 }
962
963 if (s1 == s2)
964 return 0;
965
966 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX));
967 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX));
968
969 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1));
970 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2));
971 b1 = MD5End(&ctx1, NULL);
972 b2 = MD5End(&ctx2, NULL);
973 if (b1 == NULL) {
974 if (b2 == NULL)
975 return 0;
976 else {
977 sort_free(b2);
978 return -1;
979 }
980 } else if (b2 == NULL) {
981 sort_free(b1);
982 return 1;
983 } else {
984 int cmp_res;
985
986 cmp_res = strcmp(b1, b2);
987 sort_free(b1);
988 sort_free(b2);
989
990 if (!cmp_res)
991 cmp_res = bwscoll(s1, s2, 0);
992
993 return cmp_res;
994 }
995}
996
997/*
998 * Implements version sort (-V).
999 */
1000static int
1001versioncoll(struct key_value *kv1, struct key_value *kv2,
1002 size_t offset __unused)
1003{
1004 struct bwstring *s1, *s2;
1005
1006 s1 = kv1->k;
1007 s2 = kv2->k;
1008
1009 if (debug_sort) {
1010 bwsprintf(stdout, s1, "; k1=<", ">");
1011 bwsprintf(stdout, s2, ", k2=<", ">");
1012 }
1013
1014 if (s1 == s2)
1015 return 0;
1016
1017 return vcmp(s1, s2);
1018}
1019
1020/*
1021 * Check for minus infinity
1022 */
1023static inline bool
1024huge_minus(double d, int err1)
1025{
1026 if (err1 == ERANGE)
1027 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL)
1028 return 1;
1029
1030 return 0;
1031}
1032
1033/*
1034 * Check for plus infinity
1035 */
1036static inline bool
1037huge_plus(double d, int err1)
1038{
1039 if (err1 == ERANGE)
1040 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL)
1041 return 1;
1042
1043 return 0;
1044}
1045
1046/*
1047 * Check whether a function is a NAN
1048 */
1049static bool
1050is_nan(double d)
1051{
1052#if defined(NAN)
1053 return (d == NAN || isnan(d));
1054#else
1055 return (isnan(d));
1056#endif
1057}
1058
1059/*
1060 * Compare two NANs
1061 */
1062static int
1063cmp_nans(double d1, double d2)
1064{
1065 if (d1 == d2)
1066 return 0;
1067 return d1 < d2 ? -1 : 1;
1068}
1069
1070/*
1071 * Implements general numeric sort (-g).
1072 */
1073static int
1074gnumcoll(struct key_value *kv1, struct key_value *kv2,
1075 size_t offset __unused)
1076{
1077 double d1, d2;
1078 int err1, err2;
1079 bool empty1, empty2, key1_read, key2_read;
1080
1081 d1 = d2 = 0;
1082 err1 = err2 = 0;
1083 key1_read = key2_read = false;
1084
1085 if (debug_sort) {
1086 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1087 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1088 }
1089
1090 if (kv1->hint->status == HS_UNINITIALIZED) {
1091 errno = 0;
1092 d1 = bwstod(kv1->k, &empty1);
1093 err1 = errno;
1094
1095 if (empty1) {
1096 kv1->hint->v.gh.notnum = true;
1097 kv1->hint->status = HS_INITIALIZED;
1098 } else if (err1 == 0) {
1099 kv1->hint->v.gh.d = d1;
1100 kv1->hint->v.gh.nan = is_nan(d1);
1101 kv1->hint->status = HS_INITIALIZED;
1102 } else
1103 kv1->hint->status = HS_ERROR;
1104
1105 key1_read = true;
1106 }
1107
1108 if (kv2->hint->status == HS_UNINITIALIZED) {
1109 errno = 0;
1110 d2 = bwstod(kv2->k, &empty2);
1111 err2 = errno;
1112
1113 if (empty2) {
1114 kv2->hint->v.gh.notnum = true;
1115 kv2->hint->status = HS_INITIALIZED;
1116 } else if (err2 == 0) {
1117 kv2->hint->v.gh.d = d2;
1118 kv2->hint->v.gh.nan = is_nan(d2);
1119 kv2->hint->status = HS_INITIALIZED;
1120 } else
1121 kv2->hint->status = HS_ERROR;
1122
1123 key2_read = true;
1124 }
1125
1126 if (kv1->hint->status == HS_INITIALIZED &&
1127 kv2->hint->status == HS_INITIALIZED) {
1128#ifdef GNUSORT_COMPATIBILITY
1129 if (kv1->hint->v.gh.notnum)
1130 return kv2->hint->v.gh.notnum ? 0 : -1;
1131 else if (kv2->hint->v.gh.notnum)
1132 return 1;
1133#else
1134 if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum)
1135 return 0;
1136#endif
1137
1138 if (kv1->hint->v.gh.nan)
1139 return kv2->hint->v.gh.nan ?
1140 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1;
1141 else if (kv2->hint->v.gh.nan)
1142 return 1;
1143
1144 d1 = kv1->hint->v.gh.d;
1145 d2 = kv2->hint->v.gh.d;
1146
1147 if (d1 < d2)
1148 return -1;
1149 else if (d1 > d2)
1150 return 1;
1151 else
1152 return 0;
1153 }
1154
1155 if (!key1_read) {
1156 errno = 0;
1157 d1 = bwstod(kv1->k, &empty1);
1158 err1 = errno;
1159 }
1160
1161 if (!key2_read) {
1162 errno = 0;
1163 d2 = bwstod(kv2->k, &empty2);
1164 err2 = errno;
1165 }
1166
1167 /* Non-value case */
1168#ifdef GNUSORT_COMPATIBILITY
1169 if (empty1)
1170 return empty2 ? 0 : -1;
1171 else if (empty2)
1172 return 1;
1173#else
1174 if (empty1 && empty2)
1175 return 0;
1176#endif
1177
1178 /* NAN case */
1179 if (is_nan(d1))
1180 return is_nan(d2) ? cmp_nans(d1, d2) : -1;
1181 else if (is_nan(d2))
1182 return 1;
1183
1184 /* Infinities */
1185 if (err1 == ERANGE || err2 == ERANGE) {
1186 /* Minus infinity case */
1187 if (huge_minus(d1, err1)) {
1188 if (huge_minus(d2, err2)) {
1189 if (d1 == d2)
1190 return 0;
1191 return d1 < d2 ? -1 : 1;
1192 } else
1193 return -1;
1194
1195 } else if (huge_minus(d2, err2)) {
1196 if (huge_minus(d1, err1)) {
1197 if (d1 == d2)
1198 return 0;
1199 return d1 < d2 ? -1 : 1;
1200 } else
1201 return 1;
1202 }
1203
1204 /* Plus infinity case */
1205 if (huge_plus(d1, err1)) {
1206 if (huge_plus(d2, err2)) {
1207 if (d1 == d2)
1208 return 0;
1209 return d1 < d2 ? -1 : 1;
1210 } else
1211 return 1;
1212 } else if (huge_plus(d2, err2)) {
1213 if (huge_plus(d1, err1)) {
1214 if (d1 == d2)
1215 return 0;
1216 return d1 < d2 ? -1 : 1;
1217 } else
1218 return -1;
1219 }
1220 }
1221
1222 if (d1 == d2)
1223 return 0;
1224 return d1 < d2 ? -1 : 1;
1225}
1226
1227/*
1228 * Implements month sort (-M).
1229 */
1230static int
1231monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused)
1232{
1233 int val1, val2;
1234 bool key1_read, key2_read;
1235
1236 val1 = val2 = 0;
1237 key1_read = key2_read = false;
1238
1239 if (debug_sort) {
1240 bwsprintf(stdout, kv1->k, "; k1=<", ">");
1241 bwsprintf(stdout, kv2->k, "; k2=<", ">");
1242 }
1243
1244 if (kv1->hint->status == HS_UNINITIALIZED) {
1245 kv1->hint->v.Mh.m = bws_month_score(kv1->k);
1246 key1_read = true;
1247 kv1->hint->status = HS_INITIALIZED;
1248 }
1249
1250 if (kv2->hint->status == HS_UNINITIALIZED) {
1251 kv2->hint->v.Mh.m = bws_month_score(kv2->k);
1252 key2_read = true;
1253 kv2->hint->status = HS_INITIALIZED;
1254 }
1255
1256 if (kv1->hint->status == HS_INITIALIZED) {
1257 val1 = kv1->hint->v.Mh.m;
1258 key1_read = true;
1259 }
1260
1261 if (kv2->hint->status == HS_INITIALIZED) {
1262 val2 = kv2->hint->v.Mh.m;
1263 key2_read = true;
1264 }
1265
1266 if (!key1_read)
1267 val1 = bws_month_score(kv1->k);
1268 if (!key2_read)
1269 val2 = bws_month_score(kv2->k);
1270
1271 if (val1 == val2)
1272 return 0;
1273 return val1 < val2 ? -1 : 1;
1274}