jcs's openbsd hax
openbsd
at jcs 1274 lines 26 kB view raw
1/* $OpenBSD: coll.c,v 1.13 2024/09/20 02:00:46 jsg Exp $ */ 2 3/*- 4 * Copyright (C) 2009 Gabor Kovesdan <gabor@FreeBSD.org> 5 * Copyright (C) 2012 Oleg Moskalenko <mom040267@gmail.com> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30#include <sys/types.h> 31 32#include <errno.h> 33#include <err.h> 34#include <langinfo.h> 35#include <limits.h> 36#include <math.h> 37#include <md5.h> 38#include <stdlib.h> 39#include <string.h> 40#include <wchar.h> 41#include <wctype.h> 42 43#include "coll.h" 44#include "vsort.h" 45 46struct key_specs *keys; 47size_t keys_num = 0; 48 49static int wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset); 50static int gnumcoll(struct key_value*, struct key_value *, size_t offset); 51static int monthcoll(struct key_value*, struct key_value *, size_t offset); 52static int numcoll(struct key_value*, struct key_value *, size_t offset); 53static int hnumcoll(struct key_value*, struct key_value *, size_t offset); 54static int randomcoll(struct key_value*, struct key_value *, size_t offset); 55static int versioncoll(struct key_value*, struct key_value *, size_t offset); 56 57/* 58 * Allocate keys array 59 */ 60struct keys_array * 61keys_array_alloc(void) 62{ 63 struct keys_array *ka; 64 size_t sz; 65 66 sz = keys_array_size(); 67 ka = sort_calloc(1, sz); 68 69 return ka; 70} 71 72/* 73 * Calculate whether we need key hint space 74 */ 75static size_t 76key_hint_size(void) 77{ 78 return need_hint ? sizeof(struct key_hint) : 0; 79} 80 81/* 82 * Calculate keys array size 83 */ 84size_t 85keys_array_size(void) 86{ 87 return keys_num * (sizeof(struct key_value) + key_hint_size()); 88} 89 90/* 91 * Clean data of keys array 92 */ 93void 94clean_keys_array(const struct bwstring *s, struct keys_array *ka) 95{ 96 if (ka) { 97 size_t i; 98 99 for (i = 0; i < keys_num; ++i) 100 if (ka->key[i].k && ka->key[i].k != s) 101 bwsfree(ka->key[i].k); 102 memset(ka, 0, keys_array_size()); 103 } 104} 105 106/* 107 * Set value of a key in the keys set 108 */ 109void 110set_key_on_keys_array(struct keys_array *ka, struct bwstring *s, size_t ind) 111{ 112 if (ka && keys_num > ind) { 113 struct key_value *kv; 114 115 kv = &(ka->key[ind]); 116 117 if (kv->k != s) 118 bwsfree(kv->k); 119 kv->k = s; 120 } 121} 122 123/* 124 * Initialize a sort list item 125 */ 126struct sort_list_item * 127sort_list_item_alloc(void) 128{ 129 struct sort_list_item *si; 130 size_t sz; 131 132 sz = sizeof(struct sort_list_item) + keys_array_size(); 133 si = sort_calloc(1, sz); 134 135 return si; 136} 137 138size_t 139sort_list_item_size(struct sort_list_item *si) 140{ 141 size_t i, ret = 0; 142 143 if (si) { 144 ret = sizeof(struct sort_list_item) + keys_array_size(); 145 if (si->str) 146 ret += bws_memsize(si->str); 147 for (i = 0; i < keys_num; ++i) { 148 struct key_value *kv; 149 150 kv = &(si->ka.key[i]); 151 152 if (kv->k != si->str) 153 ret += bws_memsize(kv->k); 154 } 155 } 156 return ret; 157} 158 159/* 160 * Calculate key for a sort list item 161 */ 162static void 163sort_list_item_make_key(struct sort_list_item *si) 164{ 165 preproc(si->str, &(si->ka)); 166} 167 168/* 169 * Set value of a sort list item. 170 * Return combined string and keys memory size. 171 */ 172void 173sort_list_item_set(struct sort_list_item *si, struct bwstring *str) 174{ 175 if (si) { 176 clean_keys_array(si->str, &(si->ka)); 177 if (si->str) { 178 if (si->str == str) { 179 /* we are trying to reset the same string */ 180 return; 181 } else { 182 bwsfree(si->str); 183 si->str = NULL; 184 } 185 } 186 si->str = str; 187 sort_list_item_make_key(si); 188 } 189} 190 191/* 192 * De-allocate a sort list item object memory 193 */ 194void 195sort_list_item_clean(struct sort_list_item *si) 196{ 197 if (si) { 198 clean_keys_array(si->str, &(si->ka)); 199 if (si->str) { 200 bwsfree(si->str); 201 si->str = NULL; 202 } 203 } 204} 205 206/* 207 * Skip columns according to specs 208 */ 209static size_t 210skip_cols_to_start(const struct bwstring *s, size_t cols, size_t start, 211 bool skip_blanks, bool *empty_key) 212{ 213 if (cols < 1) 214 return BWSLEN(s) + 1; 215 216 if (skip_blanks) 217 while (start < BWSLEN(s) && iswblank(BWS_GET(s, start))) 218 ++start; 219 220 while (start < BWSLEN(s) && cols > 1) { 221 --cols; 222 ++start; 223 } 224 225 if (start >= BWSLEN(s)) 226 *empty_key = true; 227 228 return start; 229} 230 231/* 232 * Skip fields according to specs 233 */ 234static size_t 235skip_fields_to_start(const struct bwstring *s, size_t fields, bool *empty_field) 236{ 237 if (fields < 2) { 238 if (BWSLEN(s) == 0) 239 *empty_field = true; 240 return 0; 241 } else if (!(sort_opts_vals.tflag)) { 242 size_t cpos = 0; 243 bool pb = true; 244 245 while (cpos < BWSLEN(s)) { 246 bool isblank; 247 248 isblank = iswblank(BWS_GET(s, cpos)); 249 250 if (isblank && !pb) { 251 --fields; 252 if (fields <= 1) 253 return cpos; 254 } 255 pb = isblank; 256 ++cpos; 257 } 258 if (fields > 1) 259 *empty_field = true; 260 return cpos; 261 } else { 262 size_t cpos = 0; 263 264 while (cpos < BWSLEN(s)) { 265 if (BWS_GET(s, cpos) == (wchar_t)sort_opts_vals.field_sep) { 266 --fields; 267 if (fields <= 1) 268 return cpos + 1; 269 } 270 ++cpos; 271 } 272 if (fields > 1) 273 *empty_field = true; 274 return cpos; 275 } 276} 277 278/* 279 * Find fields start 280 */ 281static void 282find_field_start(const struct bwstring *s, struct key_specs *ks, 283 size_t *field_start, size_t *key_start, bool *empty_field, bool *empty_key) 284{ 285 *field_start = skip_fields_to_start(s, ks->f1, empty_field); 286 if (!*empty_field) 287 *key_start = skip_cols_to_start(s, ks->c1, *field_start, 288 ks->pos1b, empty_key); 289 else 290 *empty_key = true; 291} 292 293/* 294 * Find end key position 295 */ 296static size_t 297find_field_end(const struct bwstring *s, struct key_specs *ks) 298{ 299 size_t f2, next_field_start, pos_end; 300 bool empty_field, empty_key; 301 302 empty_field = false; 303 empty_key = false; 304 f2 = ks->f2; 305 306 if (f2 == 0) 307 return BWSLEN(s) + 1; 308 else { 309 if (ks->c2 == 0) { 310 next_field_start = skip_fields_to_start(s, f2 + 1, 311 &empty_field); 312 if ((next_field_start > 0) && sort_opts_vals.tflag && 313 ((wchar_t)sort_opts_vals.field_sep == BWS_GET(s, 314 next_field_start - 1))) 315 --next_field_start; 316 } else 317 next_field_start = skip_fields_to_start(s, f2, 318 &empty_field); 319 } 320 321 if (empty_field || (next_field_start >= BWSLEN(s))) 322 return BWSLEN(s) + 1; 323 324 if (ks->c2) { 325 pos_end = skip_cols_to_start(s, ks->c2, next_field_start, 326 ks->pos2b, &empty_key); 327 if (pos_end < BWSLEN(s)) 328 ++pos_end; 329 } else 330 pos_end = next_field_start; 331 332 return pos_end; 333} 334 335/* 336 * Cut a field according to the key specs 337 */ 338static struct bwstring * 339cut_field(const struct bwstring *s, struct key_specs *ks) 340{ 341 struct bwstring *ret = NULL; 342 343 if (s && ks) { 344 size_t field_start, key_end, key_start, sz; 345 bool empty_field, empty_key; 346 347 field_start = 0; 348 key_start = 0; 349 empty_field = false; 350 empty_key = false; 351 352 find_field_start(s, ks, &field_start, &key_start, 353 &empty_field, &empty_key); 354 355 if (empty_key) 356 sz = 0; 357 else { 358 key_end = find_field_end(s, ks); 359 sz = (key_end < key_start) ? 0 : (key_end - key_start); 360 } 361 362 ret = bwsalloc(sz); 363 if (sz) 364 bwsnocpy(ret, s, key_start, sz); 365 } else 366 ret = bwsalloc(0); 367 368 return ret; 369} 370 371/* 372 * Preprocesses a line applying the necessary transformations 373 * specified by command line options and returns the preprocessed 374 * string, which can be used to compare. 375 */ 376int 377preproc(struct bwstring *s, struct keys_array *ka) 378{ 379 if (sort_opts_vals.kflag) { 380 size_t i; 381 for (i = 0; i < keys_num; i++) { 382 struct bwstring *key; 383 struct key_specs *kspecs; 384 struct sort_mods *sm; 385 386 kspecs = &(keys[i]); 387 key = cut_field(s, kspecs); 388 389 sm = &(kspecs->sm); 390 if (sm->dflag) 391 key = dictionary_order(key); 392 else if (sm->iflag) 393 key = ignore_nonprinting(key); 394 if (sm->fflag || sm->Mflag) 395 key = ignore_case(key); 396 397 set_key_on_keys_array(ka, key, i); 398 } 399 } else { 400 struct bwstring *ret = NULL; 401 struct sort_mods *sm = default_sort_mods; 402 403#ifdef GNUSORT_COMPATIBILITY 404 if (sm->bflag) { 405 if (ret == NULL) 406 ret = bwsdup(s); 407 ret = ignore_leading_blanks(ret); 408 } 409#endif 410 if (sm->dflag) { 411 if (ret == NULL) 412 ret = bwsdup(s); 413 ret = dictionary_order(ret); 414 } else if (sm->iflag) { 415 if (ret == NULL) 416 ret = bwsdup(s); 417 ret = ignore_nonprinting(ret); 418 } 419 if (sm->fflag || sm->Mflag) { 420 if (ret == NULL) 421 ret = bwsdup(s); 422 ret = ignore_case(ret); 423 } 424 if (ret == NULL) 425 set_key_on_keys_array(ka, s, 0); 426 else 427 set_key_on_keys_array(ka, ret, 0); 428 } 429 430 return 0; 431} 432 433cmpcoll_t 434get_sort_func(struct sort_mods *sm) 435{ 436 if (sm->nflag) 437 return numcoll; 438 else if (sm->hflag) 439 return hnumcoll; 440 else if (sm->gflag) 441 return gnumcoll; 442 else if (sm->Mflag) 443 return monthcoll; 444 else if (sm->Rflag) 445 return randomcoll; 446 else if (sm->Vflag) 447 return versioncoll; 448 else 449 return wstrcoll; 450} 451 452/* 453 * Compares the given strings. Returns a positive number if 454 * the first precedes the second, a negative number if the second is 455 * the preceding one, and zero if they are equal. This function calls 456 * the underlying collate functions, which done the actual comparison. 457 */ 458int 459key_coll(struct keys_array *ps1, struct keys_array *ps2, size_t offset) 460{ 461 struct sort_mods *sm; 462 int res = 0; 463 size_t i; 464 465 for (i = 0; i < keys_num; ++i) { 466 sm = &(keys[i].sm); 467 468 if (sm->rflag) 469 res = sm->func(&(ps2->key[i]), &(ps1->key[i]), offset); 470 else 471 res = sm->func(&(ps1->key[i]), &(ps2->key[i]), offset); 472 473 if (res) 474 break; 475 476 /* offset applies to only the first key */ 477 offset = 0; 478 } 479 return res; 480} 481 482/* 483 * Compare two strings. 484 * Plain symbol-by-symbol comparison. 485 */ 486int 487top_level_str_coll(const struct bwstring *s1, const struct bwstring *s2) 488{ 489 if (default_sort_mods->rflag) { 490 const struct bwstring *tmp; 491 492 tmp = s1; 493 s1 = s2; 494 s2 = tmp; 495 } 496 497 return bwscoll(s1, s2, 0); 498} 499 500/* 501 * Compare a string and a sort list item, according to the sort specs. 502 */ 503int 504str_list_coll(struct bwstring *str1, struct sort_list_item **ss2) 505{ 506 struct keys_array *ka1; 507 int ret = 0; 508 509 ka1 = keys_array_alloc(); 510 511 preproc(str1, ka1); 512 513 sort_list_item_make_key(*ss2); 514 515 if (debug_sort) { 516 bwsprintf(stdout, str1, "; s1=<", ">"); 517 bwsprintf(stdout, (*ss2)->str, ", s2=<", ">"); 518 } 519 520 ret = key_coll(ka1, &((*ss2)->ka), 0); 521 522 if (debug_sort) 523 printf("; cmp1=%d", ret); 524 525 clean_keys_array(str1, ka1); 526 sort_free(ka1); 527 528 if ((ret == 0) && !(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 529 ret = top_level_str_coll(str1, ((*ss2)->str)); 530 if (debug_sort) 531 printf("; cmp2=%d", ret); 532 } 533 534 if (debug_sort) 535 putchar('\n'); 536 537 return ret; 538} 539 540/* 541 * Compare two sort list items, according to the sort specs. 542 */ 543int 544list_coll_offset(struct sort_list_item **ss1, struct sort_list_item **ss2, 545 size_t offset) 546{ 547 int ret; 548 549 ret = key_coll(&((*ss1)->ka), &((*ss2)->ka), offset); 550 551 if (debug_sort) { 552 if (offset) 553 printf("; offset=%zu", offset); 554 bwsprintf(stdout, ((*ss1)->str), "; s1=<", ">"); 555 bwsprintf(stdout, ((*ss2)->str), ", s2=<", ">"); 556 printf("; cmp1=%d\n", ret); 557 } 558 559 if (ret) 560 return ret; 561 562 if (!(sort_opts_vals.sflag) && sort_opts_vals.complex_sort) { 563 ret = top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 564 if (debug_sort) 565 printf("; cmp2=%d\n", ret); 566 } 567 568 return ret; 569} 570 571/* 572 * Compare two sort list items, according to the sort specs. 573 */ 574int 575list_coll(const void *ss1, const void *ss2) 576{ 577 return list_coll_offset((struct sort_list_item **)ss1, 578 (struct sort_list_item **)ss2, 0); 579} 580 581#define LSCDEF(N) \ 582static int \ 583list_coll_##N(struct sort_list_item **ss1, struct sort_list_item **ss2) \ 584{ \ 585 \ 586 return list_coll_offset(ss1, ss2, N); \ 587} 588 589LSCDEF(0) 590LSCDEF(1) 591LSCDEF(2) 592LSCDEF(3) 593LSCDEF(4) 594LSCDEF(5) 595LSCDEF(6) 596LSCDEF(7) 597LSCDEF(8) 598LSCDEF(9) 599LSCDEF(10) 600LSCDEF(11) 601LSCDEF(12) 602LSCDEF(13) 603LSCDEF(14) 604LSCDEF(15) 605LSCDEF(16) 606LSCDEF(17) 607LSCDEF(18) 608LSCDEF(19) 609LSCDEF(20) 610 611listcoll_t 612get_list_call_func(size_t offset) 613{ 614 static const listcoll_t lsarray[] = { list_coll_0, list_coll_1, 615 list_coll_2, list_coll_3, list_coll_4, list_coll_5, 616 list_coll_6, list_coll_7, list_coll_8, list_coll_9, 617 list_coll_10, list_coll_11, list_coll_12, list_coll_13, 618 list_coll_14, list_coll_15, list_coll_16, list_coll_17, 619 list_coll_18, list_coll_19, list_coll_20 }; 620 621 if (offset <= 20) 622 return lsarray[offset]; 623 624 return list_coll_0; 625} 626 627/* 628 * Compare two sort list items, only by their original string. 629 */ 630int 631list_coll_by_str_only(struct sort_list_item **ss1, struct sort_list_item **ss2) 632{ 633 return top_level_str_coll(((*ss1)->str), ((*ss2)->str)); 634} 635 636/* 637 * Maximum size of a number in the string (before or after decimal point) 638 */ 639#define MAX_NUM_SIZE (128) 640 641/* 642 * Set suffix value 643 */ 644static void 645setsuffix(wchar_t c, unsigned char *si) 646{ 647 switch (c){ 648 case L'k': 649 case L'K': 650 *si = 1; 651 break; 652 case L'M': 653 *si = 2; 654 break; 655 case L'G': 656 *si = 3; 657 break; 658 case L'T': 659 *si = 4; 660 break; 661 case L'P': 662 *si = 5; 663 break; 664 case L'E': 665 *si = 6; 666 break; 667 case L'Z': 668 *si = 7; 669 break; 670 case L'Y': 671 *si = 8; 672 break; 673 default: 674 *si = 0; 675 } 676} 677 678/* 679 * Read string s and parse the string into a fixed-decimal-point number. 680 * sign equals -1 if the number is negative (explicit plus is not allowed, 681 * according to GNU sort's "info sort". 682 * The number part before decimal point is in the smain, after the decimal 683 * point is in sfrac, tail is the pointer to the remainder of the string. 684 */ 685static int 686read_number(struct bwstring *s0, int *sign, wchar_t *smain, size_t *main_len, wchar_t *sfrac, size_t *frac_len, unsigned char *si) 687{ 688 bwstring_iterator s; 689 690 s = bws_begin(s0); 691 692 /* always end the fraction with zero, even if we have no fraction */ 693 sfrac[0] = 0; 694 695 while (iswblank(bws_get_iter_value(s))) 696 s = bws_iterator_inc(s, 1); 697 698 if (bws_get_iter_value(s) == L'-') { 699 *sign = -1; 700 s = bws_iterator_inc(s, 1); 701 } 702 703 // This is '0', not '\0', do not change this 704 while (iswdigit(bws_get_iter_value(s)) && 705 (bws_get_iter_value(s) == L'0')) 706 s = bws_iterator_inc(s, 1); 707 708 while (bws_get_iter_value(s) && *main_len < MAX_NUM_SIZE) { 709 if (iswdigit(bws_get_iter_value(s))) { 710 smain[*main_len] = bws_get_iter_value(s); 711 s = bws_iterator_inc(s, 1); 712 *main_len += 1; 713 } else 714 break; 715 } 716 717 smain[*main_len] = 0; 718 719 if (bws_get_iter_value(s) == L'.') { 720 s = bws_iterator_inc(s, 1); 721 while (iswdigit(bws_get_iter_value(s)) && 722 *frac_len < MAX_NUM_SIZE) { 723 sfrac[*frac_len] = bws_get_iter_value(s); 724 s = bws_iterator_inc(s, 1); 725 *frac_len += 1; 726 } 727 sfrac[*frac_len] = 0; 728 729 while (*frac_len > 0 && sfrac[*frac_len - 1] == L'0') { 730 --(*frac_len); 731 sfrac[*frac_len] = L'\0'; 732 } 733 } 734 735 setsuffix(bws_get_iter_value(s), si); 736 737 if ((*main_len + *frac_len) == 0) 738 *sign = 0; 739 740 return 0; 741} 742 743/* 744 * Implements string sort. 745 */ 746static int 747wstrcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 748{ 749 750 if (debug_sort) { 751 if (offset) 752 printf("; offset=%zu\n", offset); 753 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 754 printf("(%zu)", BWSLEN(kv1->k)); 755 bwsprintf(stdout, kv2->k, ", k2=<", ">"); 756 printf("(%zu)", BWSLEN(kv2->k)); 757 } 758 759 return bwscoll(kv1->k, kv2->k, offset); 760} 761 762/* 763 * Compare two suffixes 764 */ 765static inline int 766cmpsuffix(unsigned char si1, unsigned char si2) 767{ 768 return (char)si1 - (char)si2; 769} 770 771/* 772 * Implements numeric sort for -n and -h. 773 */ 774static int 775numcoll_impl(struct key_value *kv1, struct key_value *kv2, 776 size_t offset __unused, bool use_suffix) 777{ 778 struct bwstring *s1, *s2; 779 wchar_t sfrac1[MAX_NUM_SIZE + 1], sfrac2[MAX_NUM_SIZE + 1]; 780 wchar_t smain1[MAX_NUM_SIZE + 1], smain2[MAX_NUM_SIZE + 1]; 781 int cmp_res, sign1, sign2; 782 size_t frac1, frac2, main1, main2; 783 unsigned char SI1, SI2; 784 bool e1, e2, key1_read, key2_read; 785 786 s1 = kv1->k; 787 s2 = kv2->k; 788 sign1 = sign2 = 0; 789 main1 = main2 = 0; 790 frac1 = frac2 = 0; 791 792 key1_read = key2_read = false; 793 794 if (debug_sort) { 795 bwsprintf(stdout, s1, "; k1=<", ">"); 796 bwsprintf(stdout, s2, ", k2=<", ">"); 797 } 798 799 if (s1 == s2) 800 return 0; 801 802 if (kv1->hint->status == HS_UNINITIALIZED) { 803 /* read the number from the string */ 804 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 805 key1_read = true; 806 kv1->hint->v.nh.n1 = wcstoull(smain1, NULL, 10); 807 if (main1 < 1 && frac1 < 1) 808 kv1->hint->v.nh.empty=true; 809 kv1->hint->v.nh.si = SI1; 810 kv1->hint->status = (kv1->hint->v.nh.n1 != ULLONG_MAX) ? 811 HS_INITIALIZED : HS_ERROR; 812 kv1->hint->v.nh.neg = (sign1 < 0) ? true : false; 813 } 814 815 if (kv2->hint->status == HS_UNINITIALIZED) { 816 /* read the number from the string */ 817 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 818 key2_read = true; 819 kv2->hint->v.nh.n1 = wcstoull(smain2, NULL, 10); 820 if (main2 < 1 && frac2 < 1) 821 kv2->hint->v.nh.empty=true; 822 kv2->hint->v.nh.si = SI2; 823 kv2->hint->status = (kv2->hint->v.nh.n1 != ULLONG_MAX) ? 824 HS_INITIALIZED : HS_ERROR; 825 kv2->hint->v.nh.neg = (sign2 < 0) ? true : false; 826 } 827 828 if (kv1->hint->status == HS_INITIALIZED && kv2->hint->status == 829 HS_INITIALIZED) { 830 unsigned long long n1, n2; 831 bool neg1, neg2; 832 833 e1 = kv1->hint->v.nh.empty; 834 e2 = kv2->hint->v.nh.empty; 835 836 if (e1 && e2) 837 return 0; 838 839 neg1 = kv1->hint->v.nh.neg; 840 neg2 = kv2->hint->v.nh.neg; 841 842 if (neg1 && !neg2) 843 return -1; 844 if (neg2 && !neg1) 845 return 1; 846 847 if (e1) 848 return neg2 ? 1 : -1; 849 else if (e2) 850 return neg1 ? -1 : 1; 851 852 853 if (use_suffix) { 854 cmp_res = cmpsuffix(kv1->hint->v.nh.si, kv2->hint->v.nh.si); 855 if (cmp_res) 856 return neg1 ? -cmp_res : cmp_res; 857 } 858 859 n1 = kv1->hint->v.nh.n1; 860 n2 = kv2->hint->v.nh.n1; 861 if (n1 < n2) 862 return neg1 ? 1 : -1; 863 else if (n1 > n2) 864 return neg1 ? -1 : 1; 865 } 866 867 /* read the numbers from the strings */ 868 if (!key1_read) 869 read_number(s1, &sign1, smain1, &main1, sfrac1, &frac1, &SI1); 870 if (!key2_read) 871 read_number(s2, &sign2, smain2, &main2, sfrac2, &frac2, &SI2); 872 873 e1 = ((main1 + frac1) == 0); 874 e2 = ((main2 + frac2) == 0); 875 876 if (e1 && e2) 877 return 0; 878 879 /* we know the result if the signs are different */ 880 if (sign1 < 0 && sign2 >= 0) 881 return -1; 882 if (sign1 >= 0 && sign2 < 0) 883 return 1; 884 885 if (e1) 886 return (sign2 < 0) ? +1 : -1; 887 else if (e2) 888 return (sign1 < 0) ? -1 : 1; 889 890 if (use_suffix) { 891 cmp_res = cmpsuffix(SI1, SI2); 892 if (cmp_res) 893 return (sign1 < 0) ? -cmp_res : cmp_res; 894 } 895 896 /* if both numbers are empty assume that the strings are equal */ 897 if (main1 < 1 && main2 < 1 && frac1 < 1 && frac2 < 1) 898 return 0; 899 900 /* 901 * if the main part is of different size, we know the result 902 * (because the leading zeros are removed) 903 */ 904 if (main1 < main2) 905 cmp_res = -1; 906 else if (main1 > main2) 907 cmp_res = +1; 908 /* if the sizes are equal then simple non-collate string compare gives the correct result */ 909 else 910 cmp_res = wcscmp(smain1, smain2); 911 912 /* check fraction */ 913 if (!cmp_res) 914 cmp_res = wcscmp(sfrac1, sfrac2); 915 916 if (!cmp_res) 917 return 0; 918 919 /* reverse result if the signs are negative */ 920 if (sign1 < 0 && sign2 < 0) 921 cmp_res = -cmp_res; 922 923 return cmp_res; 924} 925 926/* 927 * Implements numeric sort (-n). 928 */ 929static int 930numcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 931{ 932 return numcoll_impl(kv1, kv2, offset, false); 933} 934 935/* 936 * Implements 'human' numeric sort (-h). 937 */ 938static int 939hnumcoll(struct key_value *kv1, struct key_value *kv2, size_t offset) 940{ 941 return numcoll_impl(kv1, kv2, offset, true); 942} 943 944/* 945 * Implements random sort (-R). 946 */ 947static int 948randomcoll(struct key_value *kv1, struct key_value *kv2, 949 size_t offset __unused) 950{ 951 struct bwstring *s1, *s2; 952 MD5_CTX ctx1, ctx2; 953 char *b1, *b2; 954 955 s1 = kv1->k; 956 s2 = kv2->k; 957 958 if (debug_sort) { 959 bwsprintf(stdout, s1, "; k1=<", ">"); 960 bwsprintf(stdout, s2, ", k2=<", ">"); 961 } 962 963 if (s1 == s2) 964 return 0; 965 966 memcpy(&ctx1, &md5_ctx, sizeof(MD5_CTX)); 967 memcpy(&ctx2, &md5_ctx, sizeof(MD5_CTX)); 968 969 MD5Update(&ctx1, bwsrawdata(s1), bwsrawlen(s1)); 970 MD5Update(&ctx2, bwsrawdata(s2), bwsrawlen(s2)); 971 b1 = MD5End(&ctx1, NULL); 972 b2 = MD5End(&ctx2, NULL); 973 if (b1 == NULL) { 974 if (b2 == NULL) 975 return 0; 976 else { 977 sort_free(b2); 978 return -1; 979 } 980 } else if (b2 == NULL) { 981 sort_free(b1); 982 return 1; 983 } else { 984 int cmp_res; 985 986 cmp_res = strcmp(b1, b2); 987 sort_free(b1); 988 sort_free(b2); 989 990 if (!cmp_res) 991 cmp_res = bwscoll(s1, s2, 0); 992 993 return cmp_res; 994 } 995} 996 997/* 998 * Implements version sort (-V). 999 */ 1000static int 1001versioncoll(struct key_value *kv1, struct key_value *kv2, 1002 size_t offset __unused) 1003{ 1004 struct bwstring *s1, *s2; 1005 1006 s1 = kv1->k; 1007 s2 = kv2->k; 1008 1009 if (debug_sort) { 1010 bwsprintf(stdout, s1, "; k1=<", ">"); 1011 bwsprintf(stdout, s2, ", k2=<", ">"); 1012 } 1013 1014 if (s1 == s2) 1015 return 0; 1016 1017 return vcmp(s1, s2); 1018} 1019 1020/* 1021 * Check for minus infinity 1022 */ 1023static inline bool 1024huge_minus(double d, int err1) 1025{ 1026 if (err1 == ERANGE) 1027 if (d == -HUGE_VAL || d == -HUGE_VALF || d == -HUGE_VALL) 1028 return 1; 1029 1030 return 0; 1031} 1032 1033/* 1034 * Check for plus infinity 1035 */ 1036static inline bool 1037huge_plus(double d, int err1) 1038{ 1039 if (err1 == ERANGE) 1040 if (d == HUGE_VAL || d == HUGE_VALF || d == HUGE_VALL) 1041 return 1; 1042 1043 return 0; 1044} 1045 1046/* 1047 * Check whether a function is a NAN 1048 */ 1049static bool 1050is_nan(double d) 1051{ 1052#if defined(NAN) 1053 return (d == NAN || isnan(d)); 1054#else 1055 return (isnan(d)); 1056#endif 1057} 1058 1059/* 1060 * Compare two NANs 1061 */ 1062static int 1063cmp_nans(double d1, double d2) 1064{ 1065 if (d1 == d2) 1066 return 0; 1067 return d1 < d2 ? -1 : 1; 1068} 1069 1070/* 1071 * Implements general numeric sort (-g). 1072 */ 1073static int 1074gnumcoll(struct key_value *kv1, struct key_value *kv2, 1075 size_t offset __unused) 1076{ 1077 double d1, d2; 1078 int err1, err2; 1079 bool empty1, empty2, key1_read, key2_read; 1080 1081 d1 = d2 = 0; 1082 err1 = err2 = 0; 1083 key1_read = key2_read = false; 1084 1085 if (debug_sort) { 1086 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1087 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1088 } 1089 1090 if (kv1->hint->status == HS_UNINITIALIZED) { 1091 errno = 0; 1092 d1 = bwstod(kv1->k, &empty1); 1093 err1 = errno; 1094 1095 if (empty1) { 1096 kv1->hint->v.gh.notnum = true; 1097 kv1->hint->status = HS_INITIALIZED; 1098 } else if (err1 == 0) { 1099 kv1->hint->v.gh.d = d1; 1100 kv1->hint->v.gh.nan = is_nan(d1); 1101 kv1->hint->status = HS_INITIALIZED; 1102 } else 1103 kv1->hint->status = HS_ERROR; 1104 1105 key1_read = true; 1106 } 1107 1108 if (kv2->hint->status == HS_UNINITIALIZED) { 1109 errno = 0; 1110 d2 = bwstod(kv2->k, &empty2); 1111 err2 = errno; 1112 1113 if (empty2) { 1114 kv2->hint->v.gh.notnum = true; 1115 kv2->hint->status = HS_INITIALIZED; 1116 } else if (err2 == 0) { 1117 kv2->hint->v.gh.d = d2; 1118 kv2->hint->v.gh.nan = is_nan(d2); 1119 kv2->hint->status = HS_INITIALIZED; 1120 } else 1121 kv2->hint->status = HS_ERROR; 1122 1123 key2_read = true; 1124 } 1125 1126 if (kv1->hint->status == HS_INITIALIZED && 1127 kv2->hint->status == HS_INITIALIZED) { 1128#ifdef GNUSORT_COMPATIBILITY 1129 if (kv1->hint->v.gh.notnum) 1130 return kv2->hint->v.gh.notnum ? 0 : -1; 1131 else if (kv2->hint->v.gh.notnum) 1132 return 1; 1133#else 1134 if (kv1->hint->v.gh.notnum && kv2->hint->v.gh.notnum) 1135 return 0; 1136#endif 1137 1138 if (kv1->hint->v.gh.nan) 1139 return kv2->hint->v.gh.nan ? 1140 cmp_nans(kv1->hint->v.gh.d, kv2->hint->v.gh.d) : -1; 1141 else if (kv2->hint->v.gh.nan) 1142 return 1; 1143 1144 d1 = kv1->hint->v.gh.d; 1145 d2 = kv2->hint->v.gh.d; 1146 1147 if (d1 < d2) 1148 return -1; 1149 else if (d1 > d2) 1150 return 1; 1151 else 1152 return 0; 1153 } 1154 1155 if (!key1_read) { 1156 errno = 0; 1157 d1 = bwstod(kv1->k, &empty1); 1158 err1 = errno; 1159 } 1160 1161 if (!key2_read) { 1162 errno = 0; 1163 d2 = bwstod(kv2->k, &empty2); 1164 err2 = errno; 1165 } 1166 1167 /* Non-value case */ 1168#ifdef GNUSORT_COMPATIBILITY 1169 if (empty1) 1170 return empty2 ? 0 : -1; 1171 else if (empty2) 1172 return 1; 1173#else 1174 if (empty1 && empty2) 1175 return 0; 1176#endif 1177 1178 /* NAN case */ 1179 if (is_nan(d1)) 1180 return is_nan(d2) ? cmp_nans(d1, d2) : -1; 1181 else if (is_nan(d2)) 1182 return 1; 1183 1184 /* Infinities */ 1185 if (err1 == ERANGE || err2 == ERANGE) { 1186 /* Minus infinity case */ 1187 if (huge_minus(d1, err1)) { 1188 if (huge_minus(d2, err2)) { 1189 if (d1 == d2) 1190 return 0; 1191 return d1 < d2 ? -1 : 1; 1192 } else 1193 return -1; 1194 1195 } else if (huge_minus(d2, err2)) { 1196 if (huge_minus(d1, err1)) { 1197 if (d1 == d2) 1198 return 0; 1199 return d1 < d2 ? -1 : 1; 1200 } else 1201 return 1; 1202 } 1203 1204 /* Plus infinity case */ 1205 if (huge_plus(d1, err1)) { 1206 if (huge_plus(d2, err2)) { 1207 if (d1 == d2) 1208 return 0; 1209 return d1 < d2 ? -1 : 1; 1210 } else 1211 return 1; 1212 } else if (huge_plus(d2, err2)) { 1213 if (huge_plus(d1, err1)) { 1214 if (d1 == d2) 1215 return 0; 1216 return d1 < d2 ? -1 : 1; 1217 } else 1218 return -1; 1219 } 1220 } 1221 1222 if (d1 == d2) 1223 return 0; 1224 return d1 < d2 ? -1 : 1; 1225} 1226 1227/* 1228 * Implements month sort (-M). 1229 */ 1230static int 1231monthcoll(struct key_value *kv1, struct key_value *kv2, size_t offset __unused) 1232{ 1233 int val1, val2; 1234 bool key1_read, key2_read; 1235 1236 val1 = val2 = 0; 1237 key1_read = key2_read = false; 1238 1239 if (debug_sort) { 1240 bwsprintf(stdout, kv1->k, "; k1=<", ">"); 1241 bwsprintf(stdout, kv2->k, "; k2=<", ">"); 1242 } 1243 1244 if (kv1->hint->status == HS_UNINITIALIZED) { 1245 kv1->hint->v.Mh.m = bws_month_score(kv1->k); 1246 key1_read = true; 1247 kv1->hint->status = HS_INITIALIZED; 1248 } 1249 1250 if (kv2->hint->status == HS_UNINITIALIZED) { 1251 kv2->hint->v.Mh.m = bws_month_score(kv2->k); 1252 key2_read = true; 1253 kv2->hint->status = HS_INITIALIZED; 1254 } 1255 1256 if (kv1->hint->status == HS_INITIALIZED) { 1257 val1 = kv1->hint->v.Mh.m; 1258 key1_read = true; 1259 } 1260 1261 if (kv2->hint->status == HS_INITIALIZED) { 1262 val2 = kv2->hint->v.Mh.m; 1263 key2_read = true; 1264 } 1265 1266 if (!key1_read) 1267 val1 = bws_month_score(kv1->k); 1268 if (!key2_read) 1269 val2 = bws_month_score(kv2->k); 1270 1271 if (val1 == val2) 1272 return 0; 1273 return val1 < val2 ? -1 : 1; 1274}