Serenity Operating System
at master 618 lines 21 kB view raw
1/* 2 * Copyright (c) 2021, the SerenityOS developers. 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/Assertions.h> 8#include <AK/Format.h> 9#include <AK/GenericLexer.h> 10#include <ctype.h> 11#include <stdarg.h> 12#include <stdio.h> 13#include <stdlib.h> 14#include <string.h> 15 16enum class LengthModifier { 17 None, 18 Default, 19 Char, 20 Short, 21 Long, 22 LongLong, 23 IntMax, 24 Size, 25 PtrDiff, 26 LongDouble, 27}; 28 29enum class ConversionSpecifier { 30 Unspecified, 31 Decimal, 32 Integer, 33 Octal, 34 Unsigned, 35 Hex, 36 Floating, 37 String, 38 UseScanList, 39 Character, 40 Pointer, 41 OutputNumberOfBytes, 42 Invalid, 43}; 44 45enum class ReadKind { 46 Normal, 47 Octal, 48 Hex, 49 Infer, 50}; 51 52template<typename T, typename ApT, ReadKind kind = ReadKind::Normal> 53struct ReadElementConcrete { 54 bool operator()(GenericLexer&, va_list) 55 { 56 return false; 57 } 58}; 59 60template<typename ApT, ReadKind kind> 61struct ReadElementConcrete<int, ApT, kind> { 62 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 63 { 64 long value = 0; 65 char* endptr = nullptr; 66 auto nptr = lexer.remaining().characters_without_null_termination(); 67 if constexpr (kind == ReadKind::Normal) 68 value = strtol(nptr, &endptr, 10); 69 if constexpr (kind == ReadKind::Octal) 70 value = strtol(nptr, &endptr, 8); 71 if constexpr (kind == ReadKind::Hex) 72 value = strtol(nptr, &endptr, 16); 73 if constexpr (kind == ReadKind::Infer) 74 value = strtol(nptr, &endptr, 0); 75 76 if (!endptr) 77 return false; 78 79 if (endptr == nptr) 80 return false; 81 82 auto diff = endptr - nptr; 83 VERIFY(diff > 0); 84 lexer.ignore((size_t)diff); 85 86 if (!suppress_assignment) { 87 auto* ptr = va_arg(*ap, ApT*); 88 *ptr = value; 89 } 90 return true; 91 } 92}; 93 94template<typename ApT, ReadKind kind> 95struct ReadElementConcrete<char, ApT, kind> { 96 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 97 { 98 static_assert(kind == ReadKind::Normal, "Can't read a non-normal character"); 99 100 if (lexer.is_eof()) 101 return false; 102 103 auto ch = lexer.consume(); 104 if (!suppress_assignment) { 105 auto* ptr = va_arg(*ap, ApT*); 106 *ptr = ch; 107 } 108 return true; 109 } 110}; 111 112template<typename ApT, ReadKind kind> 113struct ReadElementConcrete<unsigned, ApT, kind> { 114 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 115 { 116 unsigned long value = 0; 117 char* endptr = nullptr; 118 auto nptr = lexer.remaining().characters_without_null_termination(); 119 if constexpr (kind == ReadKind::Normal) 120 value = strtoul(nptr, &endptr, 10); 121 if constexpr (kind == ReadKind::Octal) 122 value = strtoul(nptr, &endptr, 8); 123 if constexpr (kind == ReadKind::Hex) 124 value = strtoul(nptr, &endptr, 16); 125 if constexpr (kind == ReadKind::Infer) 126 value = strtoul(nptr, &endptr, 0); 127 128 if (!endptr) 129 return false; 130 131 if (endptr == nptr) 132 return false; 133 134 auto diff = endptr - nptr; 135 VERIFY(diff > 0); 136 lexer.ignore((size_t)diff); 137 138 if (!suppress_assignment) { 139 auto* ptr = va_arg(*ap, ApT*); 140 *ptr = value; 141 } 142 return true; 143 } 144}; 145 146template<typename ApT, ReadKind kind> 147struct ReadElementConcrete<long long, ApT, kind> { 148 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 149 { 150 long long value = 0; 151 char* endptr = nullptr; 152 auto nptr = lexer.remaining().characters_without_null_termination(); 153 if constexpr (kind == ReadKind::Normal) 154 value = strtoll(nptr, &endptr, 10); 155 if constexpr (kind == ReadKind::Octal) 156 value = strtoll(nptr, &endptr, 8); 157 if constexpr (kind == ReadKind::Hex) 158 value = strtoll(nptr, &endptr, 16); 159 if constexpr (kind == ReadKind::Infer) 160 value = strtoll(nptr, &endptr, 0); 161 162 if (!endptr) 163 return false; 164 165 if (endptr == nptr) 166 return false; 167 168 auto diff = endptr - nptr; 169 VERIFY(diff > 0); 170 lexer.ignore((size_t)diff); 171 172 if (!suppress_assignment) { 173 auto* ptr = va_arg(*ap, ApT*); 174 *ptr = value; 175 } 176 return true; 177 } 178}; 179 180template<typename ApT, ReadKind kind> 181struct ReadElementConcrete<unsigned long long, ApT, kind> { 182 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 183 { 184 unsigned long long value = 0; 185 char* endptr = nullptr; 186 auto nptr = lexer.remaining().characters_without_null_termination(); 187 if constexpr (kind == ReadKind::Normal) 188 value = strtoull(nptr, &endptr, 10); 189 if constexpr (kind == ReadKind::Octal) 190 value = strtoull(nptr, &endptr, 8); 191 if constexpr (kind == ReadKind::Hex) 192 value = strtoull(nptr, &endptr, 16); 193 if constexpr (kind == ReadKind::Infer) 194 value = strtoull(nptr, &endptr, 0); 195 196 if (!endptr) 197 return false; 198 199 if (endptr == nptr) 200 return false; 201 202 auto diff = endptr - nptr; 203 VERIFY(diff > 0); 204 lexer.ignore((size_t)diff); 205 206 if (!suppress_assignment) { 207 auto* ptr = va_arg(*ap, ApT*); 208 *ptr = value; 209 } 210 return true; 211 } 212}; 213 214template<typename ApT, ReadKind kind> 215struct ReadElementConcrete<float, ApT, kind> { 216 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment) 217 { 218 double value = 0; 219 char* endptr = nullptr; 220 auto nptr = lexer.remaining().characters_without_null_termination(); 221 if constexpr (kind == ReadKind::Normal) 222 value = strtod(nptr, &endptr); 223 else 224 return false; 225 226 if (!endptr) 227 return false; 228 229 if (endptr == nptr) 230 return false; 231 232 auto diff = endptr - nptr; 233 VERIFY(diff > 0); 234 lexer.ignore((size_t)diff); 235 236 if (!suppress_assignment) { 237 auto* ptr = va_arg(*ap, ApT*); 238 *ptr = value; 239 } 240 return true; 241 } 242}; 243 244template<typename T, ReadKind kind> 245struct ReadElement { 246 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment) 247 { 248 switch (length_modifier) { 249 default: 250 case LengthModifier::None: 251 VERIFY_NOT_REACHED(); 252 case LengthModifier::Default: 253 return ReadElementConcrete<T, T, kind> {}(input_lexer, ap, suppress_assignment); 254 case LengthModifier::Char: 255 return ReadElementConcrete<T, char, kind> {}(input_lexer, ap, suppress_assignment); 256 case LengthModifier::Short: 257 return ReadElementConcrete<T, short, kind> {}(input_lexer, ap, suppress_assignment); 258 case LengthModifier::Long: 259 if constexpr (IsSame<T, int>) 260 return ReadElementConcrete<T, long, kind> {}(input_lexer, ap, suppress_assignment); 261 if constexpr (IsSame<T, unsigned>) 262 return ReadElementConcrete<T, unsigned long, kind> {}(input_lexer, ap, suppress_assignment); 263 if constexpr (IsSame<T, float>) 264 return ReadElementConcrete<int, double, kind> {}(input_lexer, ap, suppress_assignment); 265 return false; 266 case LengthModifier::LongLong: 267 if constexpr (IsSame<T, int>) 268 return ReadElementConcrete<long long, long long, kind> {}(input_lexer, ap, suppress_assignment); 269 if constexpr (IsSame<T, unsigned>) 270 return ReadElementConcrete<unsigned long long, unsigned long long, kind> {}(input_lexer, ap, suppress_assignment); 271 if constexpr (IsSame<T, float>) 272 return ReadElementConcrete<long long, double, kind> {}(input_lexer, ap, suppress_assignment); 273 return false; 274 case LengthModifier::IntMax: 275 return ReadElementConcrete<T, intmax_t, kind> {}(input_lexer, ap, suppress_assignment); 276 case LengthModifier::Size: 277 return ReadElementConcrete<T, size_t, kind> {}(input_lexer, ap, suppress_assignment); 278 case LengthModifier::PtrDiff: 279 return ReadElementConcrete<T, ptrdiff_t, kind> {}(input_lexer, ap, suppress_assignment); 280 case LengthModifier::LongDouble: 281 return ReadElementConcrete<T, long double, kind> {}(input_lexer, ap, suppress_assignment); 282 } 283 } 284}; 285 286template<> 287struct ReadElement<char*, ReadKind::Normal> { 288 ReadElement(StringView scan_set = {}, bool invert = false) 289 : scan_set(scan_set.is_null() ? " \t\n\f\r"sv : scan_set) 290 , invert(scan_set.is_null() ? true : invert) 291 { 292 } 293 294 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment) 295 { 296 // FIXME: Implement wide strings and such. 297 if (length_modifier != LengthModifier::Default) 298 return false; 299 300 auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); }); 301 if (str.is_empty()) 302 return false; 303 304 if (!suppress_assignment) { 305 auto* ptr = va_arg(*ap, char*); 306 memcpy(ptr, str.characters_without_null_termination(), str.length()); 307 ptr[str.length()] = 0; 308 } 309 310 return true; 311 } 312 313private: 314 bool matches(char c) const 315 { 316 return invert ^ scan_set.contains(c); 317 } 318 319 const StringView scan_set; 320 bool invert { false }; 321}; 322 323template<> 324struct ReadElement<void*, ReadKind::Normal> { 325 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment) 326 { 327 if (length_modifier != LengthModifier::Default) 328 return false; 329 330 auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); }); 331 332 if (count != 8) { 333 fail:; 334 for (size_t i = 0; i < count; ++i) 335 input_lexer.retreat(); 336 return false; 337 } 338 339 char buf[9] { 0 }; 340 memcpy(buf, str.characters_without_null_termination(), 8); 341 buf[8] = 0; 342 char* endptr = nullptr; 343 auto value = strtoull(buf, &endptr, 16); 344 345 if (endptr != &buf[8]) 346 goto fail; 347 348 if (!suppress_assignment) { 349 auto* ptr = va_arg(*ap, void**); 350 memcpy(ptr, &value, sizeof(value)); 351 } 352 return true; 353 } 354 355private: 356 bool should_consume(char c) 357 { 358 if (count == 8) 359 return false; 360 if (!isxdigit(c)) 361 return false; 362 363 ++count; 364 return true; 365 } 366 size_t count { 0 }; 367}; 368 369extern "C" int vsscanf(char const* input, char const* format, va_list ap) 370{ 371 GenericLexer format_lexer { { format, strlen(format) } }; 372 GenericLexer input_lexer { { input, strlen(input) } }; 373 374 int elements_matched = 0; 375 376 va_list copy; 377 __builtin_va_copy(copy, ap); 378 379 while (!format_lexer.is_eof()) { 380 if (format_lexer.next_is(isspace)) { 381 format_lexer.ignore_while(isspace); 382 input_lexer.ignore_while(isspace); 383 } 384 385 if (!format_lexer.next_is('%')) { 386 read_one_literal:; 387 if (format_lexer.is_eof()) 388 break; 389 390 auto next_char = format_lexer.consume(); 391 if (!input_lexer.consume_specific(next_char)) 392 return elements_matched; 393 continue; 394 } 395 396 if (format_lexer.next_is("%%")) { 397 format_lexer.ignore(); 398 goto read_one_literal; 399 } 400 401 format_lexer.ignore(); // '%' 402 403 bool suppress_assignment = false; 404 if (format_lexer.next_is('*')) { 405 suppress_assignment = true; 406 format_lexer.ignore(); 407 } 408 409 // Parse width specification 410 [[maybe_unused]] int width_specifier = 0; 411 if (format_lexer.next_is(isdigit)) { 412 auto width_digits = format_lexer.consume_while([](char c) { return isdigit(c); }); 413 width_specifier = width_digits.to_int().value(); 414 // FIXME: Actually use width specifier 415 } 416 417 bool invert_scanlist = false; 418 StringView scanlist; 419 LengthModifier length_modifier { LengthModifier::None }; 420 ConversionSpecifier conversion_specifier { ConversionSpecifier::Unspecified }; 421 reread_lookahead:; 422 auto format_lookahead = format_lexer.peek(); 423 if (length_modifier == LengthModifier::None) { 424 switch (format_lookahead) { 425 case 'h': 426 if (format_lexer.peek(1) == 'h') { 427 format_lexer.consume(2); 428 length_modifier = LengthModifier::Char; 429 } else { 430 format_lexer.consume(1); 431 length_modifier = LengthModifier::Short; 432 } 433 break; 434 case 'l': 435 if (format_lexer.peek(1) == 'l') { 436 format_lexer.consume(2); 437 length_modifier = LengthModifier::LongLong; 438 } else { 439 format_lexer.consume(1); 440 length_modifier = LengthModifier::Long; 441 } 442 break; 443 case 'j': 444 format_lexer.consume(); 445 length_modifier = LengthModifier::IntMax; 446 break; 447 case 'z': 448 format_lexer.consume(); 449 length_modifier = LengthModifier::Size; 450 break; 451 case 't': 452 format_lexer.consume(); 453 length_modifier = LengthModifier::PtrDiff; 454 break; 455 case 'L': 456 format_lexer.consume(); 457 length_modifier = LengthModifier::LongDouble; 458 break; 459 default: 460 length_modifier = LengthModifier::Default; 461 break; 462 } 463 goto reread_lookahead; 464 } 465 if (conversion_specifier == ConversionSpecifier::Unspecified) { 466 switch (format_lookahead) { 467 case 'd': 468 format_lexer.consume(); 469 conversion_specifier = ConversionSpecifier::Decimal; 470 break; 471 case 'i': 472 format_lexer.consume(); 473 conversion_specifier = ConversionSpecifier::Integer; 474 break; 475 case 'o': 476 format_lexer.consume(); 477 conversion_specifier = ConversionSpecifier::Octal; 478 break; 479 case 'u': 480 format_lexer.consume(); 481 conversion_specifier = ConversionSpecifier::Unsigned; 482 break; 483 case 'x': 484 case 'X': 485 format_lexer.consume(); 486 conversion_specifier = ConversionSpecifier::Hex; 487 break; 488 case 'a': 489 case 'e': 490 case 'f': 491 case 'g': 492 format_lexer.consume(); 493 conversion_specifier = ConversionSpecifier::Floating; 494 break; 495 case 's': 496 format_lexer.consume(); 497 conversion_specifier = ConversionSpecifier::String; 498 break; 499 case '[': 500 format_lexer.consume(); 501 scanlist = format_lexer.consume_until(']'); 502 format_lexer.ignore(); 503 if (scanlist.starts_with('^')) { 504 scanlist = scanlist.substring_view(1); 505 invert_scanlist = true; 506 } 507 conversion_specifier = ConversionSpecifier::UseScanList; 508 break; 509 case 'c': 510 format_lexer.consume(); 511 conversion_specifier = ConversionSpecifier::Character; 512 break; 513 case 'p': 514 format_lexer.consume(); 515 conversion_specifier = ConversionSpecifier::Pointer; 516 break; 517 case 'n': 518 format_lexer.consume(); 519 conversion_specifier = ConversionSpecifier::OutputNumberOfBytes; 520 break; 521 case 'C': 522 format_lexer.consume(); 523 length_modifier = LengthModifier::Long; 524 conversion_specifier = ConversionSpecifier::Character; 525 break; 526 case 'S': 527 format_lexer.consume(); 528 length_modifier = LengthModifier::Long; 529 conversion_specifier = ConversionSpecifier::String; 530 break; 531 default: 532 format_lexer.consume(); 533 conversion_specifier = ConversionSpecifier::Invalid; 534 break; 535 } 536 } 537 538 // Now try to read. 539 switch (conversion_specifier) { 540 case ConversionSpecifier::Invalid: 541 case ConversionSpecifier::Unspecified: 542 default: 543 // "undefined behavior", let's be nice and crash. 544 dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier); 545 VERIFY_NOT_REACHED(); 546 case ConversionSpecifier::Decimal: 547 if (!ReadElement<int, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 548 format_lexer.consume_all(); 549 else if (!suppress_assignment) 550 ++elements_matched; 551 break; 552 case ConversionSpecifier::Integer: 553 if (!ReadElement<int, ReadKind::Infer> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 554 format_lexer.consume_all(); 555 else if (!suppress_assignment) 556 ++elements_matched; 557 break; 558 case ConversionSpecifier::Octal: 559 if (!ReadElement<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 560 format_lexer.consume_all(); 561 else if (!suppress_assignment) 562 ++elements_matched; 563 break; 564 case ConversionSpecifier::Unsigned: 565 if (!ReadElement<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 566 format_lexer.consume_all(); 567 else if (!suppress_assignment) 568 ++elements_matched; 569 break; 570 case ConversionSpecifier::Hex: 571 if (!ReadElement<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 572 format_lexer.consume_all(); 573 else if (!suppress_assignment) 574 ++elements_matched; 575 break; 576 case ConversionSpecifier::Floating: 577 if (!ReadElement<float, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 578 format_lexer.consume_all(); 579 else if (!suppress_assignment) 580 ++elements_matched; 581 break; 582 case ConversionSpecifier::String: 583 if (!ReadElement<char*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 584 format_lexer.consume_all(); 585 else if (!suppress_assignment) 586 ++elements_matched; 587 break; 588 case ConversionSpecifier::UseScanList: 589 if (!ReadElement<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, &copy, suppress_assignment)) 590 format_lexer.consume_all(); 591 else if (!suppress_assignment) 592 ++elements_matched; 593 break; 594 case ConversionSpecifier::Character: 595 if (!ReadElement<char, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 596 format_lexer.consume_all(); 597 else if (!suppress_assignment) 598 ++elements_matched; 599 break; 600 case ConversionSpecifier::Pointer: 601 if (!ReadElement<void*, ReadKind::Normal> {}(length_modifier, input_lexer, &copy, suppress_assignment)) 602 format_lexer.consume_all(); 603 else if (!suppress_assignment) 604 ++elements_matched; 605 break; 606 case ConversionSpecifier::OutputNumberOfBytes: { 607 if (!suppress_assignment) { 608 auto* ptr = va_arg(copy, int*); 609 *ptr = input_lexer.tell(); 610 } 611 break; 612 } 613 } 614 } 615 va_end(copy); 616 617 return elements_matched; 618}