Serenity Operating System
/*
 * Copyright (c) 2021, the SerenityOS developers.
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */
6
#include <AK/Assertions.h>
#include <AK/Format.h>
#include <AK/GenericLexer.h>
#include <ctype.h>
#include <stdarg.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
15
// Length modifier of a single conversion specification, as recognised by vsscanf().
enum class LengthModifier {
    None,       // Not parsed yet (initial state while scanning a specification).
    Default,    // The specification carries no length modifier.
    Char,       // "hh"
    Short,      // "h"
    Long,       // "l" (also implied by the 'C' and 'S' conversions)
    LongLong,   // "ll"
    IntMax,     // "j"
    Size,       // "z"
    PtrDiff,    // "t"
    LongDouble, // "L"
};
28
// Conversion requested by a single specification in the format string.
enum class ConversionSpecifier {
    Unspecified,         // Nothing parsed yet.
    Decimal,             // 'd'
    Integer,             // 'i'
    Octal,               // 'o'
    Unsigned,            // 'u'
    Hex,                 // 'x' / 'X'
    Floating,            // 'a', 'e', 'f', 'g'
    String,              // 's' (and 'S')
    UseScanList,         // '[' ... ']'
    Character,           // 'c' (and 'C')
    Pointer,             // 'p'
    OutputNumberOfBytes, // 'n'
    Invalid,             // Any other character.
};
44
// Selects how the numeric readers interpret their input.
enum class ReadKind {
    Normal, // Base 10 for the integer readers; the only kind the character and floating readers accept.
    Octal,  // Base 8.
    Hex,    // Base 16.
    Infer,  // Base 0: the strto*() family infers the radix from the input.
};
51
52template<typename T, typename ApT, ReadKind kind = ReadKind::Normal>
53struct ReadElementConcrete {
54 bool operator()(GenericLexer&, va_list)
55 {
56 return false;
57 }
58};
59
60template<typename ApT, ReadKind kind>
61struct ReadElementConcrete<int, ApT, kind> {
62 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
63 {
64 long value = 0;
65 char* endptr = nullptr;
66 auto nptr = lexer.remaining().characters_without_null_termination();
67 if constexpr (kind == ReadKind::Normal)
68 value = strtol(nptr, &endptr, 10);
69 if constexpr (kind == ReadKind::Octal)
70 value = strtol(nptr, &endptr, 8);
71 if constexpr (kind == ReadKind::Hex)
72 value = strtol(nptr, &endptr, 16);
73 if constexpr (kind == ReadKind::Infer)
74 value = strtol(nptr, &endptr, 0);
75
76 if (!endptr)
77 return false;
78
79 if (endptr == nptr)
80 return false;
81
82 auto diff = endptr - nptr;
83 VERIFY(diff > 0);
84 lexer.ignore((size_t)diff);
85
86 if (!suppress_assignment) {
87 auto* ptr = va_arg(*ap, ApT*);
88 *ptr = value;
89 }
90 return true;
91 }
92};
93
94template<typename ApT, ReadKind kind>
95struct ReadElementConcrete<char, ApT, kind> {
96 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
97 {
98 static_assert(kind == ReadKind::Normal, "Can't read a non-normal character");
99
100 if (lexer.is_eof())
101 return false;
102
103 auto ch = lexer.consume();
104 if (!suppress_assignment) {
105 auto* ptr = va_arg(*ap, ApT*);
106 *ptr = ch;
107 }
108 return true;
109 }
110};
111
112template<typename ApT, ReadKind kind>
113struct ReadElementConcrete<unsigned, ApT, kind> {
114 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
115 {
116 unsigned long value = 0;
117 char* endptr = nullptr;
118 auto nptr = lexer.remaining().characters_without_null_termination();
119 if constexpr (kind == ReadKind::Normal)
120 value = strtoul(nptr, &endptr, 10);
121 if constexpr (kind == ReadKind::Octal)
122 value = strtoul(nptr, &endptr, 8);
123 if constexpr (kind == ReadKind::Hex)
124 value = strtoul(nptr, &endptr, 16);
125 if constexpr (kind == ReadKind::Infer)
126 value = strtoul(nptr, &endptr, 0);
127
128 if (!endptr)
129 return false;
130
131 if (endptr == nptr)
132 return false;
133
134 auto diff = endptr - nptr;
135 VERIFY(diff > 0);
136 lexer.ignore((size_t)diff);
137
138 if (!suppress_assignment) {
139 auto* ptr = va_arg(*ap, ApT*);
140 *ptr = value;
141 }
142 return true;
143 }
144};
145
146template<typename ApT, ReadKind kind>
147struct ReadElementConcrete<long long, ApT, kind> {
148 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
149 {
150 long long value = 0;
151 char* endptr = nullptr;
152 auto nptr = lexer.remaining().characters_without_null_termination();
153 if constexpr (kind == ReadKind::Normal)
154 value = strtoll(nptr, &endptr, 10);
155 if constexpr (kind == ReadKind::Octal)
156 value = strtoll(nptr, &endptr, 8);
157 if constexpr (kind == ReadKind::Hex)
158 value = strtoll(nptr, &endptr, 16);
159 if constexpr (kind == ReadKind::Infer)
160 value = strtoll(nptr, &endptr, 0);
161
162 if (!endptr)
163 return false;
164
165 if (endptr == nptr)
166 return false;
167
168 auto diff = endptr - nptr;
169 VERIFY(diff > 0);
170 lexer.ignore((size_t)diff);
171
172 if (!suppress_assignment) {
173 auto* ptr = va_arg(*ap, ApT*);
174 *ptr = value;
175 }
176 return true;
177 }
178};
179
180template<typename ApT, ReadKind kind>
181struct ReadElementConcrete<unsigned long long, ApT, kind> {
182 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
183 {
184 unsigned long long value = 0;
185 char* endptr = nullptr;
186 auto nptr = lexer.remaining().characters_without_null_termination();
187 if constexpr (kind == ReadKind::Normal)
188 value = strtoull(nptr, &endptr, 10);
189 if constexpr (kind == ReadKind::Octal)
190 value = strtoull(nptr, &endptr, 8);
191 if constexpr (kind == ReadKind::Hex)
192 value = strtoull(nptr, &endptr, 16);
193 if constexpr (kind == ReadKind::Infer)
194 value = strtoull(nptr, &endptr, 0);
195
196 if (!endptr)
197 return false;
198
199 if (endptr == nptr)
200 return false;
201
202 auto diff = endptr - nptr;
203 VERIFY(diff > 0);
204 lexer.ignore((size_t)diff);
205
206 if (!suppress_assignment) {
207 auto* ptr = va_arg(*ap, ApT*);
208 *ptr = value;
209 }
210 return true;
211 }
212};
213
214template<typename ApT, ReadKind kind>
215struct ReadElementConcrete<float, ApT, kind> {
216 bool operator()(GenericLexer& lexer, va_list* ap, bool suppress_assignment)
217 {
218 double value = 0;
219 char* endptr = nullptr;
220 auto nptr = lexer.remaining().characters_without_null_termination();
221 if constexpr (kind == ReadKind::Normal)
222 value = strtod(nptr, &endptr);
223 else
224 return false;
225
226 if (!endptr)
227 return false;
228
229 if (endptr == nptr)
230 return false;
231
232 auto diff = endptr - nptr;
233 VERIFY(diff > 0);
234 lexer.ignore((size_t)diff);
235
236 if (!suppress_assignment) {
237 auto* ptr = va_arg(*ap, ApT*);
238 *ptr = value;
239 }
240 return true;
241 }
242};
243
244template<typename T, ReadKind kind>
245struct ReadElement {
246 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
247 {
248 switch (length_modifier) {
249 default:
250 case LengthModifier::None:
251 VERIFY_NOT_REACHED();
252 case LengthModifier::Default:
253 return ReadElementConcrete<T, T, kind> {}(input_lexer, ap, suppress_assignment);
254 case LengthModifier::Char:
255 return ReadElementConcrete<T, char, kind> {}(input_lexer, ap, suppress_assignment);
256 case LengthModifier::Short:
257 return ReadElementConcrete<T, short, kind> {}(input_lexer, ap, suppress_assignment);
258 case LengthModifier::Long:
259 if constexpr (IsSame<T, int>)
260 return ReadElementConcrete<T, long, kind> {}(input_lexer, ap, suppress_assignment);
261 if constexpr (IsSame<T, unsigned>)
262 return ReadElementConcrete<T, unsigned long, kind> {}(input_lexer, ap, suppress_assignment);
263 if constexpr (IsSame<T, float>)
264 return ReadElementConcrete<int, double, kind> {}(input_lexer, ap, suppress_assignment);
265 return false;
266 case LengthModifier::LongLong:
267 if constexpr (IsSame<T, int>)
268 return ReadElementConcrete<long long, long long, kind> {}(input_lexer, ap, suppress_assignment);
269 if constexpr (IsSame<T, unsigned>)
270 return ReadElementConcrete<unsigned long long, unsigned long long, kind> {}(input_lexer, ap, suppress_assignment);
271 if constexpr (IsSame<T, float>)
272 return ReadElementConcrete<long long, double, kind> {}(input_lexer, ap, suppress_assignment);
273 return false;
274 case LengthModifier::IntMax:
275 return ReadElementConcrete<T, intmax_t, kind> {}(input_lexer, ap, suppress_assignment);
276 case LengthModifier::Size:
277 return ReadElementConcrete<T, size_t, kind> {}(input_lexer, ap, suppress_assignment);
278 case LengthModifier::PtrDiff:
279 return ReadElementConcrete<T, ptrdiff_t, kind> {}(input_lexer, ap, suppress_assignment);
280 case LengthModifier::LongDouble:
281 return ReadElementConcrete<T, long double, kind> {}(input_lexer, ap, suppress_assignment);
282 }
283 }
284};
285
286template<>
287struct ReadElement<char*, ReadKind::Normal> {
288 ReadElement(StringView scan_set = {}, bool invert = false)
289 : scan_set(scan_set.is_null() ? " \t\n\f\r"sv : scan_set)
290 , invert(scan_set.is_null() ? true : invert)
291 {
292 }
293
294 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
295 {
296 // FIXME: Implement wide strings and such.
297 if (length_modifier != LengthModifier::Default)
298 return false;
299
300 auto str = input_lexer.consume_while([this](auto c) { return this->matches(c); });
301 if (str.is_empty())
302 return false;
303
304 if (!suppress_assignment) {
305 auto* ptr = va_arg(*ap, char*);
306 memcpy(ptr, str.characters_without_null_termination(), str.length());
307 ptr[str.length()] = 0;
308 }
309
310 return true;
311 }
312
313private:
314 bool matches(char c) const
315 {
316 return invert ^ scan_set.contains(c);
317 }
318
319 const StringView scan_set;
320 bool invert { false };
321};
322
323template<>
324struct ReadElement<void*, ReadKind::Normal> {
325 bool operator()(LengthModifier length_modifier, GenericLexer& input_lexer, va_list* ap, bool suppress_assignment)
326 {
327 if (length_modifier != LengthModifier::Default)
328 return false;
329
330 auto str = input_lexer.consume_while([this](auto c) { return this->should_consume(c); });
331
332 if (count != 8) {
333 fail:;
334 for (size_t i = 0; i < count; ++i)
335 input_lexer.retreat();
336 return false;
337 }
338
339 char buf[9] { 0 };
340 memcpy(buf, str.characters_without_null_termination(), 8);
341 buf[8] = 0;
342 char* endptr = nullptr;
343 auto value = strtoull(buf, &endptr, 16);
344
345 if (endptr != &buf[8])
346 goto fail;
347
348 if (!suppress_assignment) {
349 auto* ptr = va_arg(*ap, void**);
350 memcpy(ptr, &value, sizeof(value));
351 }
352 return true;
353 }
354
355private:
356 bool should_consume(char c)
357 {
358 if (count == 8)
359 return false;
360 if (!isxdigit(c))
361 return false;
362
363 ++count;
364 return true;
365 }
366 size_t count { 0 };
367};
368
369extern "C" int vsscanf(char const* input, char const* format, va_list ap)
370{
371 GenericLexer format_lexer { { format, strlen(format) } };
372 GenericLexer input_lexer { { input, strlen(input) } };
373
374 int elements_matched = 0;
375
376 va_list copy;
377 __builtin_va_copy(copy, ap);
378
379 while (!format_lexer.is_eof()) {
380 if (format_lexer.next_is(isspace)) {
381 format_lexer.ignore_while(isspace);
382 input_lexer.ignore_while(isspace);
383 }
384
385 if (!format_lexer.next_is('%')) {
386 read_one_literal:;
387 if (format_lexer.is_eof())
388 break;
389
390 auto next_char = format_lexer.consume();
391 if (!input_lexer.consume_specific(next_char))
392 return elements_matched;
393 continue;
394 }
395
396 if (format_lexer.next_is("%%")) {
397 format_lexer.ignore();
398 goto read_one_literal;
399 }
400
401 format_lexer.ignore(); // '%'
402
403 bool suppress_assignment = false;
404 if (format_lexer.next_is('*')) {
405 suppress_assignment = true;
406 format_lexer.ignore();
407 }
408
409 // Parse width specification
410 [[maybe_unused]] int width_specifier = 0;
411 if (format_lexer.next_is(isdigit)) {
412 auto width_digits = format_lexer.consume_while([](char c) { return isdigit(c); });
413 width_specifier = width_digits.to_int().value();
414 // FIXME: Actually use width specifier
415 }
416
417 bool invert_scanlist = false;
418 StringView scanlist;
419 LengthModifier length_modifier { LengthModifier::None };
420 ConversionSpecifier conversion_specifier { ConversionSpecifier::Unspecified };
421 reread_lookahead:;
422 auto format_lookahead = format_lexer.peek();
423 if (length_modifier == LengthModifier::None) {
424 switch (format_lookahead) {
425 case 'h':
426 if (format_lexer.peek(1) == 'h') {
427 format_lexer.consume(2);
428 length_modifier = LengthModifier::Char;
429 } else {
430 format_lexer.consume(1);
431 length_modifier = LengthModifier::Short;
432 }
433 break;
434 case 'l':
435 if (format_lexer.peek(1) == 'l') {
436 format_lexer.consume(2);
437 length_modifier = LengthModifier::LongLong;
438 } else {
439 format_lexer.consume(1);
440 length_modifier = LengthModifier::Long;
441 }
442 break;
443 case 'j':
444 format_lexer.consume();
445 length_modifier = LengthModifier::IntMax;
446 break;
447 case 'z':
448 format_lexer.consume();
449 length_modifier = LengthModifier::Size;
450 break;
451 case 't':
452 format_lexer.consume();
453 length_modifier = LengthModifier::PtrDiff;
454 break;
455 case 'L':
456 format_lexer.consume();
457 length_modifier = LengthModifier::LongDouble;
458 break;
459 default:
460 length_modifier = LengthModifier::Default;
461 break;
462 }
463 goto reread_lookahead;
464 }
465 if (conversion_specifier == ConversionSpecifier::Unspecified) {
466 switch (format_lookahead) {
467 case 'd':
468 format_lexer.consume();
469 conversion_specifier = ConversionSpecifier::Decimal;
470 break;
471 case 'i':
472 format_lexer.consume();
473 conversion_specifier = ConversionSpecifier::Integer;
474 break;
475 case 'o':
476 format_lexer.consume();
477 conversion_specifier = ConversionSpecifier::Octal;
478 break;
479 case 'u':
480 format_lexer.consume();
481 conversion_specifier = ConversionSpecifier::Unsigned;
482 break;
483 case 'x':
484 case 'X':
485 format_lexer.consume();
486 conversion_specifier = ConversionSpecifier::Hex;
487 break;
488 case 'a':
489 case 'e':
490 case 'f':
491 case 'g':
492 format_lexer.consume();
493 conversion_specifier = ConversionSpecifier::Floating;
494 break;
495 case 's':
496 format_lexer.consume();
497 conversion_specifier = ConversionSpecifier::String;
498 break;
499 case '[':
500 format_lexer.consume();
501 scanlist = format_lexer.consume_until(']');
502 format_lexer.ignore();
503 if (scanlist.starts_with('^')) {
504 scanlist = scanlist.substring_view(1);
505 invert_scanlist = true;
506 }
507 conversion_specifier = ConversionSpecifier::UseScanList;
508 break;
509 case 'c':
510 format_lexer.consume();
511 conversion_specifier = ConversionSpecifier::Character;
512 break;
513 case 'p':
514 format_lexer.consume();
515 conversion_specifier = ConversionSpecifier::Pointer;
516 break;
517 case 'n':
518 format_lexer.consume();
519 conversion_specifier = ConversionSpecifier::OutputNumberOfBytes;
520 break;
521 case 'C':
522 format_lexer.consume();
523 length_modifier = LengthModifier::Long;
524 conversion_specifier = ConversionSpecifier::Character;
525 break;
526 case 'S':
527 format_lexer.consume();
528 length_modifier = LengthModifier::Long;
529 conversion_specifier = ConversionSpecifier::String;
530 break;
531 default:
532 format_lexer.consume();
533 conversion_specifier = ConversionSpecifier::Invalid;
534 break;
535 }
536 }
537
538 // Now try to read.
539 switch (conversion_specifier) {
540 case ConversionSpecifier::Invalid:
541 case ConversionSpecifier::Unspecified:
542 default:
543 // "undefined behavior", let's be nice and crash.
544 dbgln("Invalid conversion specifier {} in scanf!", (int)conversion_specifier);
545 VERIFY_NOT_REACHED();
546 case ConversionSpecifier::Decimal:
547 if (!ReadElement<int, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
548 format_lexer.consume_all();
549 else if (!suppress_assignment)
550 ++elements_matched;
551 break;
552 case ConversionSpecifier::Integer:
553 if (!ReadElement<int, ReadKind::Infer> {}(length_modifier, input_lexer, ©, suppress_assignment))
554 format_lexer.consume_all();
555 else if (!suppress_assignment)
556 ++elements_matched;
557 break;
558 case ConversionSpecifier::Octal:
559 if (!ReadElement<unsigned, ReadKind::Octal> {}(length_modifier, input_lexer, ©, suppress_assignment))
560 format_lexer.consume_all();
561 else if (!suppress_assignment)
562 ++elements_matched;
563 break;
564 case ConversionSpecifier::Unsigned:
565 if (!ReadElement<unsigned, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
566 format_lexer.consume_all();
567 else if (!suppress_assignment)
568 ++elements_matched;
569 break;
570 case ConversionSpecifier::Hex:
571 if (!ReadElement<unsigned, ReadKind::Hex> {}(length_modifier, input_lexer, ©, suppress_assignment))
572 format_lexer.consume_all();
573 else if (!suppress_assignment)
574 ++elements_matched;
575 break;
576 case ConversionSpecifier::Floating:
577 if (!ReadElement<float, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
578 format_lexer.consume_all();
579 else if (!suppress_assignment)
580 ++elements_matched;
581 break;
582 case ConversionSpecifier::String:
583 if (!ReadElement<char*, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
584 format_lexer.consume_all();
585 else if (!suppress_assignment)
586 ++elements_matched;
587 break;
588 case ConversionSpecifier::UseScanList:
589 if (!ReadElement<char*, ReadKind::Normal> { scanlist, invert_scanlist }(length_modifier, input_lexer, ©, suppress_assignment))
590 format_lexer.consume_all();
591 else if (!suppress_assignment)
592 ++elements_matched;
593 break;
594 case ConversionSpecifier::Character:
595 if (!ReadElement<char, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
596 format_lexer.consume_all();
597 else if (!suppress_assignment)
598 ++elements_matched;
599 break;
600 case ConversionSpecifier::Pointer:
601 if (!ReadElement<void*, ReadKind::Normal> {}(length_modifier, input_lexer, ©, suppress_assignment))
602 format_lexer.consume_all();
603 else if (!suppress_assignment)
604 ++elements_matched;
605 break;
606 case ConversionSpecifier::OutputNumberOfBytes: {
607 if (!suppress_assignment) {
608 auto* ptr = va_arg(copy, int*);
609 *ptr = input_lexer.tell();
610 }
611 break;
612 }
613 }
614 }
615 va_end(copy);
616
617 return elements_matched;
618}