Serenity Operating System
1/*
2 * Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/CharacterTypes.h>
8#include <AK/Debug.h>
9#include <AK/StringUtils.h>
10#include <Shell/PosixParser.h>
11
12static Shell::AST::Position empty_position()
13{
14 return { 0, 0, { 0, 0 }, { 0, 0 } };
15}
16
17template<typename T, typename... Ts>
18static inline bool is_one_of(T const& value, Ts const&... values)
19{
20 return ((value == values) || ...);
21}
22
23static inline bool is_io_operator(Shell::Posix::Token const& token)
24{
25 using namespace Shell::Posix;
26 return is_one_of(token.type,
27 Token::Type::Less, Token::Type::Great,
28 Token::Type::LessAnd, Token::Type::GreatAnd,
29 Token::Type::DoubleLess, Token::Type::DoubleGreat,
30 Token::Type::DoubleLessDash, Token::Type::LessGreat,
31 Token::Type::Clobber);
32}
33
34static inline bool is_separator(Shell::Posix::Token const& token)
35{
36 using namespace Shell::Posix;
37 return is_one_of(token.type,
38 Token::Type::Semicolon, Token::Type::Newline,
39 Token::Type::AndIf, Token::Type::OrIf,
40 Token::Type::Pipe,
41 Token::Type::And);
42}
43
44static inline bool is_a_reserved_word_position(Shell::Posix::Token const& token, Optional<Shell::Posix::Token> const& previous_token, Optional<Shell::Posix::Token> const& previous_previous_token)
45{
46 using namespace Shell::Posix;
47
48 auto is_start_of_command = !previous_token.has_value()
49 || previous_token->value.is_empty()
50 || is_separator(*previous_token)
51 || is_one_of(previous_token->type,
52 Token::Type::OpenParen, Token::Type::CloseParen, Token::Type::Newline, Token::Type::DoubleSemicolon,
53 Token::Type::Semicolon, Token::Type::Pipe, Token::Type::OrIf, Token::Type::AndIf);
54 if (is_start_of_command)
55 return true;
56
57 if (!previous_token.has_value())
58 return false;
59
60 auto previous_is_reserved_word = is_one_of(previous_token->value,
61 "for"sv, "in"sv, "case"sv, "if"sv, "then"sv, "else"sv,
62 "elif"sv, "while"sv, "until"sv, "do"sv, "done"sv, "esac"sv,
63 "fi"sv, "!"sv, "{"sv, "}"sv);
64
65 if (previous_is_reserved_word)
66 return true;
67
68 if (!previous_previous_token.has_value())
69 return false;
70
71 auto is_third_in_case = previous_previous_token->value == "case"sv
72 && token.type == Token::Type::Token && token.value == "in"sv;
73
74 if (is_third_in_case)
75 return true;
76
77 auto is_third_in_for = previous_previous_token->value == "for"sv
78 && token.type == Token::Type::Token && is_one_of(token.value, "in"sv, "do"sv);
79
80 return is_third_in_for;
81}
82
83static inline bool is_reserved(Shell::Posix::Token const& token)
84{
85 using namespace Shell::Posix;
86 return is_one_of(token.type,
87 Token::Type::If, Token::Type::Then, Token::Type::Else,
88 Token::Type::Elif, Token::Type::Fi, Token::Type::Do,
89 Token::Type::Done, Token::Type::Case, Token::Type::Esac,
90 Token::Type::While, Token::Type::Until, Token::Type::For,
91 Token::Type::In, Token::Type::OpenBrace, Token::Type::CloseBrace,
92 Token::Type::Bang);
93}
94
95static inline bool is_valid_name(StringView word)
96{
97 // Dr.POSIX: a word consisting solely of underscores, digits, and alphabetics from the portable character set. The first character of a name is not a digit.
98 return !word.is_empty()
99 && !is_ascii_digit(word[0])
100 && all_of(word, [](auto ch) { return is_ascii_alphanumeric(ch) || ch == '_'; });
101}
102
103namespace Shell::Posix {
104ErrorOr<void> Parser::fill_token_buffer(Optional<Reduction> starting_reduction)
105{
106 for (;;) {
107 auto token = TRY(next_expanded_token(starting_reduction));
108 if (!token.has_value())
109 break;
110#if SHELL_POSIX_PARSER_DEBUG
111 DeprecatedString position = "(~)";
112 if (token->position.has_value())
113 position = DeprecatedString::formatted("{}:{}", token->position->start_offset, token->position->end_offset);
114 DeprecatedString expansions = "";
115 for (auto& exp : token->resolved_expansions)
116 exp.visit(
117 [&](ResolvedParameterExpansion& x) { expansions = DeprecatedString::formatted("{}param({}),", expansions, x.to_deprecated_string()); },
118 [&](ResolvedCommandExpansion& x) { expansions = DeprecatedString::formatted("{}command({:p})", expansions, x.command.ptr()); });
119 DeprecatedString rexpansions = "";
120 for (auto& exp : token->expansions)
121 exp.visit(
122 [&](ParameterExpansion& x) { rexpansions = DeprecatedString::formatted("{}param({}) from {} to {},", rexpansions, x.parameter.string_view(), x.range.start, x.range.length); },
123 [&](auto&) { rexpansions = DeprecatedString::formatted("{}...,", rexpansions); });
124 dbgln("Token @ {}: '{}' (type {}) - parsed expansions: {} - raw expansions: {}", position, token->value.replace("\n"sv, "\\n"sv, ReplaceMode::All), token->type_name(), expansions, rexpansions);
125#endif
126 }
127 m_token_index = 0;
128
129 return {};
130}
131
132RefPtr<AST::Node> Parser::parse()
133{
134 return parse_complete_command().release_value_but_fixme_should_propagate_errors();
135}
136
137void Parser::handle_heredoc_contents()
138{
139 while (!eof() && m_token_buffer[m_token_index].type == Token::Type::HeredocContents) {
140 auto& token = m_token_buffer[m_token_index++];
141 auto entry = m_unprocessed_heredoc_entries.get(token.relevant_heredoc_key.value());
142 if (!entry.has_value()) {
143 error(token, "Discarding unexpected heredoc contents for key '{}'", *token.relevant_heredoc_key);
144 continue;
145 }
146
147 auto& heredoc = **entry;
148
149 RefPtr<AST::Node> contents;
150 if (heredoc.allow_interpolation()) {
151 Parser parser { token.value, m_in_interactive_mode, Reduction::HeredocContents };
152 contents = parser.parse_word().release_value_but_fixme_should_propagate_errors();
153 } else {
154 contents = make_ref_counted<AST::StringLiteral>(token.position.value_or(empty_position()), String::from_utf8(token.value).release_value_but_fixme_should_propagate_errors(), AST::StringLiteral::EnclosureType::None);
155 }
156
157 if (contents)
158 heredoc.set_contents(contents);
159 m_unprocessed_heredoc_entries.remove(*token.relevant_heredoc_key);
160 }
161}
162
163ErrorOr<Optional<Token>> Parser::next_expanded_token(Optional<Reduction> starting_reduction)
164{
165 while (m_token_buffer.find_if([](auto& token) { return token.type == Token::Type::Eof; }).is_end()) {
166 auto tokens = TRY(m_lexer.batch_next(starting_reduction));
167 auto expanded = perform_expansions(move(tokens));
168 m_token_buffer.extend(expanded);
169 }
170
171 if (m_token_buffer.size() == m_token_index)
172 return OptionalNone {};
173
174 return m_token_buffer[m_token_index++];
175}
176
177Vector<Token> Parser::perform_expansions(Vector<Token> tokens)
178{
179 if (tokens.is_empty())
180 return {};
181
182 Vector<Token> expanded_tokens;
183 auto previous_token = Optional<Token>();
184 auto previous_previous_token = Optional<Token>();
185 auto tokens_taken_from_buffer = 0;
186
187 expanded_tokens.ensure_capacity(tokens.size());
188
189 auto swap_expansions = [&] {
190 if (previous_previous_token.has_value())
191 expanded_tokens.append(previous_previous_token.release_value());
192
193 if (previous_token.has_value())
194 expanded_tokens.append(previous_token.release_value());
195
196 for (; tokens_taken_from_buffer > 0; tokens_taken_from_buffer--)
197 m_token_buffer.append(expanded_tokens.take_first());
198
199 swap(tokens, expanded_tokens);
200 expanded_tokens.clear_with_capacity();
201 };
202
203 // (1) join all consecutive newlines (this works around a grammar ambiguity)
204 auto previous_was_newline = !m_token_buffer.is_empty() && m_token_buffer.last().type == Token::Type::Newline;
205 for (auto& token : tokens) {
206 if (token.type == Token::Type::Newline) {
207 if (previous_was_newline)
208 continue;
209 previous_was_newline = true;
210 } else {
211 previous_was_newline = false;
212 }
213 expanded_tokens.append(move(token));
214 }
215
216 swap_expansions();
217
218 // (2) Detect reserved words
219 if (m_token_buffer.size() >= 1) {
220 previous_token = m_token_buffer.take_last();
221 tokens_taken_from_buffer++;
222 }
223 if (m_token_buffer.size() >= 1) {
224 previous_previous_token = m_token_buffer.take_last();
225 tokens_taken_from_buffer++;
226 }
227
228 auto check_reserved_word = [&](auto& token) {
229 if (is_a_reserved_word_position(token, previous_token, previous_previous_token)) {
230 if (token.value == "if"sv)
231 token.type = Token::Type::If;
232 else if (token.value == "then"sv)
233 token.type = Token::Type::Then;
234 else if (token.value == "else"sv)
235 token.type = Token::Type::Else;
236 else if (token.value == "elif"sv)
237 token.type = Token::Type::Elif;
238 else if (token.value == "fi"sv)
239 token.type = Token::Type::Fi;
240 else if (token.value == "while"sv)
241 token.type = Token::Type::While;
242 else if (token.value == "until"sv)
243 token.type = Token::Type::Until;
244 else if (token.value == "do"sv)
245 token.type = Token::Type::Do;
246 else if (token.value == "done"sv)
247 token.type = Token::Type::Done;
248 else if (token.value == "case"sv)
249 token.type = Token::Type::Case;
250 else if (token.value == "esac"sv)
251 token.type = Token::Type::Esac;
252 else if (token.value == "for"sv)
253 token.type = Token::Type::For;
254 else if (token.value == "in"sv)
255 token.type = Token::Type::In;
256 else if (token.value == "!"sv)
257 token.type = Token::Type::Bang;
258 else if (token.value == "{"sv)
259 token.type = Token::Type::OpenBrace;
260 else if (token.value == "}"sv)
261 token.type = Token::Type::CloseBrace;
262 else if (token.type == Token::Type::Token)
263 token.type = Token::Type::Word;
264 } else if (token.type == Token::Type::Token) {
265 token.type = Token::Type::Word;
266 }
267 };
268
269 for (auto& token : tokens) {
270 if (!previous_token.has_value()) {
271 check_reserved_word(token);
272 previous_token = token;
273 continue;
274 }
275 if (!previous_previous_token.has_value()) {
276 check_reserved_word(token);
277 previous_previous_token = move(previous_token);
278 previous_token = token;
279 continue;
280 }
281
282 check_reserved_word(token);
283 expanded_tokens.append(exchange(*previous_previous_token, exchange(*previous_token, move(token))));
284 }
285
286 swap_expansions();
287
288 // (3) Detect io_number tokens
289 previous_token = Optional<Token>();
290 tokens_taken_from_buffer = 0;
291 if (m_token_buffer.size() >= 1) {
292 previous_token = m_token_buffer.take_last();
293 tokens_taken_from_buffer++;
294 }
295
296 for (auto& token : tokens) {
297 if (!previous_token.has_value()) {
298 previous_token = token;
299 continue;
300 }
301
302 if (is_io_operator(token) && previous_token->type == Token::Type::Word && all_of(previous_token->value.bytes_as_string_view(), is_ascii_digit)) {
303 previous_token->type = Token::Type::IoNumber;
304 }
305
306 expanded_tokens.append(exchange(*previous_token, move(token)));
307 }
308
309 swap_expansions();
310
311 // (4) Try to identify simple commands
312 previous_token = Optional<Token>();
313 tokens_taken_from_buffer = 0;
314
315 if (m_token_buffer.size() >= 1) {
316 previous_token = m_token_buffer.take_last();
317 tokens_taken_from_buffer++;
318 }
319
320 for (auto& token : tokens) {
321 if (!previous_token.has_value()) {
322 token.could_be_start_of_a_simple_command = true;
323 previous_token = token;
324 continue;
325 }
326
327 token.could_be_start_of_a_simple_command = is_one_of(previous_token->type, Token::Type::OpenParen, Token::Type::CloseParen, Token::Type::Newline)
328 || is_separator(*previous_token)
329 || (!is_reserved(*previous_token) && is_reserved(token));
330
331 expanded_tokens.append(exchange(*previous_token, move(token)));
332 }
333
334 swap_expansions();
335
336 // (5) Detect assignment words
337 for (auto& token : tokens) {
338 if (token.could_be_start_of_a_simple_command)
339 m_disallow_command_prefix = false;
340
341 // Check if we're in a command prefix (could be an assignment)
342 if (!m_disallow_command_prefix && token.type == Token::Type::Word && token.value.contains('=')) {
343 // If the word before '=' is a valid name, this is an assignment
344 auto equal_offset = *token.value.find_byte_offset('=');
345 if (is_valid_name(token.value.bytes_as_string_view().substring_view(0, equal_offset)))
346 token.type = Token::Type::AssignmentWord;
347 else
348 m_disallow_command_prefix = true;
349 } else {
350 m_disallow_command_prefix = true;
351 }
352
353 expanded_tokens.append(move(token));
354 }
355
356 swap_expansions();
357
358 // (6) Parse expansions
359 for (auto& token : tokens) {
360 if (!is_one_of(token.type, Token::Type::Word, Token::Type::AssignmentWord)) {
361 expanded_tokens.append(move(token));
362 continue;
363 }
364
365 Vector<ResolvedExpansion> resolved_expansions;
366 for (auto& expansion : token.expansions) {
367 auto resolved = expansion.visit(
368 [&](ParameterExpansion const& expansion) -> ResolvedExpansion {
369 auto text = expansion.parameter.string_view();
370 // ${NUMBER}
371 if (all_of(text, is_ascii_digit)) {
372 return ResolvedParameterExpansion {
373 .parameter = expansion.parameter.to_string().release_value_but_fixme_should_propagate_errors(),
374 .argument = {},
375 .range = expansion.range,
376 .op = ResolvedParameterExpansion::Op::GetPositionalParameter,
377 };
378 }
379
380 if (text.length() == 1) {
381 ResolvedParameterExpansion::Op op;
382 switch (text[0]) {
383 case '!':
384 op = ResolvedParameterExpansion::Op::GetLastBackgroundPid;
385 break;
386 case '@':
387 op = ResolvedParameterExpansion::Op::GetPositionalParameterList;
388 break;
389 case '-':
390 op = ResolvedParameterExpansion::Op::GetCurrentOptionFlags;
391 break;
392 case '#':
393 op = ResolvedParameterExpansion::Op::GetPositionalParameterCount;
394 break;
395 case '?':
396 op = ResolvedParameterExpansion::Op::GetLastExitStatus;
397 break;
398 case '*':
399 op = ResolvedParameterExpansion::Op::GetPositionalParameterListAsString;
400 break;
401 case '$':
402 op = ResolvedParameterExpansion::Op::GetShellProcessId;
403 break;
404 default:
405 if (is_valid_name(text)) {
406 op = ResolvedParameterExpansion::Op::GetVariable;
407 } else {
408 error(token, "Unknown parameter expansion: {}", text);
409 return ResolvedParameterExpansion {
410 .parameter = expansion.parameter.to_string().release_value_but_fixme_should_propagate_errors(),
411 .argument = {},
412 .range = expansion.range,
413 .op = ResolvedParameterExpansion::Op::StringLength,
414 };
415 }
416 }
417
418 return ResolvedParameterExpansion {
419 .parameter = {},
420 .argument = {},
421 .range = expansion.range,
422 .op = op,
423 };
424 }
425
426 if (text.starts_with('#')) {
427 return ResolvedParameterExpansion {
428 .parameter = String::from_utf8(text.substring_view(1)).release_value_but_fixme_should_propagate_errors(),
429 .argument = {},
430 .range = expansion.range,
431 .op = ResolvedParameterExpansion::Op::StringLength,
432 };
433 }
434
435 GenericLexer lexer { text };
436 auto parameter = lexer.consume_while([first = true](char c) mutable {
437 if (first) {
438 first = false;
439 return is_ascii_alpha(c) || c == '_';
440 }
441 return is_ascii_alphanumeric(c) || c == '_';
442 });
443
444 StringView argument;
445 ResolvedParameterExpansion::Op op;
446 switch (lexer.peek()) {
447 case ':':
448 lexer.ignore();
449 switch (lexer.is_eof() ? 0 : lexer.consume()) {
450 case '-':
451 argument = lexer.consume_all();
452 op = ResolvedParameterExpansion::Op::UseDefaultValue;
453 break;
454 case '=':
455 argument = lexer.consume_all();
456 op = ResolvedParameterExpansion::Op::AssignDefaultValue;
457 break;
458 case '?':
459 argument = lexer.consume_all();
460 op = ResolvedParameterExpansion::Op::IndicateErrorIfEmpty;
461 break;
462 case '+':
463 argument = lexer.consume_all();
464 op = ResolvedParameterExpansion::Op::UseAlternativeValue;
465 break;
466 default:
467 error(token, "Unknown parameter expansion: {}", text);
468 return ResolvedParameterExpansion {
469 .parameter = String::from_utf8(parameter).release_value_but_fixme_should_propagate_errors(),
470 .argument = {},
471 .range = expansion.range,
472 .op = ResolvedParameterExpansion::Op::StringLength,
473 };
474 }
475 break;
476 case '-':
477 lexer.ignore();
478 argument = lexer.consume_all();
479 op = ResolvedParameterExpansion::Op::UseDefaultValueIfUnset;
480 break;
481 case '=':
482 lexer.ignore();
483 argument = lexer.consume_all();
484 op = ResolvedParameterExpansion::Op::AssignDefaultValueIfUnset;
485 break;
486 case '?':
487 lexer.ignore();
488 argument = lexer.consume_all();
489 op = ResolvedParameterExpansion::Op::IndicateErrorIfUnset;
490 break;
491 case '+':
492 lexer.ignore();
493 argument = lexer.consume_all();
494 op = ResolvedParameterExpansion::Op::UseAlternativeValueIfUnset;
495 break;
496 case '%':
497 if (lexer.consume_specific('%'))
498 op = ResolvedParameterExpansion::Op::RemoveLargestSuffixByPattern;
499 else
500 op = ResolvedParameterExpansion::Op::RemoveSmallestSuffixByPattern;
501 argument = lexer.consume_all();
502 break;
503 case '#':
504 if (lexer.consume_specific('#'))
505 op = ResolvedParameterExpansion::Op::RemoveLargestPrefixByPattern;
506 else
507 op = ResolvedParameterExpansion::Op::RemoveSmallestPrefixByPattern;
508 argument = lexer.consume_all();
509 break;
510 default:
511 if (is_valid_name(text)) {
512 op = ResolvedParameterExpansion::Op::GetVariable;
513 } else {
514 error(token, "Unknown parameter expansion: {}", text);
515 return ResolvedParameterExpansion {
516 .parameter = String::from_utf8(parameter).release_value_but_fixme_should_propagate_errors(),
517 .argument = {},
518 .range = expansion.range,
519 .op = ResolvedParameterExpansion::Op::StringLength,
520 };
521 }
522 }
523 VERIFY(lexer.is_eof());
524
525 return ResolvedParameterExpansion {
526 .parameter = String::from_utf8(parameter).release_value_but_fixme_should_propagate_errors(),
527 .argument = String::from_utf8(argument).release_value_but_fixme_should_propagate_errors(),
528 .range = expansion.range,
529 .op = op,
530 .expand = ResolvedParameterExpansion::Expand::Word,
531 };
532 },
533 [&](ArithmeticExpansion const& expansion) -> ResolvedExpansion {
534 error(token, "Arithmetic expansion is not supported");
535 return ResolvedParameterExpansion {
536 .parameter = {},
537 .argument = {},
538 .range = expansion.range,
539 .op = ResolvedParameterExpansion::Op::StringLength,
540 .expand = ResolvedParameterExpansion::Expand::Nothing,
541 };
542 },
543 [&](CommandExpansion const& expansion) -> ResolvedExpansion {
544 Parser parser { expansion.command.string_view() };
545 auto node = parser.parse();
546 m_errors.extend(move(parser.m_errors));
547 return ResolvedCommandExpansion {
548 move(node),
549 expansion.range,
550 };
551 });
552
553 resolved_expansions.append(move(resolved));
554 }
555
556 token.resolved_expansions = move(resolved_expansions);
557 expanded_tokens.append(move(token));
558 }
559
560 swap_expansions();
561
562 // (7) Loop variables
563 previous_token = {};
564 tokens_taken_from_buffer = 0;
565 if (m_token_buffer.size() >= 1) {
566 previous_token = m_token_buffer.take_last();
567 tokens_taken_from_buffer++;
568 }
569
570 for (auto& token : tokens) {
571 if (!previous_token.has_value()) {
572 previous_token = token;
573 continue;
574 }
575
576 if (previous_token->type == Token::Type::For && token.type == Token::Type::Word && is_valid_name(token.value)) {
577 token.type = Token::Type::VariableName;
578 }
579
580 expanded_tokens.append(exchange(*previous_token, token));
581 }
582
583 swap_expansions();
584
585 // (8) Function names
586 previous_token = {};
587 previous_previous_token = {};
588 tokens_taken_from_buffer = 0;
589 if (m_token_buffer.size() >= 1) {
590 previous_token = m_token_buffer.take_last();
591 tokens_taken_from_buffer++;
592 }
593 if (m_token_buffer.size() >= 1) {
594 previous_previous_token = m_token_buffer.take_last();
595 tokens_taken_from_buffer++;
596 }
597
598 for (auto& token : tokens) {
599 if (!previous_token.has_value()) {
600 previous_token = token;
601 continue;
602 }
603 if (!previous_previous_token.has_value()) {
604 previous_previous_token = move(previous_token);
605 previous_token = token;
606 continue;
607 }
608
609 // NAME ( )
610 if (previous_previous_token->could_be_start_of_a_simple_command
611 && previous_previous_token->type == Token::Type::Word
612 && previous_token->type == Token::Type::OpenParen
613 && token.type == Token::Type::CloseParen) {
614
615 previous_previous_token->type = Token::Type::VariableName;
616 }
617
618 expanded_tokens.append(exchange(*previous_previous_token, exchange(*previous_token, token)));
619 }
620
621 swap_expansions();
622
623 return tokens;
624}
625
626ErrorOr<RefPtr<AST::Node>> Parser::parse_complete_command()
627{
628 auto list = TRY([&]() -> ErrorOr<RefPtr<AST::Node>> {
629 // separator...
630 while (is_separator(peek()))
631 skip();
632
633 // list EOF
634 auto list = TRY(parse_list());
635 if (eof())
636 return list;
637
638 // list separator EOF
639 while (is_separator(peek()))
640 skip();
641
642 if (eof())
643 return list;
644
645 auto position = peek().position;
646 auto syntax_error = make_ref_counted<AST::SyntaxError>(
647 position.value_or(empty_position()),
648 "Extra tokens after complete command"_string.release_value_but_fixme_should_propagate_errors());
649
650 if (list)
651 list->set_is_syntax_error(*syntax_error);
652 else
653 list = syntax_error;
654
655 return list;
656 }());
657
658 if (!list)
659 return nullptr;
660
661 return make_ref_counted<AST::Execute>(list->position(), *list);
662}
663
664ErrorOr<RefPtr<AST::Node>> Parser::parse_list()
665{
666 Vector<NonnullRefPtr<AST::Node>> nodes;
667 Vector<AST::Position> positions;
668
669 auto start_position = peek().position.value_or(empty_position());
670
671 for (;;) {
672 auto new_node = TRY(parse_and_or());
673 if (!new_node)
674 break;
675
676 if (peek().type == Token::Type::And) {
677 new_node = make_ref_counted<AST::Background>(
678 new_node->position(),
679 *new_node);
680 }
681
682 nodes.append(new_node.release_nonnull());
683
684 if (!is_separator(peek()) || eof())
685 break;
686
687 auto position = consume().position;
688 if (position.has_value())
689 positions.append(position.release_value());
690 }
691
692 auto end_position = peek().position.value_or(empty_position());
693
694 return make_ref_counted<AST::Sequence>(
695 AST::Position {
696 start_position.start_offset,
697 end_position.end_offset,
698 start_position.start_line,
699 start_position.end_line,
700 },
701 move(nodes),
702 move(positions));
703}
704
705ErrorOr<RefPtr<AST::Node>> Parser::parse_and_or()
706{
707 auto node = TRY(parse_pipeline());
708 if (!node)
709 return RefPtr<AST::Node> {};
710
711 for (;;) {
712 if (peek().type == Token::Type::AndIf) {
713 auto and_token = consume();
714 while (peek().type == Token::Type::Newline)
715 skip();
716
717 auto rhs = TRY(parse_pipeline());
718 if (!rhs)
719 return RefPtr<AST::Node> {};
720 node = make_ref_counted<AST::And>(
721 node->position(),
722 *node,
723 rhs.release_nonnull(),
724 and_token.position.value_or(empty_position()));
725 continue;
726 }
727 if (peek().type == Token::Type::OrIf) {
728 auto or_token = consume();
729 while (peek().type == Token::Type::Newline)
730 skip();
731
732 auto rhs = TRY(parse_pipeline());
733 if (!rhs)
734 return RefPtr<AST::Node> {};
735 node = make_ref_counted<AST::And>(
736 node->position(),
737 *node,
738 rhs.release_nonnull(),
739 or_token.position.value_or(empty_position()));
740 continue;
741 }
742 break;
743 }
744
745 return node;
746}
747
748ErrorOr<RefPtr<AST::Node>> Parser::parse_pipeline()
749{
750 return parse_pipe_sequence();
751}
752
753ErrorOr<RefPtr<AST::Node>> Parser::parse_pipe_sequence()
754{
755 auto node = TRY(parse_command());
756 if (!node)
757 return RefPtr<AST::Node> {};
758
759 for (;;) {
760 if (peek().type != Token::Type::Pipe)
761 break;
762
763 consume();
764 while (peek().type == Token::Type::Newline)
765 skip();
766
767 auto rhs = TRY(parse_command());
768 if (!rhs)
769 return RefPtr<AST::Node> {};
770 node = make_ref_counted<AST::Pipe>(
771 node->position(),
772 *node,
773 rhs.release_nonnull());
774 }
775
776 return node;
777}
778
779ErrorOr<RefPtr<AST::Node>> Parser::parse_command()
780{
781 auto node = TRY([this]() -> ErrorOr<RefPtr<AST::Node>> {
782 if (auto node = TRY(parse_function_definition()))
783 return node;
784
785 if (auto node = TRY(parse_simple_command()))
786 return node;
787
788 auto node = TRY(parse_compound_command());
789 if (!node)
790 return node;
791
792 if (auto list = TRY(parse_redirect_list())) {
793 auto position = list->position();
794 node = make_ref_counted<AST::Join>(
795 node->position().with_end(position),
796 *node,
797 list.release_nonnull());
798 }
799
800 return node;
801 }());
802
803 if (!node)
804 return nullptr;
805
806 return make_ref_counted<AST::CastToCommand>(node->position(), *node);
807}
808
809ErrorOr<RefPtr<AST::Node>> Parser::parse_function_definition()
810{
811 // NAME OPEN_PAREN CLOSE_PAREN newline* function_body
812
813 auto start_index = m_token_index;
814 ArmedScopeGuard reset = [&] {
815 m_token_index = start_index;
816 };
817
818 if (peek().type != Token::Type::VariableName) {
819 return nullptr;
820 }
821
822 auto name = consume();
823
824 if (consume().type != Token::Type::OpenParen)
825 return nullptr;
826
827 if (consume().type != Token::Type::CloseParen)
828 return nullptr;
829
830 while (peek().type == Token::Type::Newline)
831 skip();
832
833 auto body = TRY(parse_function_body());
834
835 if (!body)
836 return nullptr;
837
838 reset.disarm();
839
840 return make_ref_counted<AST::FunctionDeclaration>(
841 name.position.value_or(empty_position()).with_end(peek().position.value_or(empty_position())),
842 AST::NameWithPosition { String::from_utf8(name.value).release_value_but_fixme_should_propagate_errors(), name.position.value_or(empty_position()) },
843 Vector<AST::NameWithPosition> {},
844 body.release_nonnull());
845}
846
847ErrorOr<RefPtr<AST::Node>> Parser::parse_function_body()
848{
849 // compound_command redirect_list?
850
851 auto node = TRY(parse_compound_command());
852 if (!node)
853 return nullptr;
854
855 if (auto list = TRY(parse_redirect_list())) {
856 auto position = list->position();
857 node = make_ref_counted<AST::Join>(
858 node->position().with_end(position),
859 *node,
860 list.release_nonnull());
861 }
862
863 return node;
864}
865
866ErrorOr<RefPtr<AST::Node>> Parser::parse_redirect_list()
867{
868 // io_redirect*
869
870 RefPtr<AST::Node> node;
871
872 for (;;) {
873 auto new_node = TRY(parse_io_redirect());
874 if (!new_node)
875 break;
876
877 if (node) {
878 node = make_ref_counted<AST::Join>(
879 node->position().with_end(new_node->position()),
880 *node,
881 new_node.release_nonnull());
882 } else {
883 node = new_node;
884 }
885 }
886
887 return node;
888}
889
890ErrorOr<RefPtr<AST::Node>> Parser::parse_compound_command()
891{
892 if (auto node = TRY(parse_brace_group()))
893 return node;
894
895 if (auto node = TRY(parse_subshell()))
896 return node;
897
898 if (auto node = TRY(parse_if_clause()))
899 return node;
900
901 if (auto node = TRY(parse_for_clause()))
902 return node;
903
904 if (auto node = TRY(parse_case_clause()))
905 return node;
906
907 if (auto node = TRY(parse_while_clause()))
908 return node;
909
910 if (auto node = TRY(parse_until_clause()))
911 return node;
912
913 return nullptr;
914}
915
916ErrorOr<RefPtr<AST::Node>> Parser::parse_while_clause()
917{
918 if (peek().type != Token::Type::While)
919 return nullptr;
920
921 auto start_position = consume().position.value_or(empty_position());
922 auto condition = TRY(parse_compound_list());
923 if (!condition)
924 condition = make_ref_counted<AST::SyntaxError>(
925 peek().position.value_or(empty_position()),
926 "Expected condition after 'while'"_string.release_value_but_fixme_should_propagate_errors());
927
928 auto do_group = TRY(parse_do_group());
929 if (!do_group)
930 do_group = make_ref_counted<AST::SyntaxError>(
931 peek().position.value_or(empty_position()),
932 "Expected 'do' after 'while'"_string.release_value_but_fixme_should_propagate_errors());
933
934 // while foo; bar -> loop { if foo { bar } else { break } }
935 return make_ref_counted<AST::ForLoop>(
936 start_position.with_end(peek().position.value_or(empty_position())),
937 Optional<AST::NameWithPosition> {},
938 Optional<AST::NameWithPosition> {},
939 nullptr,
940 make_ref_counted<AST::IfCond>(
941 start_position.with_end(peek().position.value_or(empty_position())),
942 Optional<AST::Position> {},
943 condition.release_nonnull(),
944 do_group.release_nonnull(),
945 make_ref_counted<AST::ContinuationControl>(
946 start_position,
947 AST::ContinuationControl::ContinuationKind::Break)));
948}
949
950ErrorOr<RefPtr<AST::Node>> Parser::parse_until_clause()
951{
952 if (peek().type != Token::Type::Until)
953 return nullptr;
954
955 auto start_position = consume().position.value_or(empty_position());
956 auto condition = TRY(parse_compound_list());
957 if (!condition)
958 condition = make_ref_counted<AST::SyntaxError>(
959 peek().position.value_or(empty_position()),
960 "Expected condition after 'until'"_string.release_value_but_fixme_should_propagate_errors());
961
962 auto do_group = TRY(parse_do_group());
963 if (!do_group)
964 do_group = make_ref_counted<AST::SyntaxError>(
965 peek().position.value_or(empty_position()),
966 "Expected 'do' after 'until'"_string.release_value_but_fixme_should_propagate_errors());
967
968 // until foo; bar -> loop { if foo { break } else { bar } }
969 return make_ref_counted<AST::ForLoop>(
970 start_position.with_end(peek().position.value_or(empty_position())),
971 Optional<AST::NameWithPosition> {},
972 Optional<AST::NameWithPosition> {},
973 nullptr,
974 make_ref_counted<AST::IfCond>(
975 start_position.with_end(peek().position.value_or(empty_position())),
976 Optional<AST::Position> {},
977 condition.release_nonnull(),
978 make_ref_counted<AST::ContinuationControl>(
979 start_position,
980 AST::ContinuationControl::ContinuationKind::Break),
981 do_group.release_nonnull()));
982}
983
984ErrorOr<RefPtr<AST::Node>> Parser::parse_brace_group()
985{
986 if (peek().type != Token::Type::OpenBrace)
987 return nullptr;
988
989 consume();
990
991 auto list = TRY(parse_compound_list());
992
993 RefPtr<AST::SyntaxError> error;
994 if (peek().type != Token::Type::CloseBrace) {
995 error = make_ref_counted<AST::SyntaxError>(
996 peek().position.value_or(empty_position()),
997 String::formatted("Expected '}}', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
998 } else {
999 consume();
1000 }
1001
1002 if (error) {
1003 if (list)
1004 list->set_is_syntax_error(*error);
1005 else
1006 list = error;
1007 }
1008
1009 return make_ref_counted<AST::Execute>(list->position(), *list);
1010}
1011
1012ErrorOr<RefPtr<AST::Node>> Parser::parse_case_clause()
1013{
1014 auto start_position = peek().position.value_or(empty_position());
1015 if (peek().type != Token::Type::Case)
1016 return nullptr;
1017
1018 skip();
1019
1020 RefPtr<AST::SyntaxError> syntax_error;
1021 auto expr = TRY(parse_word());
1022 if (!expr)
1023 expr = make_ref_counted<AST::SyntaxError>(
1024 peek().position.value_or(empty_position()),
1025 String::formatted("Expected a word, not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1026
1027 if (peek().type != Token::Type::In) {
1028 syntax_error = make_ref_counted<AST::SyntaxError>(
1029 peek().position.value_or(empty_position()),
1030 String::formatted("Expected 'in', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1031 } else {
1032 skip();
1033 }
1034
1035 while (peek().type == Token::Type::Newline)
1036 skip();
1037
1038 Vector<AST::MatchEntry> entries;
1039
1040 for (;;) {
1041 if (eof() || peek().type == Token::Type::Esac)
1042 break;
1043
1044 if (peek().type == Token::Type::Newline) {
1045 skip();
1046 continue;
1047 }
1048
1049 // Parse a pattern list
1050 auto needs_dsemi = true;
1051 if (peek().type == Token::Type::OpenParen) {
1052 skip();
1053 needs_dsemi = false;
1054 }
1055
1056 auto result = TRY(parse_case_list());
1057
1058 if (peek().type == Token::Type::CloseParen) {
1059 skip();
1060 } else {
1061 if (!syntax_error)
1062 syntax_error = make_ref_counted<AST::SyntaxError>(
1063 peek().position.value_or(empty_position()),
1064 String::formatted("Expected ')', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1065 break;
1066 }
1067
1068 while (peek().type == Token::Type::Newline)
1069 skip();
1070
1071 auto compound_list = TRY(parse_compound_list());
1072
1073 if (peek().type == Token::Type::DoubleSemicolon) {
1074 skip();
1075 } else if (needs_dsemi) {
1076 if (!syntax_error)
1077 syntax_error = make_ref_counted<AST::SyntaxError>(
1078 peek().position.value_or(empty_position()),
1079 String::formatted("Expected ';;', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1080 }
1081
1082 if (syntax_error) {
1083 if (compound_list)
1084 compound_list->set_is_syntax_error(*syntax_error);
1085 else
1086 compound_list = syntax_error;
1087 syntax_error = nullptr;
1088 }
1089
1090 entries.append(AST::MatchEntry {
1091 .options = move(result.nodes),
1092 .match_names = {},
1093 .match_as_position = {},
1094 .pipe_positions = move(result.pipe_positions),
1095 .body = move(compound_list),
1096 });
1097 }
1098
1099 if (peek().type != Token::Type::Esac) {
1100 syntax_error = make_ref_counted<AST::SyntaxError>(
1101 peek().position.value_or(empty_position()),
1102 String::formatted("Expected 'esac', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1103 } else {
1104 skip();
1105 }
1106
1107 auto node = make_ref_counted<AST::MatchExpr>(
1108 start_position.with_end(peek().position.value_or(empty_position())),
1109 expr.release_nonnull(),
1110 String {},
1111 Optional<AST::Position> {},
1112 move(entries));
1113
1114 if (syntax_error)
1115 node->set_is_syntax_error(*syntax_error);
1116
1117 return node;
1118}
1119
1120ErrorOr<Parser::CaseItemsResult> Parser::parse_case_list()
1121{
1122 // Just a list of words split by '|', delimited by ')'
1123 Vector<NonnullRefPtr<AST::Node>> nodes;
1124 Vector<AST::Position> pipes;
1125
1126 for (;;) {
1127 if (eof() || peek().type == Token::Type::CloseParen)
1128 break;
1129
1130 if (peek().type != Token::Type::Word)
1131 break;
1132
1133 auto node = TRY(parse_word());
1134 if (!node)
1135 node = make_ref_counted<AST::SyntaxError>(
1136 peek().position.value_or(empty_position()),
1137 String::formatted("Expected a word, not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1138
1139 nodes.append(node.release_nonnull());
1140
1141 if (peek().type == Token::Type::Pipe) {
1142 pipes.append(peek().position.value_or(empty_position()));
1143 skip();
1144 } else {
1145 break;
1146 }
1147 }
1148
1149 if (nodes.is_empty())
1150 nodes.append(make_ref_counted<AST::SyntaxError>(
1151 peek().position.value_or(empty_position()),
1152 String::formatted("Expected a word, not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors()));
1153
1154 return CaseItemsResult { move(pipes), move(nodes) };
1155}
1156
1157ErrorOr<RefPtr<AST::Node>> Parser::parse_if_clause()
1158{
1159 // If compound_list Then compound_list {Elif compound_list Then compound_list (Fi|Else)?} [(?=Else) compound_list] (?!=Fi) Fi
1160 auto start_position = peek().position.value_or(empty_position());
1161 if (peek().type != Token::Type::If)
1162 return nullptr;
1163
1164 skip();
1165 auto main_condition = TRY(parse_compound_list());
1166 if (!main_condition)
1167 main_condition = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'if'"_string.release_value_but_fixme_should_propagate_errors());
1168
1169 RefPtr<AST::SyntaxError> syntax_error;
1170 if (peek().type != Token::Type::Then) {
1171 syntax_error = make_ref_counted<AST::SyntaxError>(
1172 peek().position.value_or(empty_position()),
1173 String::formatted("Expected 'then', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1174 } else {
1175 skip();
1176 }
1177
1178 auto main_consequence = TRY(parse_compound_list());
1179 if (!main_consequence)
1180 main_consequence = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'then'"_string.release_value_but_fixme_should_propagate_errors());
1181
1182 auto node = make_ref_counted<AST::IfCond>(start_position, Optional<AST::Position>(), main_condition.release_nonnull(), main_consequence.release_nonnull(), nullptr);
1183 auto active_node = node;
1184
1185 while (peek().type == Token::Type::Elif) {
1186 skip();
1187 auto condition = TRY(parse_compound_list());
1188 if (!condition)
1189 condition = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'elif'"_string.release_value_but_fixme_should_propagate_errors());
1190
1191 if (peek().type != Token::Type::Then) {
1192 if (!syntax_error)
1193 syntax_error = make_ref_counted<AST::SyntaxError>(
1194 peek().position.value_or(empty_position()),
1195 String::formatted("Expected 'then', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1196 } else {
1197 skip();
1198 }
1199
1200 auto consequence = TRY(parse_compound_list());
1201 if (!consequence)
1202 consequence = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'then'"_string.release_value_but_fixme_should_propagate_errors());
1203
1204 auto new_node = make_ref_counted<AST::IfCond>(start_position, Optional<AST::Position>(), condition.release_nonnull(), consequence.release_nonnull(), nullptr);
1205
1206 active_node->false_branch() = new_node;
1207 active_node = move(new_node);
1208 }
1209
1210 auto needs_fi = true;
1211 switch (peek().type) {
1212 case Token::Type::Else:
1213 skip();
1214 active_node->false_branch() = TRY(parse_compound_list());
1215 if (!active_node->false_branch())
1216 active_node->false_branch() = make_ref_counted<AST::SyntaxError>(empty_position(), "Expected compound list after 'else'"_string.release_value_but_fixme_should_propagate_errors());
1217 break;
1218 case Token::Type::Fi:
1219 needs_fi = false;
1220 break;
1221 default:
1222 if (!syntax_error)
1223 syntax_error = make_ref_counted<AST::SyntaxError>(
1224 peek().position.value_or(empty_position()),
1225 String::formatted("Expected 'else' or 'fi', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1226 break;
1227 }
1228
1229 if (needs_fi) {
1230 if (peek().type != Token::Type::Fi) {
1231 if (!syntax_error)
1232 syntax_error = make_ref_counted<AST::SyntaxError>(
1233 peek().position.value_or(empty_position()),
1234 String::formatted("Expected 'fi', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1235 } else {
1236 skip();
1237 }
1238 }
1239
1240 if (syntax_error)
1241 node->set_is_syntax_error(*syntax_error);
1242
1243 return node;
1244}
1245
1246ErrorOr<RefPtr<AST::Node>> Parser::parse_subshell()
1247{
1248 auto start_position = peek().position.value_or(empty_position());
1249 if (peek().type != Token::Type::OpenParen)
1250 return nullptr;
1251
1252 skip();
1253 RefPtr<AST::SyntaxError> error;
1254
1255 auto list = TRY(parse_compound_list());
1256 if (!list)
1257 error = make_ref_counted<AST::SyntaxError>(peek().position.value_or(empty_position()), "Expected compound list after ("_string.release_value_but_fixme_should_propagate_errors());
1258
1259 if (peek().type != Token::Type::CloseParen)
1260 error = make_ref_counted<AST::SyntaxError>(peek().position.value_or(empty_position()), "Expected ) after compound list"_string.release_value_but_fixme_should_propagate_errors());
1261 else
1262 skip();
1263
1264 if (!list)
1265 return error;
1266
1267 return make_ref_counted<AST::Subshell>(
1268 start_position.with_end(peek().position.value_or(empty_position())),
1269 list.release_nonnull());
1270}
1271
1272ErrorOr<RefPtr<AST::Node>> Parser::parse_compound_list()
1273{
1274 while (peek().type == Token::Type::Newline)
1275 skip();
1276
1277 auto term = TRY(parse_term());
1278 if (!term)
1279 return term;
1280
1281 if (is_separator(peek())) {
1282 if (consume().type == Token::Type::And) {
1283 term = make_ref_counted<AST::Background>(
1284 term->position().with_end(peek().position.value_or(empty_position())),
1285 *term);
1286 }
1287 }
1288
1289 return term;
1290}
1291
1292ErrorOr<RefPtr<AST::Node>> Parser::parse_term()
1293{
1294 Vector<NonnullRefPtr<AST::Node>> nodes;
1295 Vector<AST::Position> positions;
1296
1297 auto start_position = peek().position.value_or(empty_position());
1298
1299 for (;;) {
1300 auto new_node = TRY(parse_and_or());
1301 if (!new_node)
1302 break;
1303
1304 nodes.append(new_node.release_nonnull());
1305
1306 if (!is_separator(peek()))
1307 break;
1308
1309 auto position = consume().position;
1310 if (position.has_value())
1311 positions.append(position.release_value());
1312 }
1313
1314 auto end_position = peek().position.value_or(empty_position());
1315
1316 return make_ref_counted<AST::Sequence>(
1317 start_position.with_end(end_position),
1318 move(nodes),
1319 move(positions));
1320}
1321
1322ErrorOr<RefPtr<AST::Node>> Parser::parse_for_clause()
1323{
1324 // FOR NAME newline+ do_group
1325 // FOR NAME newline+ IN separator do_group
1326 // FOR NAME IN separator do_group
1327 // FOR NAME IN wordlist separator do_group
1328
1329 if (peek().type != Token::Type::For)
1330 return nullptr;
1331
1332 auto start_position = consume().position.value_or(empty_position());
1333
1334 String name;
1335 Optional<AST::Position> name_position;
1336 if (peek().type == Token::Type::VariableName) {
1337 name_position = peek().position;
1338 name = consume().value;
1339 } else {
1340 name = "it"_short_string;
1341 error(peek(), "Expected a variable name, not {}", peek().type_name());
1342 }
1343
1344 auto saw_newline = false;
1345 while (peek().type == Token::Type::Newline) {
1346 saw_newline = true;
1347 skip();
1348 }
1349
1350 auto saw_in = false;
1351 Optional<AST::Position> in_kw_position;
1352 if (peek().type == Token::Type::In) {
1353 saw_in = true;
1354 in_kw_position = peek().position;
1355 skip();
1356 } else if (!saw_newline) {
1357 error(peek(), "Expected 'in' or a newline, not {}", peek().type_name());
1358 }
1359
1360 RefPtr<AST::Node> iterated_expression;
1361 if (!saw_newline)
1362 iterated_expression = parse_word_list();
1363
1364 if (saw_in) {
1365 if (peek().type == Token::Type::Semicolon)
1366 skip();
1367 else
1368 error(peek(), "Expected a semicolon, not {}", peek().type_name());
1369 }
1370
1371 auto body = TRY(parse_do_group());
1372 return AST::make_ref_counted<AST::ForLoop>(
1373 start_position.with_end(peek().position.value_or(empty_position())),
1374 AST::NameWithPosition { name, name_position.value_or(empty_position()) },
1375 Optional<AST::NameWithPosition> {},
1376 move(iterated_expression),
1377 move(body),
1378 move(in_kw_position),
1379 Optional<AST::Position> {});
1380}
1381
1382RefPtr<AST::Node> Parser::parse_word_list()
1383{
1384 Vector<NonnullRefPtr<AST::Node>> nodes;
1385
1386 auto start_position = peek().position.value_or(empty_position());
1387
1388 for (; peek().type == Token::Type::Word;) {
1389 auto word = parse_word().release_value_but_fixme_should_propagate_errors();
1390 nodes.append(word.release_nonnull());
1391 }
1392
1393 return make_ref_counted<AST::ListConcatenate>(
1394 start_position.with_end(peek().position.value_or(empty_position())),
1395 move(nodes));
1396}
1397
1398ErrorOr<RefPtr<AST::Node>> Parser::parse_word()
1399{
1400 if (peek().type != Token::Type::Word)
1401 return nullptr;
1402
1403 auto token = consume();
1404 RefPtr<AST::Node> word;
1405
1406 enum class Quote {
1407 None,
1408 Single,
1409 Double,
1410 } in_quote { Quote::None };
1411
1412 auto append_bareword = [&](StringView string) -> ErrorOr<void> {
1413 if (!word && string.starts_with('~')) {
1414 GenericLexer lexer { string };
1415 lexer.ignore();
1416 auto user = lexer.consume_while(is_ascii_alphanumeric);
1417 string = lexer.remaining();
1418
1419 word = make_ref_counted<AST::Tilde>(token.position.value_or(empty_position()), TRY(String::from_utf8(user)));
1420 }
1421
1422 if (string.is_empty())
1423 return {};
1424
1425 auto node = make_ref_counted<AST::BarewordLiteral>(
1426 token.position.value_or(empty_position()),
1427 TRY(String::from_utf8(string)));
1428
1429 if (word) {
1430 word = make_ref_counted<AST::Juxtaposition>(
1431 word->position().with_end(token.position.value_or(empty_position())),
1432 *word,
1433 move(node),
1434 AST::Juxtaposition::Mode::StringExpand);
1435 } else {
1436 word = move(node);
1437 }
1438
1439 return {};
1440 };
1441
1442 auto append_string_literal = [&](StringView string) -> ErrorOr<void> {
1443 if (string.is_empty())
1444 return {};
1445
1446 auto node = make_ref_counted<AST::StringLiteral>(
1447 token.position.value_or(empty_position()),
1448 TRY(String::from_utf8(string)),
1449 AST::StringLiteral::EnclosureType::SingleQuotes);
1450
1451 if (word) {
1452 word = make_ref_counted<AST::Juxtaposition>(
1453 word->position().with_end(token.position.value_or(empty_position())),
1454 *word,
1455 move(node),
1456 AST::Juxtaposition::Mode::StringExpand);
1457 } else {
1458 word = move(node);
1459 }
1460
1461 return {};
1462 };
1463
1464 auto append_string_part = [&](StringView string) -> ErrorOr<void> {
1465 if (string.is_empty())
1466 return {};
1467
1468 auto node = make_ref_counted<AST::StringLiteral>(
1469 token.position.value_or(empty_position()),
1470 TRY(String::from_utf8(string)),
1471 AST::StringLiteral::EnclosureType::DoubleQuotes);
1472
1473 if (word) {
1474 word = make_ref_counted<AST::Juxtaposition>(
1475 word->position().with_end(token.position.value_or(empty_position())),
1476 *word,
1477 move(node),
1478 AST::Juxtaposition::Mode::StringExpand);
1479 } else {
1480 word = move(node);
1481 }
1482
1483 return {};
1484 };
1485
1486 auto append_parameter_expansion = [&](ResolvedParameterExpansion const& x) -> ErrorOr<void> {
1487 StringView immediate_function_name;
1488 RefPtr<AST::Node> node;
1489 switch (x.op) {
1490 case ResolvedParameterExpansion::Op::UseDefaultValue:
1491 immediate_function_name = "value_or_default"sv;
1492 break;
1493 case ResolvedParameterExpansion::Op::AssignDefaultValue:
1494 immediate_function_name = "assign_default"sv;
1495 break;
1496 case ResolvedParameterExpansion::Op::IndicateErrorIfEmpty:
1497 immediate_function_name = "error_if_empty"sv;
1498 break;
1499 case ResolvedParameterExpansion::Op::UseAlternativeValue:
1500 immediate_function_name = "null_or_alternative"sv;
1501 break;
1502 case ResolvedParameterExpansion::Op::UseDefaultValueIfUnset:
1503 immediate_function_name = "defined_value_or_default"sv;
1504 break;
1505 case ResolvedParameterExpansion::Op::AssignDefaultValueIfUnset:
1506 immediate_function_name = "assign_defined_default"sv;
1507 break;
1508 case ResolvedParameterExpansion::Op::IndicateErrorIfUnset:
1509 immediate_function_name = "error_if_unset"sv;
1510 break;
1511 case ResolvedParameterExpansion::Op::UseAlternativeValueIfUnset:
1512 immediate_function_name = "null_if_unset_or_alternative"sv;
1513 break;
1514 case ResolvedParameterExpansion::Op::RemoveLargestSuffixByPattern:
1515 // FIXME: Implement this
1516 case ResolvedParameterExpansion::Op::RemoveSmallestSuffixByPattern:
1517 immediate_function_name = "remove_suffix"sv;
1518 break;
1519 case ResolvedParameterExpansion::Op::RemoveLargestPrefixByPattern:
1520 // FIXME: Implement this
1521 case ResolvedParameterExpansion::Op::RemoveSmallestPrefixByPattern:
1522 immediate_function_name = "remove_prefix"sv;
1523 break;
1524 case ResolvedParameterExpansion::Op::StringLength:
1525 immediate_function_name = "length_of_variable"sv;
1526 break;
1527 case ResolvedParameterExpansion::Op::GetPositionalParameter:
1528 case ResolvedParameterExpansion::Op::GetVariable:
1529 node = make_ref_counted<AST::SimpleVariable>(
1530 token.position.value_or(empty_position()),
1531 x.parameter);
1532 break;
1533 case ResolvedParameterExpansion::Op::GetLastBackgroundPid:
1534 node = make_ref_counted<AST::SyntaxError>(
1535 token.position.value_or(empty_position()),
1536 TRY("$! not implemented"_string));
1537 break;
1538 case ResolvedParameterExpansion::Op::GetPositionalParameterList:
1539 node = make_ref_counted<AST::SpecialVariable>(
1540 token.position.value_or(empty_position()),
1541 '*');
1542 break;
1543 case ResolvedParameterExpansion::Op::GetCurrentOptionFlags:
1544 node = make_ref_counted<AST::SyntaxError>(
1545 token.position.value_or(empty_position()),
1546 TRY("The current option flags are not available in parameter expansions"_string));
1547 break;
1548 case ResolvedParameterExpansion::Op::GetPositionalParameterCount:
1549 node = make_ref_counted<AST::SpecialVariable>(
1550 token.position.value_or(empty_position()),
1551 '#');
1552 break;
1553 case ResolvedParameterExpansion::Op::GetLastExitStatus:
1554 node = make_ref_counted<AST::SpecialVariable>(
1555 token.position.value_or(empty_position()),
1556 '?');
1557 break;
1558 case ResolvedParameterExpansion::Op::GetPositionalParameterListAsString:
1559 node = make_ref_counted<AST::SyntaxError>(
1560 token.position.value_or(empty_position()),
1561 TRY("$* not implemented"_string));
1562 break;
1563 case ResolvedParameterExpansion::Op::GetShellProcessId:
1564 node = make_ref_counted<AST::SpecialVariable>(
1565 token.position.value_or(empty_position()),
1566 '$');
1567 break;
1568 }
1569
1570 if (!node) {
1571 Vector<NonnullRefPtr<AST::Node>> arguments;
1572 arguments.append(make_ref_counted<AST::BarewordLiteral>(
1573 token.position.value_or(empty_position()),
1574 x.parameter));
1575
1576 if (!x.argument.is_empty()) {
1577 // dbgln("Will parse {}", x.argument);
1578 arguments.append(*TRY(Parser { x.argument }.parse_word()));
1579 }
1580
1581 node = make_ref_counted<AST::ImmediateExpression>(
1582 token.position.value_or(empty_position()),
1583 AST::NameWithPosition {
1584 TRY(String::from_utf8(immediate_function_name)),
1585 token.position.value_or(empty_position()),
1586 },
1587 move(arguments),
1588 Optional<AST::Position> {});
1589 }
1590
1591 if (x.expand == ResolvedParameterExpansion::Expand::Word) {
1592 node = make_ref_counted<AST::ImmediateExpression>(
1593 token.position.value_or(empty_position()),
1594 AST::NameWithPosition {
1595 TRY("reexpand"_string),
1596 token.position.value_or(empty_position()),
1597 },
1598 Vector { node.release_nonnull() },
1599 Optional<AST::Position> {});
1600 }
1601
1602 if (word) {
1603 word = make_ref_counted<AST::Juxtaposition>(
1604 word->position().with_end(token.position.value_or(empty_position())),
1605 *word,
1606 node.release_nonnull(),
1607 AST::Juxtaposition::Mode::StringExpand);
1608 } else {
1609 word = move(node);
1610 }
1611
1612 return {};
1613 };
1614
1615 auto append_command_expansion = [&](ResolvedCommandExpansion const& x) -> ErrorOr<void> {
1616 if (!x.command)
1617 return {};
1618
1619 RefPtr<AST::Execute> execute_node;
1620
1621 if (x.command->is_execute()) {
1622 execute_node = const_cast<AST::Execute&>(static_cast<AST::Execute const&>(*x.command));
1623 execute_node->capture_stdout();
1624 } else {
1625 execute_node = make_ref_counted<AST::Execute>(
1626 word ? word->position() : empty_position(),
1627 *x.command,
1628 true);
1629 }
1630
1631 if (word) {
1632 word = make_ref_counted<AST::Juxtaposition>(
1633 word->position(),
1634 *word,
1635 execute_node.release_nonnull(),
1636 AST::Juxtaposition::Mode::StringExpand);
1637 } else {
1638 word = move(execute_node);
1639 }
1640
1641 return {};
1642 };
1643
1644 auto append_string = [&](StringView string) -> ErrorOr<void> {
1645 if (string.is_empty())
1646 return {};
1647
1648 Optional<size_t> run_start;
1649 auto escape = false;
1650 for (size_t i = 0; i < string.length(); ++i) {
1651 auto ch = string[i];
1652 switch (ch) {
1653 case '\\':
1654 if (!escape && i + 1 < string.length()) {
1655 if (is_one_of(string[i + 1], '"', '\'', '$', '`', '\\')) {
1656 escape = in_quote != Quote::Single;
1657 continue;
1658 }
1659 }
1660 break;
1661 case '\'':
1662 if (in_quote == Quote::Single) {
1663 in_quote = Quote::None;
1664 TRY(append_string_literal(string.substring_view(*run_start, i - *run_start)));
1665 run_start = i + 1;
1666 continue;
1667 }
1668 if (in_quote == Quote::Double) {
1669 escape = false;
1670 continue;
1671 }
1672 [[fallthrough]];
1673 case '"':
1674 if (ch == '\'' && in_quote == Quote::Single) {
1675 escape = false;
1676 continue;
1677 }
1678 if (!escape) {
1679 if (ch == '"' && in_quote == Quote::Double) {
1680 in_quote = Quote::None;
1681 if (run_start.has_value())
1682 TRY(append_string_part(string.substring_view(*run_start, i - *run_start)));
1683 run_start = i + 1;
1684 continue;
1685 }
1686 if (run_start.has_value())
1687 TRY(append_bareword(string.substring_view(*run_start, i - *run_start)));
1688 in_quote = ch == '\'' ? Quote::Single : Quote::Double;
1689 run_start = i + 1;
1690 }
1691 escape = false;
1692 [[fallthrough]];
1693 default:
1694 if (!run_start.has_value())
1695 run_start = i;
1696 escape = false;
1697 continue;
1698 }
1699 }
1700
1701 if (run_start.has_value())
1702 TRY(append_bareword(string.substring_view(*run_start, string.length() - *run_start)));
1703
1704 return {};
1705 };
1706
1707 if (!token.resolved_expansions.is_empty())
1708 dbgln_if(SHELL_POSIX_PARSER_DEBUG, "Expanding '{}' with {} expansion entries", token.value, token.resolved_expansions.size());
1709
1710 size_t current_offset = 0;
1711 auto value_bytes = token.value.bytes_as_string_view();
1712 for (auto& expansion : token.resolved_expansions) {
1713 TRY(expansion.visit(
1714 [&](ResolvedParameterExpansion const& x) -> ErrorOr<void> {
1715 dbgln_if(SHELL_POSIX_PARSER_DEBUG, " Expanding '{}' ({}+{})", x.to_deprecated_string(), x.range.start, x.range.length);
1716 if (x.range.start >= value_bytes.length()) {
1717 dbgln("Parameter expansion range {}-{} is out of bounds for '{}'", x.range.start, x.range.length, value_bytes);
1718 return {};
1719 }
1720
1721 if (x.range.start != current_offset) {
1722 TRY(append_string(value_bytes.substring_view(current_offset, x.range.start - current_offset)));
1723 current_offset = x.range.start;
1724 }
1725 current_offset += x.range.length;
1726 return append_parameter_expansion(x);
1727 },
1728 [&](ResolvedCommandExpansion const& x) -> ErrorOr<void> {
1729 if (x.range.start >= value_bytes.length()) {
1730 dbgln("Parameter expansion range {}-{} is out of bounds for '{}'", x.range.start, x.range.length, value_bytes);
1731 return {};
1732 }
1733
1734 if (x.range.start != current_offset) {
1735 TRY(append_string(value_bytes.substring_view(current_offset, x.range.start - current_offset)));
1736 current_offset = x.range.start;
1737 }
1738 current_offset += x.range.length;
1739 return append_command_expansion(x);
1740 }));
1741 }
1742
1743 if (current_offset > value_bytes.length()) {
1744 dbgln("Parameter expansion range {}- is out of bounds for '{}'", current_offset, value_bytes);
1745 return word;
1746 }
1747
1748 if (current_offset != value_bytes.length())
1749 TRY(append_string(value_bytes.substring_view(current_offset)));
1750
1751 return word;
1752}
1753
1754ErrorOr<RefPtr<AST::Node>> Parser::parse_do_group()
1755{
1756 if (peek().type != Token::Type::Do) {
1757 return make_ref_counted<AST::SyntaxError>(
1758 peek().position.value_or(empty_position()),
1759 String::formatted("Expected 'do', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1760 }
1761
1762 consume();
1763
1764 auto list = TRY(parse_compound_list());
1765
1766 RefPtr<AST::SyntaxError> error;
1767 if (peek().type != Token::Type::Done) {
1768 error = make_ref_counted<AST::SyntaxError>(
1769 peek().position.value_or(empty_position()),
1770 String::formatted("Expected 'done', not {}", peek().type_name()).release_value_but_fixme_should_propagate_errors());
1771 } else {
1772 consume();
1773 }
1774
1775 if (error) {
1776 if (list)
1777 list->set_is_syntax_error(*error);
1778 else
1779 list = error;
1780 }
1781
1782 return make_ref_counted<AST::Execute>(list->position(), *list);
1783}
1784
1785ErrorOr<RefPtr<AST::Node>> Parser::parse_simple_command()
1786{
1787 auto start_position = peek().position.value_or(empty_position());
1788
1789 Vector<String> definitions;
1790 Vector<NonnullRefPtr<AST::Node>> nodes;
1791
1792 for (;;) {
1793 if (auto io_redirect = TRY(parse_io_redirect()))
1794 nodes.append(*io_redirect);
1795 else
1796 break;
1797 }
1798
1799 while (peek().type == Token::Type::AssignmentWord) {
1800 definitions.append(peek().value);
1801
1802 if (!nodes.is_empty()) {
1803 nodes.append(
1804 make_ref_counted<AST::BarewordLiteral>(
1805 peek().position.value_or(empty_position()),
1806 consume().value));
1807 } else {
1808 // env (assignments) (command)
1809 nodes.append(make_ref_counted<AST::BarewordLiteral>(
1810 empty_position(),
1811 "env"_short_string));
1812
1813 nodes.append(
1814 make_ref_counted<AST::BarewordLiteral>(
1815 peek().position.value_or(empty_position()),
1816 consume().value));
1817 }
1818 }
1819
1820 // WORD or io_redirect: IO_NUMBER or io_file
1821 if (!is_one_of(peek().type,
1822 Token::Type::Word, Token::Type::IoNumber,
1823 Token::Type::Less, Token::Type::LessAnd, Token::Type::Great, Token::Type::GreatAnd,
1824 Token::Type::DoubleGreat, Token::Type::LessGreat, Token::Type::Clobber)) {
1825 if (!nodes.is_empty()) {
1826 Vector<AST::VariableDeclarations::Variable> variables;
1827 for (auto& definition : definitions) {
1828 auto equal_offset = definition.find_byte_offset('=');
1829 auto split_offset = equal_offset.value_or(definition.bytes().size());
1830 auto name = make_ref_counted<AST::BarewordLiteral>(
1831 empty_position(),
1832 definition.substring_from_byte_offset_with_shared_superstring(0, split_offset).release_value_but_fixme_should_propagate_errors());
1833
1834 auto value = make_ref_counted<AST::BarewordLiteral>(
1835 empty_position(),
1836 definition.substring_from_byte_offset_with_shared_superstring(equal_offset.map([](auto x) { return x + 1; }).value_or(definition.bytes().size())).release_value_but_fixme_should_propagate_errors());
1837
1838 variables.append({ move(name), move(value) });
1839 }
1840
1841 return make_ref_counted<AST::VariableDeclarations>(empty_position(), move(variables));
1842 }
1843 return nullptr;
1844 }
1845
1846 // auto first = true;
1847 for (;;) {
1848 if (peek().type == Token::Type::Word) {
1849 auto new_word = TRY(parse_word());
1850 if (!new_word)
1851 break;
1852
1853 // if (first) {
1854 // first = false;
1855 // new_word = make_ref_counted<AST::ImmediateExpression>(
1856 // new_word->position(),
1857 // AST::NameWithPosition {
1858 // "substitute_aliases"sv,
1859 // empty_position(),
1860 // },
1861 // Vector<NonnullRefPtr<AST::Node>> { *new_word },
1862 // Optional<AST::Position> {});
1863 // }
1864
1865 nodes.append(new_word.release_nonnull());
1866 } else if (auto io_redirect = TRY(parse_io_redirect())) {
1867 nodes.append(io_redirect.release_nonnull());
1868 } else {
1869 break;
1870 }
1871 }
1872
1873 auto node = make_ref_counted<AST::ListConcatenate>(
1874 start_position.with_end(peek().position.value_or(empty_position())),
1875 move(nodes));
1876
1877 return node;
1878}
1879
1880ErrorOr<RefPtr<AST::Node>> Parser::parse_io_redirect()
1881{
1882 auto start_position = peek().position.value_or(empty_position());
1883 auto start_index = m_token_index;
1884
1885 // io_redirect: IO_NUMBER? io_file | IO_NUMBER? io_here
1886 Optional<int> io_number;
1887
1888 if (peek().type == Token::Type::IoNumber)
1889 io_number = consume().value.bytes_as_string_view().to_int();
1890
1891 if (auto io_file = TRY(parse_io_file(start_position, io_number)))
1892 return io_file;
1893
1894 if (auto io_here = TRY(parse_io_here(start_position, io_number)))
1895 return io_here;
1896
1897 m_token_index = start_index;
1898 return nullptr;
1899}
1900
1901ErrorOr<RefPtr<AST::Node>> Parser::parse_io_here(AST::Position start_position, Optional<int> fd)
1902{
1903 // io_here: IO_NUMBER? (DLESS | DLESSDASH) WORD
1904 auto io_operator = peek().type;
1905 if (!is_one_of(io_operator, Token::Type::DoubleLess, Token::Type::DoubleLessDash))
1906 return nullptr;
1907
1908 auto io_operator_token = consume();
1909
1910 auto redirection_fd = fd.value_or(0);
1911
1912 auto end_keyword = consume();
1913 if (!is_one_of(end_keyword.type, Token::Type::Word, Token::Type::Token))
1914 return make_ref_counted<AST::SyntaxError>(io_operator_token.position.value_or(start_position), "Expected a heredoc keyword"_string.release_value_but_fixme_should_propagate_errors(), true);
1915
1916 auto [end_keyword_text, allow_interpolation] = Lexer::process_heredoc_key(end_keyword);
1917 RefPtr<AST::SyntaxError> error;
1918
1919 auto position = start_position.with_end(peek().position.value_or(empty_position()));
1920 auto result = make_ref_counted<AST::Heredoc>(
1921 position,
1922 end_keyword_text,
1923 allow_interpolation,
1924 io_operator == Token::Type::DoubleLessDash,
1925 Optional<int> { redirection_fd });
1926
1927 m_unprocessed_heredoc_entries.set(end_keyword_text, result);
1928
1929 if (error)
1930 result->set_is_syntax_error(*error);
1931
1932 return result;
1933}
1934
1935ErrorOr<RefPtr<AST::Node>> Parser::parse_io_file(AST::Position start_position, Optional<int> fd)
1936{
1937 auto start_index = m_token_index;
1938
1939 // io_file = (LESS | LESSAND | GREAT | GREATAND | DGREAT | LESSGREAT | CLOBBER) WORD
1940 auto io_operator = peek().type;
1941 if (!is_one_of(io_operator,
1942 Token::Type::Less, Token::Type::LessAnd, Token::Type::Great, Token::Type::GreatAnd,
1943 Token::Type::DoubleGreat, Token::Type::LessGreat, Token::Type::Clobber))
1944 return nullptr;
1945
1946 auto io_operator_token = consume();
1947
1948 auto word = TRY(parse_word());
1949 if (!word) {
1950 m_token_index = start_index;
1951 return nullptr;
1952 }
1953
1954 auto position = start_position.with_end(peek().position.value_or(empty_position()));
1955 switch (io_operator) {
1956 case Token::Type::Less:
1957 return make_ref_counted<AST::ReadRedirection>(
1958 position,
1959 fd.value_or(0),
1960 word.release_nonnull());
1961 case Token::Type::Clobber:
1962 // FIXME: Add support for clobber (and 'noclobber')
1963 case Token::Type::Great:
1964 return make_ref_counted<AST::WriteRedirection>(
1965 position,
1966 fd.value_or(1),
1967 word.release_nonnull());
1968 case Token::Type::DoubleGreat:
1969 return make_ref_counted<AST::WriteAppendRedirection>(
1970 position,
1971 fd.value_or(1),
1972 word.release_nonnull());
1973 case Token::Type::LessGreat:
1974 return make_ref_counted<AST::ReadWriteRedirection>(
1975 position,
1976 fd.value_or(0),
1977 word.release_nonnull());
1978 case Token::Type::LessAnd:
1979 case Token::Type::GreatAnd: {
1980 auto is_less = io_operator == Token::Type::LessAnd;
1981 auto source_fd = fd.value_or(is_less ? 0 : 1);
1982 if (word->is_bareword()) {
1983 auto maybe_target_fd = static_ptr_cast<AST::BarewordLiteral>(word)->text().bytes_as_string_view().to_int();
1984 if (maybe_target_fd.has_value()) {
1985 auto target_fd = maybe_target_fd.release_value();
1986 if (is_less)
1987 swap(source_fd, target_fd);
1988
1989 return make_ref_counted<AST::Fd2FdRedirection>(
1990 position,
1991 source_fd,
1992 target_fd);
1993 }
1994 }
1995 if (is_less) {
1996 return make_ref_counted<AST::ReadRedirection>(
1997 position,
1998 source_fd,
1999 word.release_nonnull());
2000 }
2001
2002 return make_ref_counted<AST::WriteRedirection>(
2003 position,
2004 source_fd,
2005 word.release_nonnull());
2006 }
2007 default:
2008 VERIFY_NOT_REACHED();
2009 }
2010}
2011
2012}