Serenity Operating System
1/*
2 * Copyright (c) 2020, the SerenityOS developers.
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#pragma once
8
9#include "AST.h"
10#include <AK/Function.h>
11#include <AK/RefPtr.h>
12#include <AK/String.h>
13#include <AK/StringBuilder.h>
14#include <AK/Vector.h>
15
16namespace Shell {
17
18class Parser {
19public:
20 Parser(StringView input, bool interactive = false)
21 : m_input(move(input))
22 , m_in_interactive_mode(interactive)
23 {
24 }
25
26 RefPtr<AST::Node> parse();
27 /// Parse the given string *as* an expression
28 /// that is to forcefully enclose it in double-quotes.
29 RefPtr<AST::Node> parse_as_single_expression();
30 Vector<NonnullRefPtr<AST::Node>> parse_as_multiple_expressions();
31
32 struct SavedOffset {
33 size_t offset;
34 AST::Position::Line line;
35 };
36 SavedOffset save_offset() const;
37
38private:
39 enum class ShouldReadMoreSequences {
40 Yes,
41 No,
42 };
43
44 enum class StringEndCondition {
45 DoubleQuote,
46 Heredoc,
47 };
48
49 struct SequenceParseResult {
50 Vector<NonnullRefPtr<AST::Node>> entries;
51 Vector<AST::Position, 1> separator_positions;
52 ShouldReadMoreSequences decision;
53 };
54
55 struct HeredocInitiationRecord {
56 String end;
57 RefPtr<AST::Heredoc> node;
58 bool interpolate { false };
59 bool deindent { false };
60 };
61
62 constexpr static size_t max_allowed_nested_rule_depth = 2048;
63 RefPtr<AST::Node> parse_toplevel();
64 SequenceParseResult parse_sequence();
65 RefPtr<AST::Node> parse_function_decl();
66 RefPtr<AST::Node> parse_and_logical_sequence();
67 RefPtr<AST::Node> parse_or_logical_sequence();
68 RefPtr<AST::Node> parse_variable_decls();
69 RefPtr<AST::Node> parse_pipe_sequence();
70 RefPtr<AST::Node> parse_command();
71 RefPtr<AST::Node> parse_control_structure();
72 RefPtr<AST::Node> parse_continuation_control();
73 RefPtr<AST::Node> parse_for_loop();
74 RefPtr<AST::Node> parse_loop_loop();
75 RefPtr<AST::Node> parse_if_expr();
76 RefPtr<AST::Node> parse_subshell();
77 RefPtr<AST::Node> parse_match_expr();
78 AST::MatchEntry parse_match_entry();
79 RefPtr<AST::Node> parse_match_pattern();
80 Optional<Regex<ECMA262>> parse_regex_pattern();
81 RefPtr<AST::Node> parse_redirection();
82 RefPtr<AST::Node> parse_list_expression();
83 RefPtr<AST::Node> parse_expression();
84 RefPtr<AST::Node> parse_string_composite();
85 RefPtr<AST::Node> parse_string();
86 RefPtr<AST::Node> parse_string_inner(StringEndCondition);
87 RefPtr<AST::Node> parse_variable();
88 RefPtr<AST::Node> parse_variable_ref();
89 RefPtr<AST::Slice> parse_slice();
90 RefPtr<AST::Node> parse_evaluate();
91 RefPtr<AST::Node> parse_history_designator();
92 RefPtr<AST::Node> parse_comment();
93 RefPtr<AST::Node> parse_bareword();
94 RefPtr<AST::Node> parse_glob();
95 RefPtr<AST::Node> parse_brace_expansion();
96 RefPtr<AST::Node> parse_brace_expansion_spec();
97 RefPtr<AST::Node> parse_immediate_expression();
98 RefPtr<AST::Node> parse_heredoc_initiation_record();
99 bool parse_heredoc_entries();
100
101 template<typename A, typename... Args>
102 NonnullRefPtr<A> create(Args&&... args);
103
104 void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); }
105 bool at_end() const
106 {
107 if (m_end_condition && (*m_end_condition)())
108 return true;
109 return m_input.length() <= m_offset;
110 }
111 char peek();
112 char consume();
113 bool expect(char);
114 bool expect(StringView);
115 bool next_is(StringView);
116
117 void restore_to(size_t offset, AST::Position::Line line)
118 {
119 m_offset = offset;
120 m_line = move(line);
121 }
122
123 AST::Position::Line line() const { return m_line; }
124
125 StringView consume_while(Function<bool(char)>);
126
127 struct Offset {
128 size_t offset;
129 AST::Position::Line line;
130 };
131 struct ScopedOffset {
132 ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol)
133 : offsets(offsets)
134 , lines(lines)
135 , offset(offset)
136 , line({ lineno, linecol })
137 {
138 offsets.append(offset);
139 lines.append(line);
140 }
141 ~ScopedOffset()
142 {
143 auto last = offsets.take_last();
144 VERIFY(last == offset);
145
146 auto last_line = lines.take_last();
147 VERIFY(last_line == line);
148 }
149
150 Vector<size_t>& offsets;
151 Vector<AST::Position::Line>& lines;
152 size_t offset;
153 AST::Position::Line line;
154 };
155
156 void restore_to(ScopedOffset const& offset) { restore_to(offset.offset, offset.line); }
157
158 OwnPtr<ScopedOffset> push_start();
159 Offset current_position();
160
161 StringView m_input;
162 size_t m_offset { 0 };
163
164 AST::Position::Line m_line { 0, 0 };
165
166 Vector<size_t> m_rule_start_offsets;
167 Vector<AST::Position::Line> m_rule_start_lines;
168
169 OwnPtr<Function<bool()>> m_end_condition;
170 Vector<HeredocInitiationRecord> m_heredoc_initiations;
171 Vector<char> m_extra_chars_not_allowed_in_barewords;
172 bool m_is_in_brace_expansion_spec { false };
173 bool m_continuation_controls_allowed { false };
174 bool m_in_interactive_mode { false };
175};
176
177#if 0
178constexpr auto the_grammar = R"(
179toplevel :: sequence?
180
181sequence :: variable_decls? or_logical_sequence terminator sequence
182 | variable_decls? or_logical_sequence '&' sequence
183 | variable_decls? or_logical_sequence
184 | variable_decls? function_decl (terminator sequence)?
185 | variable_decls? terminator sequence
186
187function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}'
188
189or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence
190 | and_logical_sequence
191
192and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence
193 | pipe_sequence
194
195terminator :: ';'
196 | '\n' [?!heredoc_stack.is_empty] heredoc_entries
197
198heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries]
199
200variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '*
201 | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '*
202
203pipe_sequence :: command '|' '&'? pipe_sequence
204 | command
205 | control_structure '|' '&'? pipe_sequence
206 | control_structure
207
208control_structure[c] :: for_expr
209 | loop_expr
210 | if_expr
211 | subshell
212 | match_expr
213 | ?c: continuation_control
214
215continuation_control :: 'break'
216 | 'continue'
217
218for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}'
219
220loop_expr :: 'loop' ws* '{' [c] toplevel '}'
221
222if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause?
223
224else_clause :: else '{' toplevel '}'
225 | else if_expr
226
227subshell :: '{' toplevel '}'
228
229match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}'
230
231match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}'
232 | regex_pattern ws* '{' toplevel '}'
233
234identifier_list :: '(' (identifier ws*)* ')'
235
236regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)*
237
238match_pattern :: expression (ws* '|' ws* expression)*
239
240regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses }
241
242command :: redirection command
243 | list_expression command?
244
245redirection :: number? '>'{1,2} ' '* string_composite
246 | number? '<' ' '* string_composite
247 | number? '>' '&' number
248 | number? '>' '&' '-'
249
250list_expression :: ' '* expression (' '+ list_expression)?
251
252expression :: evaluate expression?
253 | string_composite expression?
254 | comment expression?
255 | immediate_expression expression?
256 | history_designator expression?
257 | '(' list_expression ')' expression?
258
259evaluate :: '$' '(' pipe_sequence ')'
260 | '$' [lookahead != '('] expression {eval / dynamic resolve}
261
262string_composite :: string string_composite?
263 | variable string_composite?
264 | bareword string_composite?
265 | glob string_composite?
266 | brace_expansion string_composite?
267 | heredoc_initiator string_composite? {append to heredoc_entries}
268
269heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent}
270 | '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent}
271 | '<' '<' '~' bareword {*bareword, interpolate, deindent}
272 | '<' '<' '~' "'" [^']* "'" {*bareword, no interpolate, deindent}
273
274string :: '"' dquoted_string_inner '"'
275 | "'" [^']* "'"
276
277dquoted_string_inner :: '\' . dquoted_string_inner? {concat}
278 | variable dquoted_string_inner? {compose}
279 | . dquoted_string_inner?
280 | '\' 'x' xdigit*2 dquoted_string_inner?
281 | '\' 'u' xdigit*8 dquoted_string_inner?
282 | '\' [abefrnt] dquoted_string_inner?
283
284variable :: variable_ref slice?
285
286variable_ref :: '$' identifier
287 | '$' '$'
288 | '$' '?'
289 | '$' '*'
290 | '$' '#'
291 | ...
292
293slice :: '[' brace_expansion_spec ']'
294
295comment :: '#' [^\n]*
296
297immediate_expression :: '$' '{' immediate_function expression* '}'
298
299immediate_function :: identifier { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS }
300
301history_designator :: '!' event_selector (':' word_selector_composite)?
302
303event_selector :: '!' {== '-0'}
304 | '?' bareword '?'
305 | bareword {number: index, otherwise: lookup}
306
307word_selector_composite :: word_selector ('-' word_selector)?
308
309word_selector :: number
310 | '^' {== 0}
311 | '$' {== end}
312
313bareword :: [^"'*$&#|()[\]{} ?;<>] bareword?
314 | '\' [^"'*$&#|()[\]{} ?;<>] bareword?
315
316bareword_with_tilde_expansion :: '~' bareword?
317
318glob :: [*?] bareword?
319 | bareword [*?]
320
321brace_expansion :: '{' brace_expansion_spec '}'
322
323brace_expansion_spec :: expression? (',' expression?)*
324 | expression '..' expression
325)";
326#endif
327
328}