Serenity Operating System
at master 328 lines 10 kB view raw
1/* 2 * Copyright (c) 2020, the SerenityOS developers. 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#pragma once 8 9#include "AST.h" 10#include <AK/Function.h> 11#include <AK/RefPtr.h> 12#include <AK/String.h> 13#include <AK/StringBuilder.h> 14#include <AK/Vector.h> 15 16namespace Shell { 17 18class Parser { 19public: 20 Parser(StringView input, bool interactive = false) 21 : m_input(move(input)) 22 , m_in_interactive_mode(interactive) 23 { 24 } 25 26 RefPtr<AST::Node> parse(); 27 /// Parse the given string *as* an expression 28 /// that is to forcefully enclose it in double-quotes. 29 RefPtr<AST::Node> parse_as_single_expression(); 30 Vector<NonnullRefPtr<AST::Node>> parse_as_multiple_expressions(); 31 32 struct SavedOffset { 33 size_t offset; 34 AST::Position::Line line; 35 }; 36 SavedOffset save_offset() const; 37 38private: 39 enum class ShouldReadMoreSequences { 40 Yes, 41 No, 42 }; 43 44 enum class StringEndCondition { 45 DoubleQuote, 46 Heredoc, 47 }; 48 49 struct SequenceParseResult { 50 Vector<NonnullRefPtr<AST::Node>> entries; 51 Vector<AST::Position, 1> separator_positions; 52 ShouldReadMoreSequences decision; 53 }; 54 55 struct HeredocInitiationRecord { 56 String end; 57 RefPtr<AST::Heredoc> node; 58 bool interpolate { false }; 59 bool deindent { false }; 60 }; 61 62 constexpr static size_t max_allowed_nested_rule_depth = 2048; 63 RefPtr<AST::Node> parse_toplevel(); 64 SequenceParseResult parse_sequence(); 65 RefPtr<AST::Node> parse_function_decl(); 66 RefPtr<AST::Node> parse_and_logical_sequence(); 67 RefPtr<AST::Node> parse_or_logical_sequence(); 68 RefPtr<AST::Node> parse_variable_decls(); 69 RefPtr<AST::Node> parse_pipe_sequence(); 70 RefPtr<AST::Node> parse_command(); 71 RefPtr<AST::Node> parse_control_structure(); 72 RefPtr<AST::Node> parse_continuation_control(); 73 RefPtr<AST::Node> parse_for_loop(); 74 RefPtr<AST::Node> parse_loop_loop(); 75 RefPtr<AST::Node> parse_if_expr(); 76 RefPtr<AST::Node> parse_subshell(); 77 RefPtr<AST::Node> parse_match_expr(); 78 AST::MatchEntry parse_match_entry(); 79 RefPtr<AST::Node> parse_match_pattern(); 80 Optional<Regex<ECMA262>> parse_regex_pattern(); 81 RefPtr<AST::Node> parse_redirection(); 82 RefPtr<AST::Node> parse_list_expression(); 83 RefPtr<AST::Node> parse_expression(); 84 RefPtr<AST::Node> parse_string_composite(); 85 RefPtr<AST::Node> parse_string(); 86 RefPtr<AST::Node> parse_string_inner(StringEndCondition); 87 RefPtr<AST::Node> parse_variable(); 88 RefPtr<AST::Node> parse_variable_ref(); 89 RefPtr<AST::Slice> parse_slice(); 90 RefPtr<AST::Node> parse_evaluate(); 91 RefPtr<AST::Node> parse_history_designator(); 92 RefPtr<AST::Node> parse_comment(); 93 RefPtr<AST::Node> parse_bareword(); 94 RefPtr<AST::Node> parse_glob(); 95 RefPtr<AST::Node> parse_brace_expansion(); 96 RefPtr<AST::Node> parse_brace_expansion_spec(); 97 RefPtr<AST::Node> parse_immediate_expression(); 98 RefPtr<AST::Node> parse_heredoc_initiation_record(); 99 bool parse_heredoc_entries(); 100 101 template<typename A, typename... Args> 102 NonnullRefPtr<A> create(Args&&... args); 103 104 void set_end_condition(OwnPtr<Function<bool()>> condition) { m_end_condition = move(condition); } 105 bool at_end() const 106 { 107 if (m_end_condition && (*m_end_condition)()) 108 return true; 109 return m_input.length() <= m_offset; 110 } 111 char peek(); 112 char consume(); 113 bool expect(char); 114 bool expect(StringView); 115 bool next_is(StringView); 116 117 void restore_to(size_t offset, AST::Position::Line line) 118 { 119 m_offset = offset; 120 m_line = move(line); 121 } 122 123 AST::Position::Line line() const { return m_line; } 124 125 StringView consume_while(Function<bool(char)>); 126 127 struct Offset { 128 size_t offset; 129 AST::Position::Line line; 130 }; 131 struct ScopedOffset { 132 ScopedOffset(Vector<size_t>& offsets, Vector<AST::Position::Line>& lines, size_t offset, size_t lineno, size_t linecol) 133 : offsets(offsets) 134 , lines(lines) 135 , offset(offset) 136 , line({ lineno, linecol }) 137 { 138 offsets.append(offset); 139 lines.append(line); 140 } 141 ~ScopedOffset() 142 { 143 auto last = offsets.take_last(); 144 VERIFY(last == offset); 145 146 auto last_line = lines.take_last(); 147 VERIFY(last_line == line); 148 } 149 150 Vector<size_t>& offsets; 151 Vector<AST::Position::Line>& lines; 152 size_t offset; 153 AST::Position::Line line; 154 }; 155 156 void restore_to(ScopedOffset const& offset) { restore_to(offset.offset, offset.line); } 157 158 OwnPtr<ScopedOffset> push_start(); 159 Offset current_position(); 160 161 StringView m_input; 162 size_t m_offset { 0 }; 163 164 AST::Position::Line m_line { 0, 0 }; 165 166 Vector<size_t> m_rule_start_offsets; 167 Vector<AST::Position::Line> m_rule_start_lines; 168 169 OwnPtr<Function<bool()>> m_end_condition; 170 Vector<HeredocInitiationRecord> m_heredoc_initiations; 171 Vector<char> m_extra_chars_not_allowed_in_barewords; 172 bool m_is_in_brace_expansion_spec { false }; 173 bool m_continuation_controls_allowed { false }; 174 bool m_in_interactive_mode { false }; 175}; 176 177#if 0 178constexpr auto the_grammar = R"( 179toplevel :: sequence? 180 181sequence :: variable_decls? or_logical_sequence terminator sequence 182 | variable_decls? or_logical_sequence '&' sequence 183 | variable_decls? or_logical_sequence 184 | variable_decls? function_decl (terminator sequence)? 185 | variable_decls? terminator sequence 186 187function_decl :: identifier '(' (ws* identifier)* ')' ws* '{' [!c] toplevel '}' 188 189or_logical_sequence :: and_logical_sequence '|' '|' and_logical_sequence 190 | and_logical_sequence 191 192and_logical_sequence :: pipe_sequence '&' '&' and_logical_sequence 193 | pipe_sequence 194 195terminator :: ';' 196 | '\n' [?!heredoc_stack.is_empty] heredoc_entries 197 198heredoc_entries :: { .*? (heredoc_entry) '\n' } [each heredoc_entries] 199 200variable_decls :: identifier '=' expression (' '+ variable_decls)? ' '* 201 | identifier '=' '(' pipe_sequence ')' (' '+ variable_decls)? ' '* 202 203pipe_sequence :: command '|' '&'? pipe_sequence 204 | command 205 | control_structure '|' '&'? pipe_sequence 206 | control_structure 207 208control_structure[c] :: for_expr 209 | loop_expr 210 | if_expr 211 | subshell 212 | match_expr 213 | ?c: continuation_control 214 215continuation_control :: 'break' 216 | 'continue' 217 218for_expr :: 'for' ws+ (('index' ' '+ identifier ' '+)? identifier ' '+ 'in' ws*)? expression ws+ '{' [c] toplevel '}' 219 220loop_expr :: 'loop' ws* '{' [c] toplevel '}' 221 222if_expr :: 'if' ws+ or_logical_sequence ws+ '{' toplevel '}' else_clause? 223 224else_clause :: else '{' toplevel '}' 225 | else if_expr 226 227subshell :: '{' toplevel '}' 228 229match_expr :: 'match' ws+ expression ws* ('as' ws+ identifier)? '{' match_entry* '}' 230 231match_entry :: match_pattern ws* (as identifier_list)? '{' toplevel '}' 232 | regex_pattern ws* '{' toplevel '}' 233 234identifier_list :: '(' (identifier ws*)* ')' 235 236regex_pattern :: regex_pattern (ws* '|' ws* regex_pattern)* 237 238match_pattern :: expression (ws* '|' ws* expression)* 239 240regex_pattern :: '(?:' .* ')' { enclosed string must contain balanced parentheses } 241 242command :: redirection command 243 | list_expression command? 244 245redirection :: number? '>'{1,2} ' '* string_composite 246 | number? '<' ' '* string_composite 247 | number? '>' '&' number 248 | number? '>' '&' '-' 249 250list_expression :: ' '* expression (' '+ list_expression)? 251 252expression :: evaluate expression? 253 | string_composite expression? 254 | comment expression? 255 | immediate_expression expression? 256 | history_designator expression? 257 | '(' list_expression ')' expression? 258 259evaluate :: '$' '(' pipe_sequence ')' 260 | '$' [lookahead != '('] expression {eval / dynamic resolve} 261 262string_composite :: string string_composite? 263 | variable string_composite? 264 | bareword string_composite? 265 | glob string_composite? 266 | brace_expansion string_composite? 267 | heredoc_initiator string_composite? {append to heredoc_entries} 268 269heredoc_initiator :: '<' '<' '-' bareword {*bareword, interpolate, no deindent} 270 | '<' '<' '-' "'" [^']* "'" {*string, no interpolate, no deindent} 271 | '<' '<' '~' bareword {*bareword, interpolate, deindent} 272 | '<' '<' '~' "'" [^']* "'" {*bareword, no interpolate, deindent} 273 274string :: '"' dquoted_string_inner '"' 275 | "'" [^']* "'" 276 277dquoted_string_inner :: '\' . dquoted_string_inner? {concat} 278 | variable dquoted_string_inner? {compose} 279 | . dquoted_string_inner? 280 | '\' 'x' xdigit*2 dquoted_string_inner? 281 | '\' 'u' xdigit*8 dquoted_string_inner? 282 | '\' [abefrnt] dquoted_string_inner? 283 284variable :: variable_ref slice? 285 286variable_ref :: '$' identifier 287 | '$' '$' 288 | '$' '?' 289 | '$' '*' 290 | '$' '#' 291 | ... 292 293slice :: '[' brace_expansion_spec ']' 294 295comment :: '#' [^\n]* 296 297immediate_expression :: '$' '{' immediate_function expression* '}' 298 299immediate_function :: identifier { predetermined list of names, see Shell.h:ENUMERATE_SHELL_IMMEDIATE_FUNCTIONS } 300 301history_designator :: '!' event_selector (':' word_selector_composite)? 302 303event_selector :: '!' {== '-0'} 304 | '?' bareword '?' 305 | bareword {number: index, otherwise: lookup} 306 307word_selector_composite :: word_selector ('-' word_selector)? 308 309word_selector :: number 310 | '^' {== 0} 311 | '$' {== end} 312 313bareword :: [^"'*$&#|()[\]{} ?;<>] bareword? 314 | '\' [^"'*$&#|()[\]{} ?;<>] bareword? 315 316bareword_with_tilde_expansion :: '~' bareword? 317 318glob :: [*?] bareword? 319 | bareword [*?] 320 321brace_expansion :: '{' brace_expansion_spec '}' 322 323brace_expansion_spec :: expression? (',' expression?)* 324 | expression '..' expression 325)"; 326#endif 327 328}