SerenityOS — LibCpp C++ preprocessor implementation (Preprocessor.cpp, viewed at master; 410 lines, 13 kB).
1/* 2 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include "Preprocessor.h" 8#include <AK/Assertions.h> 9#include <AK/GenericLexer.h> 10#include <AK/StringBuilder.h> 11#include <LibCpp/Lexer.h> 12#include <ctype.h> 13 14namespace Cpp { 15Preprocessor::Preprocessor(DeprecatedString const& filename, StringView program) 16 : m_filename(filename) 17 , m_program(program) 18{ 19} 20 21Vector<Token> Preprocessor::process_and_lex() 22{ 23 Lexer lexer { m_program }; 24 lexer.set_ignore_whitespace(true); 25 auto tokens = lexer.lex(); 26 27 m_unprocessed_tokens = tokens; 28 29 for (size_t token_index = 0; token_index < tokens.size(); ++token_index) { 30 auto& token = tokens[token_index]; 31 m_current_line = token.start().line; 32 if (token.type() == Token::Type::PreprocessorStatement) { 33 handle_preprocessor_statement(token.text()); 34 m_processed_tokens.append(tokens[token_index]); 35 continue; 36 } 37 38 if (m_state != State::Normal) 39 continue; 40 41 if (token.type() == Token::Type::IncludeStatement) { 42 if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath) 43 continue; 44 handle_include_statement(tokens[token_index + 1].text()); 45 if (m_options.keep_include_statements) { 46 m_processed_tokens.append(tokens[token_index]); 47 m_processed_tokens.append(tokens[token_index + 1]); 48 } 49 ++token_index; // Also skip IncludePath token 50 continue; 51 } 52 53 if (token.type() == Token::Type::Identifier) { 54 if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) { 55 auto last_substituted_token_index = do_substitution(tokens, token_index, defined_value->value); 56 token_index = last_substituted_token_index; 57 continue; 58 } 59 } 60 61 m_processed_tokens.append(token); 62 } 63 64 return m_processed_tokens; 65} 66 67static void consume_whitespace(GenericLexer& lexer) 68{ 69 auto ignore_line = [&] { 70 for (;;) { 71 if 
(lexer.consume_specific("\\\n"sv)) { 72 lexer.ignore(2); 73 } else { 74 lexer.ignore_until('\n'); 75 lexer.ignore(); 76 break; 77 } 78 } 79 }; 80 for (;;) { 81 if (lexer.consume_specific("//"sv)) { 82 ignore_line(); 83 } else if (lexer.consume_specific("/*"sv)) { 84 lexer.ignore_until("*/"); 85 lexer.ignore(2); 86 } else if (lexer.next_is("\\\n"sv)) { 87 lexer.ignore(2); 88 } else if (lexer.is_eof() || !lexer.next_is(isspace)) { 89 break; 90 } else { 91 lexer.ignore(); 92 } 93 } 94} 95 96void Preprocessor::handle_preprocessor_statement(StringView line) 97{ 98 GenericLexer lexer(line); 99 100 consume_whitespace(lexer); 101 lexer.consume_specific('#'); 102 consume_whitespace(lexer); 103 auto keyword = lexer.consume_until(' '); 104 lexer.ignore(); 105 if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace()) 106 return; 107 108 handle_preprocessor_keyword(keyword, lexer); 109} 110 111void Preprocessor::handle_include_statement(StringView include_path) 112{ 113 m_included_paths.append(include_path); 114 if (definitions_in_header_callback) { 115 for (auto& def : definitions_in_header_callback(include_path)) 116 m_definitions.set(def.key, def.value); 117 } 118} 119 120void Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer) 121{ 122 if (keyword == "include") { 123 // Should have called 'handle_include_statement'. 
124 VERIFY_NOT_REACHED(); 125 } 126 127 if (keyword == "else") { 128 if (m_options.ignore_invalid_statements && m_current_depth == 0) 129 return; 130 VERIFY(m_current_depth > 0); 131 if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) { 132 m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; }); 133 m_state = State::Normal; 134 } 135 if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) { 136 m_state = State::SkipElseBranch; 137 } 138 return; 139 } 140 141 if (keyword == "endif") { 142 if (m_options.ignore_invalid_statements && m_current_depth == 0) 143 return; 144 VERIFY(m_current_depth > 0); 145 --m_current_depth; 146 if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) { 147 m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; }); 148 } 149 if (m_depths_of_taken_branches.contains_slow(m_current_depth)) { 150 m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; }); 151 } 152 m_state = State::Normal; 153 return; 154 } 155 156 if (keyword == "define") { 157 if (m_state == State::Normal) { 158 auto definition = create_definition(line_lexer.consume_all()); 159 if (definition.has_value()) 160 m_definitions.set(definition->key, *definition); 161 } 162 return; 163 } 164 if (keyword == "undef") { 165 if (m_state == State::Normal) { 166 auto key = line_lexer.consume_until(' '); 167 line_lexer.consume_all(); 168 m_definitions.remove(key); 169 } 170 return; 171 } 172 if (keyword == "ifdef") { 173 ++m_current_depth; 174 if (m_state == State::Normal) { 175 auto key = line_lexer.consume_until(' '); 176 line_lexer.ignore(); 177 if (m_definitions.contains(key)) { 178 m_depths_of_taken_branches.append(m_current_depth - 1); 179 return; 180 } else { 181 m_depths_of_not_taken_branches.append(m_current_depth - 1); 182 m_state = State::SkipIfBranch; 183 return; 184 } 185 } 186 return; 187 } 188 if (keyword == 
"ifndef") { 189 ++m_current_depth; 190 if (m_state == State::Normal) { 191 auto key = line_lexer.consume_until(' '); 192 line_lexer.ignore(); 193 if (!m_definitions.contains(key)) { 194 m_depths_of_taken_branches.append(m_current_depth - 1); 195 return; 196 } else { 197 m_depths_of_not_taken_branches.append(m_current_depth - 1); 198 m_state = State::SkipIfBranch; 199 return; 200 } 201 } 202 return; 203 } 204 if (keyword == "if") { 205 ++m_current_depth; 206 if (m_state == State::Normal) { 207 // FIXME: Implement #if logic 208 // We currently always take #if branches. 209 m_depths_of_taken_branches.append(m_current_depth - 1); 210 } 211 return; 212 } 213 214 if (keyword == "elif") { 215 if (m_options.ignore_invalid_statements && m_current_depth == 0) 216 return; 217 VERIFY(m_current_depth > 0); 218 // FIXME: Evaluate the elif expression 219 // We currently always treat the expression in #elif as true. 220 if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) { 221 m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; }); 222 m_state = State::Normal; 223 } 224 if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) { 225 m_state = State::SkipElseBranch; 226 } 227 return; 228 } 229 if (keyword == "pragma") { 230 line_lexer.consume_all(); 231 return; 232 } 233 234 if (!m_options.ignore_unsupported_keywords) { 235 dbgln("Unsupported preprocessor keyword: {}", keyword); 236 VERIFY_NOT_REACHED(); 237 } 238} 239 240size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value) 241{ 242 if (defined_value.value.is_null()) 243 return token_index; 244 245 Substitution sub; 246 sub.defined_value = defined_value; 247 248 auto macro_call = parse_macro_call(tokens, token_index); 249 250 if (!macro_call.has_value()) 251 return token_index; 252 253 Vector<Token> original_tokens; 254 for (size_t i = token_index; i <= 
macro_call->end_token_index; ++i) { 255 original_tokens.append(tokens[i]); 256 } 257 VERIFY(!original_tokens.is_empty()); 258 259 auto processed_value = evaluate_macro_call(*macro_call, defined_value); 260 m_substitutions.append({ original_tokens, defined_value, processed_value }); 261 262 Lexer lexer(processed_value); 263 lexer.lex_iterable([&](auto token) { 264 if (token.type() == Token::Type::Whitespace) 265 return; 266 token.set_start(original_tokens.first().start()); 267 token.set_end(original_tokens.first().end()); 268 m_processed_tokens.append(token); 269 }); 270 return macro_call->end_token_index; 271} 272 273Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index) 274{ 275 auto name = tokens[token_index]; 276 ++token_index; 277 278 if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen) 279 return MacroCall { name, {}, token_index - 1 }; 280 ++token_index; 281 282 Vector<MacroCall::Argument> arguments; 283 Optional<MacroCall::Argument> current_argument; 284 285 size_t paren_depth = 1; 286 for (; token_index < tokens.size(); ++token_index) { 287 auto& token = tokens[token_index]; 288 if (token.type() == Token::Type::LeftParen) 289 ++paren_depth; 290 if (token.type() == Token::Type::RightParen) 291 --paren_depth; 292 293 if (paren_depth == 0) { 294 if (current_argument.has_value()) 295 arguments.append(*current_argument); 296 break; 297 } 298 299 if (paren_depth == 1 && token.type() == Token::Type::Comma) { 300 if (current_argument.has_value()) 301 arguments.append(*current_argument); 302 current_argument = {}; 303 } else { 304 if (!current_argument.has_value()) 305 current_argument = MacroCall::Argument {}; 306 current_argument->tokens.append(token); 307 } 308 } 309 310 if (token_index >= tokens.size()) 311 return {}; 312 313 return MacroCall { name, move(arguments), token_index }; 314} 315 316Optional<Preprocessor::Definition> 
Preprocessor::create_definition(StringView line) 317{ 318 Lexer lexer { line }; 319 lexer.set_ignore_whitespace(true); 320 auto tokens = lexer.lex(); 321 if (tokens.is_empty()) 322 return {}; 323 324 if (tokens.first().type() != Token::Type::Identifier) 325 return {}; 326 327 Definition definition; 328 definition.filename = m_filename; 329 definition.line = m_current_line; 330 331 definition.key = tokens.first().text(); 332 333 if (tokens.size() == 1) 334 return definition; 335 336 size_t token_index = 1; 337 // Parse macro parameters (if any) 338 if (tokens[token_index].type() == Token::Type::LeftParen) { 339 ++token_index; 340 while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) { 341 auto param = tokens[token_index]; 342 if (param.type() != Token::Type::Identifier) 343 return {}; 344 345 if (token_index + 1 >= tokens.size()) 346 return {}; 347 348 ++token_index; 349 350 if (tokens[token_index].type() == Token::Type::Comma) 351 ++token_index; 352 else if (tokens[token_index].type() != Token::Type::RightParen) 353 return {}; 354 355 definition.parameters.empend(param.text()); 356 } 357 if (token_index >= tokens.size()) 358 return {}; 359 ++token_index; 360 } 361 362 if (token_index < tokens.size()) 363 definition.value = remove_escaped_newlines(line.substring_view(tokens[token_index].start().column)); 364 365 return definition; 366} 367 368DeprecatedString Preprocessor::remove_escaped_newlines(StringView value) 369{ 370 static constexpr auto escaped_newline = "\\\n"sv; 371 AK::StringBuilder processed_value; 372 GenericLexer lexer { value }; 373 while (!lexer.is_eof()) { 374 processed_value.append(lexer.consume_until(escaped_newline)); 375 lexer.ignore(escaped_newline.length()); 376 } 377 return processed_value.to_deprecated_string(); 378} 379 380DeprecatedString Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition) 381{ 382 if (macro_call.arguments.size() != 
definition.parameters.size()) { 383 dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text()); 384 return {}; 385 } 386 387 Lexer lexer { definition.value }; 388 StringBuilder processed_value; 389 lexer.lex_iterable([&](auto token) { 390 if (token.type() != Token::Type::Identifier) { 391 processed_value.append(token.text()); 392 return; 393 } 394 395 auto param_index = definition.parameters.find_first_index(token.text()); 396 if (!param_index.has_value()) { 397 processed_value.append(token.text()); 398 return; 399 } 400 401 auto& argument = macro_call.arguments[*param_index]; 402 for (auto& arg_token : argument.tokens) { 403 processed_value.append(arg_token.text()); 404 } 405 }); 406 407 return processed_value.to_deprecated_string(); 408} 409 410};