// Serenity Operating System
1/*
2 * Copyright (c) 2021, Itamar S. <itamar8910@gmail.com>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include "Preprocessor.h"
8#include <AK/Assertions.h>
9#include <AK/GenericLexer.h>
10#include <AK/StringBuilder.h>
11#include <LibCpp/Lexer.h>
12#include <ctype.h>
13
14namespace Cpp {
// Constructs a preprocessor over `program`.
// `filename` is recorded so created definitions can be attributed to their file.
// NOTE(review): `program` is taken as a StringView, so the text is presumably
// not copied — the caller must keep it alive for the Preprocessor's lifetime; confirm.
Preprocessor::Preprocessor(DeprecatedString const& filename, StringView program)
    : m_filename(filename)
    , m_program(program)
{
}
20
21Vector<Token> Preprocessor::process_and_lex()
22{
23 Lexer lexer { m_program };
24 lexer.set_ignore_whitespace(true);
25 auto tokens = lexer.lex();
26
27 m_unprocessed_tokens = tokens;
28
29 for (size_t token_index = 0; token_index < tokens.size(); ++token_index) {
30 auto& token = tokens[token_index];
31 m_current_line = token.start().line;
32 if (token.type() == Token::Type::PreprocessorStatement) {
33 handle_preprocessor_statement(token.text());
34 m_processed_tokens.append(tokens[token_index]);
35 continue;
36 }
37
38 if (m_state != State::Normal)
39 continue;
40
41 if (token.type() == Token::Type::IncludeStatement) {
42 if (token_index >= tokens.size() - 1 || tokens[token_index + 1].type() != Token::Type::IncludePath)
43 continue;
44 handle_include_statement(tokens[token_index + 1].text());
45 if (m_options.keep_include_statements) {
46 m_processed_tokens.append(tokens[token_index]);
47 m_processed_tokens.append(tokens[token_index + 1]);
48 }
49 ++token_index; // Also skip IncludePath token
50 continue;
51 }
52
53 if (token.type() == Token::Type::Identifier) {
54 if (auto defined_value = m_definitions.find(token.text()); defined_value != m_definitions.end()) {
55 auto last_substituted_token_index = do_substitution(tokens, token_index, defined_value->value);
56 token_index = last_substituted_token_index;
57 continue;
58 }
59 }
60
61 m_processed_tokens.append(token);
62 }
63
64 return m_processed_tokens;
65}
66
67static void consume_whitespace(GenericLexer& lexer)
68{
69 auto ignore_line = [&] {
70 for (;;) {
71 if (lexer.consume_specific("\\\n"sv)) {
72 lexer.ignore(2);
73 } else {
74 lexer.ignore_until('\n');
75 lexer.ignore();
76 break;
77 }
78 }
79 };
80 for (;;) {
81 if (lexer.consume_specific("//"sv)) {
82 ignore_line();
83 } else if (lexer.consume_specific("/*"sv)) {
84 lexer.ignore_until("*/");
85 lexer.ignore(2);
86 } else if (lexer.next_is("\\\n"sv)) {
87 lexer.ignore(2);
88 } else if (lexer.is_eof() || !lexer.next_is(isspace)) {
89 break;
90 } else {
91 lexer.ignore();
92 }
93 }
94}
95
96void Preprocessor::handle_preprocessor_statement(StringView line)
97{
98 GenericLexer lexer(line);
99
100 consume_whitespace(lexer);
101 lexer.consume_specific('#');
102 consume_whitespace(lexer);
103 auto keyword = lexer.consume_until(' ');
104 lexer.ignore();
105 if (keyword.is_empty() || keyword.is_null() || keyword.is_whitespace())
106 return;
107
108 handle_preprocessor_keyword(keyword, lexer);
109}
110
111void Preprocessor::handle_include_statement(StringView include_path)
112{
113 m_included_paths.append(include_path);
114 if (definitions_in_header_callback) {
115 for (auto& def : definitions_in_header_callback(include_path))
116 m_definitions.set(def.key, def.value);
117 }
118}
119
// Dispatches a single preprocessor directive; `line_lexer` is positioned just
// past the keyword. Conditional handling tracks nesting in m_current_depth and
// records, per depth, whether that conditional's branch was taken
// (m_depths_of_taken_branches) or not (m_depths_of_not_taken_branches), so
// #else/#elif/#endif can switch m_state between Normal and the skip states.
void Preprocessor::handle_preprocessor_keyword(StringView keyword, GenericLexer& line_lexer)
{
    if (keyword == "include") {
        // Should have called 'handle_include_statement'.
        VERIFY_NOT_REACHED();
    }

    if (keyword == "else") {
        // A stray #else at depth 0 is tolerated only when configured to.
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        // If the matching #if branch was NOT taken, the #else branch is active.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        // If the matching #if branch WAS taken, skip the #else branch.
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }

    if (keyword == "endif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        --m_current_depth;
        // Forget the branch bookkeeping for the conditional we just closed.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth)) {
            m_depths_of_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth; });
        }
        // NOTE(review): state is reset unconditionally, so an inner #endif
        // inside a skipped outer branch re-enables token emission even though
        // the outer branch is still not taken — nested conditionals in skipped
        // regions are not fully handled here; confirm against upstream intent.
        m_state = State::Normal;
        return;
    }

    if (keyword == "define") {
        // Definitions are only registered in active (non-skipped) regions.
        if (m_state == State::Normal) {
            auto definition = create_definition(line_lexer.consume_all());
            if (definition.has_value())
                m_definitions.set(definition->key, *definition);
        }
        return;
    }
    if (keyword == "undef") {
        if (m_state == State::Normal) {
            // The macro name runs until the first space; the rest is discarded.
            auto key = line_lexer.consume_until(' ');
            line_lexer.consume_all();
            m_definitions.remove(key);
        }
        return;
    }
    if (keyword == "ifdef") {
        // Depth grows even while skipping, so nested #endifs pair up correctly.
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            if (m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }
    if (keyword == "ifndef") {
        // Mirror image of #ifdef: taken when the key is NOT defined.
        ++m_current_depth;
        if (m_state == State::Normal) {
            auto key = line_lexer.consume_until(' ');
            line_lexer.ignore();
            if (!m_definitions.contains(key)) {
                m_depths_of_taken_branches.append(m_current_depth - 1);
                return;
            } else {
                m_depths_of_not_taken_branches.append(m_current_depth - 1);
                m_state = State::SkipIfBranch;
                return;
            }
        }
        return;
    }
    if (keyword == "if") {
        ++m_current_depth;
        if (m_state == State::Normal) {
            // FIXME: Implement #if logic
            // We currently always take #if branches.
            m_depths_of_taken_branches.append(m_current_depth - 1);
        }
        return;
    }

    if (keyword == "elif") {
        if (m_options.ignore_invalid_statements && m_current_depth == 0)
            return;
        VERIFY(m_current_depth > 0);
        // FIXME: Evaluate the elif expression
        // We currently always treat the expression in #elif as true.
        if (m_depths_of_not_taken_branches.contains_slow(m_current_depth - 1) /* && should_take*/) {
            m_depths_of_not_taken_branches.remove_all_matching([this](auto x) { return x == m_current_depth - 1; });
            m_state = State::Normal;
        }
        if (m_depths_of_taken_branches.contains_slow(m_current_depth - 1)) {
            m_state = State::SkipElseBranch;
        }
        return;
    }
    if (keyword == "pragma") {
        // Pragmas are accepted but ignored.
        line_lexer.consume_all();
        return;
    }

    if (!m_options.ignore_unsupported_keywords) {
        dbgln("Unsupported preprocessor keyword: {}", keyword);
        VERIFY_NOT_REACHED();
    }
}
239
240size_t Preprocessor::do_substitution(Vector<Token> const& tokens, size_t token_index, Definition const& defined_value)
241{
242 if (defined_value.value.is_null())
243 return token_index;
244
245 Substitution sub;
246 sub.defined_value = defined_value;
247
248 auto macro_call = parse_macro_call(tokens, token_index);
249
250 if (!macro_call.has_value())
251 return token_index;
252
253 Vector<Token> original_tokens;
254 for (size_t i = token_index; i <= macro_call->end_token_index; ++i) {
255 original_tokens.append(tokens[i]);
256 }
257 VERIFY(!original_tokens.is_empty());
258
259 auto processed_value = evaluate_macro_call(*macro_call, defined_value);
260 m_substitutions.append({ original_tokens, defined_value, processed_value });
261
262 Lexer lexer(processed_value);
263 lexer.lex_iterable([&](auto token) {
264 if (token.type() == Token::Type::Whitespace)
265 return;
266 token.set_start(original_tokens.first().start());
267 token.set_end(original_tokens.first().end());
268 m_processed_tokens.append(token);
269 });
270 return macro_call->end_token_index;
271}
272
273Optional<Preprocessor::MacroCall> Preprocessor::parse_macro_call(Vector<Token> const& tokens, size_t token_index)
274{
275 auto name = tokens[token_index];
276 ++token_index;
277
278 if (token_index >= tokens.size() || tokens[token_index].type() != Token::Type::LeftParen)
279 return MacroCall { name, {}, token_index - 1 };
280 ++token_index;
281
282 Vector<MacroCall::Argument> arguments;
283 Optional<MacroCall::Argument> current_argument;
284
285 size_t paren_depth = 1;
286 for (; token_index < tokens.size(); ++token_index) {
287 auto& token = tokens[token_index];
288 if (token.type() == Token::Type::LeftParen)
289 ++paren_depth;
290 if (token.type() == Token::Type::RightParen)
291 --paren_depth;
292
293 if (paren_depth == 0) {
294 if (current_argument.has_value())
295 arguments.append(*current_argument);
296 break;
297 }
298
299 if (paren_depth == 1 && token.type() == Token::Type::Comma) {
300 if (current_argument.has_value())
301 arguments.append(*current_argument);
302 current_argument = {};
303 } else {
304 if (!current_argument.has_value())
305 current_argument = MacroCall::Argument {};
306 current_argument->tokens.append(token);
307 }
308 }
309
310 if (token_index >= tokens.size())
311 return {};
312
313 return MacroCall { name, move(arguments), token_index };
314}
315
// Parses the text after "#define" into a Definition: a key, optional
// parenthesized parameter list, and the remaining replacement text.
// Returns an empty Optional for malformed input.
Optional<Preprocessor::Definition> Preprocessor::create_definition(StringView line)
{
    Lexer lexer { line };
    lexer.set_ignore_whitespace(true);
    auto tokens = lexer.lex();
    if (tokens.is_empty())
        return {};

    // The macro name must be a plain identifier.
    if (tokens.first().type() != Token::Type::Identifier)
        return {};

    Definition definition;
    definition.filename = m_filename;
    definition.line = m_current_line;

    definition.key = tokens.first().text();

    // "#define FOO" with no value: empty definition.
    if (tokens.size() == 1)
        return definition;

    size_t token_index = 1;
    // Parse macro parameters (if any)
    if (tokens[token_index].type() == Token::Type::LeftParen) {
        ++token_index;
        while (token_index < tokens.size() && tokens[token_index].type() != Token::Type::RightParen) {
            auto param = tokens[token_index];
            // Each parameter must be an identifier followed by ',' or ')'.
            if (param.type() != Token::Type::Identifier)
                return {};

            if (token_index + 1 >= tokens.size())
                return {};

            ++token_index;

            if (tokens[token_index].type() == Token::Type::Comma)
                ++token_index;
            else if (tokens[token_index].type() != Token::Type::RightParen)
                return {};

            definition.parameters.empend(param.text());
        }
        // Reject an unterminated parameter list; otherwise step past ')'.
        if (token_index >= tokens.size())
            return {};
        ++token_index;
    }

    // Everything from the first value token onward is the replacement text.
    // NOTE(review): the substring offset uses the token's COLUMN, which
    // presumably assumes the value starts on the first line of `line`
    // (escaped newlines are removed afterwards) — confirm for multi-line defines.
    if (token_index < tokens.size())
        definition.value = remove_escaped_newlines(line.substring_view(tokens[token_index].start().column));

    return definition;
}
367
368DeprecatedString Preprocessor::remove_escaped_newlines(StringView value)
369{
370 static constexpr auto escaped_newline = "\\\n"sv;
371 AK::StringBuilder processed_value;
372 GenericLexer lexer { value };
373 while (!lexer.is_eof()) {
374 processed_value.append(lexer.consume_until(escaped_newline));
375 lexer.ignore(escaped_newline.length());
376 }
377 return processed_value.to_deprecated_string();
378}
379
380DeprecatedString Preprocessor::evaluate_macro_call(MacroCall const& macro_call, Definition const& definition)
381{
382 if (macro_call.arguments.size() != definition.parameters.size()) {
383 dbgln("mismatch in # of arguments for macro call: {}", macro_call.name.text());
384 return {};
385 }
386
387 Lexer lexer { definition.value };
388 StringBuilder processed_value;
389 lexer.lex_iterable([&](auto token) {
390 if (token.type() != Token::Type::Identifier) {
391 processed_value.append(token.text());
392 return;
393 }
394
395 auto param_index = definition.parameters.find_first_index(token.text());
396 if (!param_index.has_value()) {
397 processed_value.append(token.text());
398 return;
399 }
400
401 auto& argument = macro_call.arguments[*param_index];
402 for (auto& arg_token : argument.tokens) {
403 processed_value.append(arg_token.text());
404 }
405 });
406
407 return processed_value.to_deprecated_string();
408}
409
410};