/*
 * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 */

#include "Lexer.h"
#include <AK/CharacterTypes.h>
#include <AK/Debug.h>
#include <AK/ScopeLogger.h>

namespace CMake::Cache {

14static bool is_identifier_start_character(u32 c)
15{
16 return AK::is_ascii_alpha(c) || c == '_' || c == '-';
17}
18
19static bool is_identifier_character(u32 c)
20{
21 return AK::is_ascii_alphanumeric(c) || c == '_' || c == '-';
22}
23
// Wraps the input buffer in a GenericLexer; the actual work happens in lex_file().
Lexer::Lexer(StringView input)
    : GenericLexer(input)
{
}
28
29ErrorOr<Vector<Token>> Lexer::lex(StringView input)
30{
31 Lexer lexer { input };
32 return lexer.lex_file();
33}
34
35ErrorOr<Vector<Token>> Lexer::lex_file()
36{
37 ScopeLogger<CMAKE_DEBUG> logger;
38
39 while (!is_eof()) {
40 skip_whitespace();
41
42 if (is_eof())
43 break;
44
45 if (next_is('#')) {
46 consume_comment();
47 continue;
48 }
49
50 if (next_is("//"sv)) {
51 consume_help_text();
52 continue;
53 }
54
55 if (next_is(is_identifier_start_character)) {
56 consume_variable_definition();
57 continue;
58 }
59
60 consume_garbage();
61 }
62
63 return m_tokens;
64}
65
66void Lexer::skip_whitespace()
67{
68 ScopeLogger<CMAKE_DEBUG> log;
69
70 while (!is_eof()) {
71 if (next_is('\n')) {
72 next_line();
73 continue;
74 }
75 auto consumed = consume_while(AK::is_ascii_space);
76 if (consumed.is_empty())
77 break;
78 }
79}
80
81void Lexer::consume_comment()
82{
83 ScopeLogger<CMAKE_DEBUG> log;
84
85 auto start = position();
86 VERIFY(consume_specific('#'));
87 auto comment = consume_until('\n');
88 emit_token(Token::Type::Comment, comment, start, position());
89}
90
91void Lexer::consume_help_text()
92{
93 ScopeLogger<CMAKE_DEBUG> log;
94
95 auto start = position();
96 VERIFY(consume_specific("//"sv));
97 auto help_text = consume_until('\n');
98 emit_token(Token::Type::HelpText, help_text, start, position());
99}
100
101void Lexer::consume_variable_definition()
102{
103 ScopeLogger<CMAKE_DEBUG> log;
104
105 consume_key();
106
107 if (!next_is(':')) {
108 consume_garbage();
109 return;
110 }
111 consume_colon();
112
113 if (!next_is(is_identifier_start_character)) {
114 consume_garbage();
115 return;
116 }
117 consume_type();
118
119 if (!next_is('=')) {
120 consume_garbage();
121 return;
122 }
123 consume_equals();
124
125 consume_value();
126}
127
128void Lexer::consume_key()
129{
130 ScopeLogger<CMAKE_DEBUG> log;
131
132 auto start = position();
133 auto key = consume_while(is_identifier_character);
134 emit_token(Token::Type::Key, key, start, position());
135}
136
137void Lexer::consume_colon()
138{
139 ScopeLogger<CMAKE_DEBUG> log;
140
141 auto start = position();
142 VERIFY(consume_specific(':'));
143 emit_token(Token::Type::Colon, ":"sv, start, position());
144}
145
146void Lexer::consume_type()
147{
148 ScopeLogger<CMAKE_DEBUG> log;
149
150 auto start = position();
151 auto type = consume_while(is_identifier_character);
152 emit_token(Token::Type::Type, type, start, position());
153}
154
155void Lexer::consume_equals()
156{
157 ScopeLogger<CMAKE_DEBUG> log;
158
159 auto start = position();
160 VERIFY(consume_specific('='));
161 emit_token(Token::Type::Colon, "="sv, start, position());
162}
163
164void Lexer::consume_value()
165{
166 ScopeLogger<CMAKE_DEBUG> log;
167
168 auto start = position();
169 auto value = consume_until('\n');
170 emit_token(Token::Type::Value, value, start, position());
171}
172
173void Lexer::consume_garbage()
174{
175 ScopeLogger<CMAKE_DEBUG> log;
176
177 auto start = position();
178 auto garbage = consume_until('\n');
179 emit_token(Token::Type::Garbage, garbage, start, position());
180}
181
182Position Lexer::position() const
183{
184 return Position {
185 .line = m_line,
186 .column = tell() - m_string_offset_after_previous_newline,
187 };
188}
189
// Consumes a '\n' and updates the bookkeeping that position() relies on.
// All newlines must be consumed through here, or line/column tracking drifts.
void Lexer::next_line()
{
    VERIFY(consume_specific('\n'));
    m_string_offset_after_previous_newline = tell();
    m_line++;
}
196
// Appends a token of the given type covering [start, end) with the given source text.
// Note: `value` is a view into the input buffer, so the input must outlive the tokens.
void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end)
{
    dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column);
    m_tokens.empend(type, value, start, end);
}

}