Serenity Operating System
at master 203 lines 4.2 kB view raw
1/* 2 * Copyright (c) 2023, Sam Atkins <atkinssj@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include "Lexer.h" 8#include <AK/CharacterTypes.h> 9#include <AK/Debug.h> 10#include <AK/ScopeLogger.h> 11 12namespace CMake::Cache { 13 14static bool is_identifier_start_character(u32 c) 15{ 16 return AK::is_ascii_alpha(c) || c == '_' || c == '-'; 17} 18 19static bool is_identifier_character(u32 c) 20{ 21 return AK::is_ascii_alphanumeric(c) || c == '_' || c == '-'; 22} 23 24Lexer::Lexer(StringView input) 25 : GenericLexer(input) 26{ 27} 28 29ErrorOr<Vector<Token>> Lexer::lex(StringView input) 30{ 31 Lexer lexer { input }; 32 return lexer.lex_file(); 33} 34 35ErrorOr<Vector<Token>> Lexer::lex_file() 36{ 37 ScopeLogger<CMAKE_DEBUG> logger; 38 39 while (!is_eof()) { 40 skip_whitespace(); 41 42 if (is_eof()) 43 break; 44 45 if (next_is('#')) { 46 consume_comment(); 47 continue; 48 } 49 50 if (next_is("//"sv)) { 51 consume_help_text(); 52 continue; 53 } 54 55 if (next_is(is_identifier_start_character)) { 56 consume_variable_definition(); 57 continue; 58 } 59 60 consume_garbage(); 61 } 62 63 return m_tokens; 64} 65 66void Lexer::skip_whitespace() 67{ 68 ScopeLogger<CMAKE_DEBUG> log; 69 70 while (!is_eof()) { 71 if (next_is('\n')) { 72 next_line(); 73 continue; 74 } 75 auto consumed = consume_while(AK::is_ascii_space); 76 if (consumed.is_empty()) 77 break; 78 } 79} 80 81void Lexer::consume_comment() 82{ 83 ScopeLogger<CMAKE_DEBUG> log; 84 85 auto start = position(); 86 VERIFY(consume_specific('#')); 87 auto comment = consume_until('\n'); 88 emit_token(Token::Type::Comment, comment, start, position()); 89} 90 91void Lexer::consume_help_text() 92{ 93 ScopeLogger<CMAKE_DEBUG> log; 94 95 auto start = position(); 96 VERIFY(consume_specific("//"sv)); 97 auto help_text = consume_until('\n'); 98 emit_token(Token::Type::HelpText, help_text, start, position()); 99} 100 101void Lexer::consume_variable_definition() 102{ 103 ScopeLogger<CMAKE_DEBUG> log; 104 105 consume_key(); 106 107 if (!next_is(':')) { 108 consume_garbage(); 109 return; 110 } 111 consume_colon(); 112 113 if (!next_is(is_identifier_start_character)) { 114 consume_garbage(); 115 return; 116 } 117 consume_type(); 118 119 if (!next_is('=')) { 120 consume_garbage(); 121 return; 122 } 123 consume_equals(); 124 125 consume_value(); 126} 127 128void Lexer::consume_key() 129{ 130 ScopeLogger<CMAKE_DEBUG> log; 131 132 auto start = position(); 133 auto key = consume_while(is_identifier_character); 134 emit_token(Token::Type::Key, key, start, position()); 135} 136 137void Lexer::consume_colon() 138{ 139 ScopeLogger<CMAKE_DEBUG> log; 140 141 auto start = position(); 142 VERIFY(consume_specific(':')); 143 emit_token(Token::Type::Colon, ":"sv, start, position()); 144} 145 146void Lexer::consume_type() 147{ 148 ScopeLogger<CMAKE_DEBUG> log; 149 150 auto start = position(); 151 auto type = consume_while(is_identifier_character); 152 emit_token(Token::Type::Type, type, start, position()); 153} 154 155void Lexer::consume_equals() 156{ 157 ScopeLogger<CMAKE_DEBUG> log; 158 159 auto start = position(); 160 VERIFY(consume_specific('=')); 161 emit_token(Token::Type::Colon, "="sv, start, position()); 162} 163 164void Lexer::consume_value() 165{ 166 ScopeLogger<CMAKE_DEBUG> log; 167 168 auto start = position(); 169 auto value = consume_until('\n'); 170 emit_token(Token::Type::Value, value, start, position()); 171} 172 173void Lexer::consume_garbage() 174{ 175 ScopeLogger<CMAKE_DEBUG> log; 176 177 auto start = position(); 178 auto garbage = consume_until('\n'); 179 emit_token(Token::Type::Garbage, garbage, start, position()); 180} 181 182Position Lexer::position() const 183{ 184 return Position { 185 .line = m_line, 186 .column = tell() - m_string_offset_after_previous_newline, 187 }; 188} 189 190void Lexer::next_line() 191{ 192 VERIFY(consume_specific('\n')); 193 m_string_offset_after_previous_newline = tell(); 194 m_line++; 195} 196 197void Lexer::emit_token(Token::Type type, StringView value, Position start, Position end) 198{ 199 dbgln_if(CMAKE_DEBUG, "Emitting {} token: `{}` ({}:{} to {}:{})", to_string(type), value, start.line, start.column, end.line, end.column); 200 m_tokens.empend(type, value, start, end); 201} 202 203}