// Serenity Operating System - source listing at master (305 lines, 11 kB).
1/* 2 * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common. 8#include <AK/DeprecatedString.h> 9#include <AK/Format.h> 10#include <AK/HashMap.h> 11#include <AK/JsonObject.h> 12#include <AK/JsonParser.h> 13#include <AK/JsonValue.h> 14#include <AK/LexicalPath.h> 15#include <AK/SourceGenerator.h> 16#include <AK/StringBuilder.h> 17#include <LibCore/ArgsParser.h> 18#include <LibCore/DirIterator.h> 19#include <LibLocale/Locale.h> 20#include <LibLocale/RelativeTimeFormat.h> 21 22struct RelativeTimeFormat { 23 unsigned hash() const 24 { 25 auto hash = time_unit.hash(); 26 hash = pair_int_hash(hash, style.hash()); 27 hash = pair_int_hash(hash, plurality.hash()); 28 hash = pair_int_hash(hash, tense_or_number); 29 hash = pair_int_hash(hash, pattern); 30 return hash; 31 } 32 33 bool operator==(RelativeTimeFormat const& other) const 34 { 35 return (time_unit == other.time_unit) 36 && (plurality == other.plurality) 37 && (style == other.style) 38 && (tense_or_number == other.tense_or_number) 39 && (pattern == other.pattern); 40 } 41 42 DeprecatedString time_unit; 43 DeprecatedString style; 44 DeprecatedString plurality; 45 size_t tense_or_number { 0 }; 46 size_t pattern { 0 }; 47}; 48 49template<> 50struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> { 51 ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format) 52 { 53 return Formatter<FormatString>::format(builder, 54 "{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv, 55 format.time_unit, 56 format.style, 57 format.plurality, 58 format.tense_or_number, 59 format.pattern); 60 } 61}; 62 63template<> 64struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> { 65 static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); } 66}; 67 68struct LocaleData { 69 Vector<size_t> time_units; 70}; 71 72struct 
CLDR { 73 UniqueStringStorage unique_strings; 74 UniqueStorage<RelativeTimeFormat> unique_formats; 75 76 HashMap<DeprecatedString, LocaleData> locales; 77}; 78 79static ErrorOr<void> parse_date_fields(DeprecatedString locale_dates_path, CLDR& cldr, LocaleData& locale) 80{ 81 LexicalPath date_fields_path(move(locale_dates_path)); 82 date_fields_path = date_fields_path.append("dateFields.json"sv); 83 84 auto date_fields = TRY(read_json_file(date_fields_path.string())); 85 auto const& main_object = date_fields.as_object().get_object("main"sv).value(); 86 auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value(); 87 auto const& dates_object = locale_object.get_object("dates"sv).value(); 88 auto const& fields_object = dates_object.get_object("fields"sv).value(); 89 90 auto is_sanctioned_unit = [](auto unit) { 91 // This is a copy of the time units sanctioned for use within ECMA-402. 92 // https://tc39.es/ecma402/#sec-singularrelativetimeunit 93 return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv); 94 }; 95 96 auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) { 97 RelativeTimeFormat format {}; 98 format.time_unit = unit.to_titlecase_string(); 99 format.style = style.to_titlecase_string(); 100 format.plurality = plurality.to_titlecase_string(); 101 format.tense_or_number = cldr.unique_strings.ensure(tense_or_number); 102 format.pattern = cldr.unique_strings.ensure(pattern.as_string()); 103 104 locale.time_units.append(cldr.unique_formats.ensure(move(format))); 105 }; 106 107 fields_object.for_each_member([&](auto const& unit_and_style, auto const& patterns) { 108 auto segments = unit_and_style.split_view('-'); 109 auto unit = segments[0]; 110 auto style = (segments.size() > 1) ? 
segments[1] : "long"sv; 111 112 if (!is_sanctioned_unit(unit)) 113 return; 114 115 patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) { 116 constexpr auto number_key = "relative-type-"sv; 117 constexpr auto tense_key = "relativeTime-type-"sv; 118 constexpr auto plurality_key = "relativeTimePattern-count-"sv; 119 120 if (type.starts_with(number_key)) { 121 auto number = type.substring_view(number_key.length()); 122 parse_pattern(unit, style, "Other"sv, number, pattern_value); 123 } else if (type.starts_with(tense_key)) { 124 pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) { 125 VERIFY(key.starts_with(plurality_key)); 126 auto plurality = key.substring_view(plurality_key.length()); 127 auto tense = type.substring_view(tense_key.length()); 128 129 parse_pattern(unit, style, plurality, tense, pattern); 130 }); 131 } 132 }); 133 }); 134 135 return {}; 136} 137 138static ErrorOr<void> parse_all_locales(DeprecatedString dates_path, CLDR& cldr) 139{ 140 auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path))); 141 142 auto remove_variants_from_path = [&](DeprecatedString path) -> ErrorOr<DeprecatedString> { 143 auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path))); 144 145 StringBuilder builder; 146 builder.append(cldr.unique_strings.get(parsed_locale.language)); 147 if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty()) 148 builder.appendff("-{}", script); 149 if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty()) 150 builder.appendff("-{}", region); 151 152 return builder.to_deprecated_string(); 153 }; 154 155 while (dates_iterator.has_next()) { 156 auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator)); 157 auto language = TRY(remove_variants_from_path(dates_path)); 158 159 auto& locale = cldr.locales.ensure(language); 160 TRY(parse_date_fields(move(dates_path), cldr, 
locale)); 161 } 162 163 return {}; 164} 165 166static ErrorOr<void> generate_unicode_locale_header(Core::BufferedFile& file, CLDR&) 167{ 168 StringBuilder builder; 169 SourceGenerator generator { builder }; 170 171 generator.append(R"~~~( 172#pragma once 173 174#include <LibLocale/Forward.h> 175 176namespace Locale { 177)~~~"); 178 179 generator.append(R"~~~( 180} 181)~~~"); 182 183 TRY(file.write_until_depleted(generator.as_string_view().bytes())); 184 return {}; 185} 186 187static ErrorOr<void> generate_unicode_locale_implementation(Core::BufferedFile& file, CLDR& cldr) 188{ 189 StringBuilder builder; 190 SourceGenerator generator { builder }; 191 generator.set("string_index_type"sv, cldr.unique_strings.type_that_fits()); 192 generator.set("relative_time_format_index_type"sv, cldr.unique_formats.type_that_fits()); 193 194 generator.append(R"~~~( 195#include <AK/Array.h> 196#include <AK/StringView.h> 197#include <AK/Vector.h> 198#include <LibLocale/Locale.h> 199#include <LibLocale/PluralRules.h> 200#include <LibLocale/RelativeTimeFormat.h> 201#include <LibLocale/RelativeTimeFormatData.h> 202 203namespace Locale { 204)~~~"); 205 206 cldr.unique_strings.generate(generator); 207 208 generator.append(R"~~~( 209struct RelativeTimeFormatImpl { 210 RelativeTimeFormat to_relative_time_format() const 211 { 212 RelativeTimeFormat relative_time_format {}; 213 relative_time_format.plurality = plurality; 214 relative_time_format.pattern = decode_string(pattern); 215 216 return relative_time_format; 217 } 218 219 TimeUnit time_unit; 220 Style style; 221 PluralCategory plurality; 222 @string_index_type@ tense_or_number { 0 }; 223 @string_index_type@ pattern { 0 }; 224}; 225)~~~"); 226 227 cldr.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10); 228 229 auto append_list = [&](DeprecatedString name, auto const& list) { 230 generator.set("name", name); 231 generator.set("size", DeprecatedString::number(list.size())); 232 233 
generator.append(R"~~~( 234static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~"); 235 236 bool first = true; 237 for (auto index : list) { 238 generator.append(first ? " "sv : ", "sv); 239 generator.append(DeprecatedString::number(index)); 240 first = false; 241 } 242 243 generator.append(" } };"); 244 }; 245 246 generate_mapping(generator, cldr.locales, cldr.unique_formats.type_that_fits(), "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); }); 247 248 generator.append(R"~~~( 249ErrorOr<Vector<RelativeTimeFormat>> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style) 250{ 251 Vector<RelativeTimeFormat> formats; 252 253 auto locale_value = locale_from_string(locale); 254 if (!locale_value.has_value()) 255 return formats; 256 257 auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None. 
258 auto const& locale_formats = s_locale_relative_time_formats.at(locale_index); 259 260 for (auto const& locale_format_index : locale_formats) { 261 auto const& locale_format = s_relative_time_formats.at(locale_format_index); 262 263 if (locale_format.time_unit != time_unit) 264 continue; 265 if (locale_format.style != style) 266 continue; 267 if (decode_string(locale_format.tense_or_number) != tense_or_number) 268 continue; 269 270 TRY(formats.try_append(locale_format.to_relative_time_format())); 271 } 272 273 return formats; 274} 275 276} 277)~~~"); 278 279 TRY(file.write_until_depleted(generator.as_string_view().bytes())); 280 return {}; 281} 282 283ErrorOr<int> serenity_main(Main::Arguments arguments) 284{ 285 StringView generated_header_path; 286 StringView generated_implementation_path; 287 StringView dates_path; 288 289 Core::ArgsParser args_parser; 290 args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path"); 291 args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path"); 292 args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path"); 293 args_parser.parse(arguments); 294 295 auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write)); 296 auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write)); 297 298 CLDR cldr; 299 TRY(parse_all_locales(dates_path, cldr)); 300 301 TRY(generate_unicode_locale_header(*generated_header_file, cldr)); 302 TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr)); 303 304 return 0; 305}