Serenity Operating System
1/*
2 * Copyright (c) 2022-2023, Tim Flynn <trflynn89@serenityos.org>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include "../LibUnicode/GeneratorUtil.h" // FIXME: Move this somewhere common.
8#include <AK/DeprecatedString.h>
9#include <AK/Format.h>
10#include <AK/HashMap.h>
11#include <AK/JsonObject.h>
12#include <AK/JsonParser.h>
13#include <AK/JsonValue.h>
14#include <AK/LexicalPath.h>
15#include <AK/SourceGenerator.h>
16#include <AK/StringBuilder.h>
17#include <LibCore/ArgsParser.h>
18#include <LibCore/DirIterator.h>
19#include <LibLocale/Locale.h>
20#include <LibLocale/RelativeTimeFormat.h>
21
22struct RelativeTimeFormat {
23 unsigned hash() const
24 {
25 auto hash = time_unit.hash();
26 hash = pair_int_hash(hash, style.hash());
27 hash = pair_int_hash(hash, plurality.hash());
28 hash = pair_int_hash(hash, tense_or_number);
29 hash = pair_int_hash(hash, pattern);
30 return hash;
31 }
32
33 bool operator==(RelativeTimeFormat const& other) const
34 {
35 return (time_unit == other.time_unit)
36 && (plurality == other.plurality)
37 && (style == other.style)
38 && (tense_or_number == other.tense_or_number)
39 && (pattern == other.pattern);
40 }
41
42 DeprecatedString time_unit;
43 DeprecatedString style;
44 DeprecatedString plurality;
45 size_t tense_or_number { 0 };
46 size_t pattern { 0 };
47};
48
49template<>
50struct AK::Formatter<RelativeTimeFormat> : Formatter<FormatString> {
51 ErrorOr<void> format(FormatBuilder& builder, RelativeTimeFormat const& format)
52 {
53 return Formatter<FormatString>::format(builder,
54 "{{ TimeUnit::{}, Style::{}, PluralCategory::{}, {}, {} }}"sv,
55 format.time_unit,
56 format.style,
57 format.plurality,
58 format.tense_or_number,
59 format.pattern);
60 }
61};
62
63template<>
64struct AK::Traits<RelativeTimeFormat> : public GenericTraits<RelativeTimeFormat> {
65 static unsigned hash(RelativeTimeFormat const& format) { return format.hash(); }
66};
67
68struct LocaleData {
69 Vector<size_t> time_units;
70};
71
72struct CLDR {
73 UniqueStringStorage unique_strings;
74 UniqueStorage<RelativeTimeFormat> unique_formats;
75
76 HashMap<DeprecatedString, LocaleData> locales;
77};
78
79static ErrorOr<void> parse_date_fields(DeprecatedString locale_dates_path, CLDR& cldr, LocaleData& locale)
80{
81 LexicalPath date_fields_path(move(locale_dates_path));
82 date_fields_path = date_fields_path.append("dateFields.json"sv);
83
84 auto date_fields = TRY(read_json_file(date_fields_path.string()));
85 auto const& main_object = date_fields.as_object().get_object("main"sv).value();
86 auto const& locale_object = main_object.get_object(date_fields_path.parent().basename()).value();
87 auto const& dates_object = locale_object.get_object("dates"sv).value();
88 auto const& fields_object = dates_object.get_object("fields"sv).value();
89
90 auto is_sanctioned_unit = [](auto unit) {
91 // This is a copy of the time units sanctioned for use within ECMA-402.
92 // https://tc39.es/ecma402/#sec-singularrelativetimeunit
93 return unit.is_one_of("second"sv, "minute"sv, "hour"sv, "day"sv, "week"sv, "month"sv, "quarter"sv, "year"sv);
94 };
95
96 auto parse_pattern = [&](auto unit, auto style, auto plurality, auto tense_or_number, auto const& pattern) {
97 RelativeTimeFormat format {};
98 format.time_unit = unit.to_titlecase_string();
99 format.style = style.to_titlecase_string();
100 format.plurality = plurality.to_titlecase_string();
101 format.tense_or_number = cldr.unique_strings.ensure(tense_or_number);
102 format.pattern = cldr.unique_strings.ensure(pattern.as_string());
103
104 locale.time_units.append(cldr.unique_formats.ensure(move(format)));
105 };
106
107 fields_object.for_each_member([&](auto const& unit_and_style, auto const& patterns) {
108 auto segments = unit_and_style.split_view('-');
109 auto unit = segments[0];
110 auto style = (segments.size() > 1) ? segments[1] : "long"sv;
111
112 if (!is_sanctioned_unit(unit))
113 return;
114
115 patterns.as_object().for_each_member([&](auto const& type, auto const& pattern_value) {
116 constexpr auto number_key = "relative-type-"sv;
117 constexpr auto tense_key = "relativeTime-type-"sv;
118 constexpr auto plurality_key = "relativeTimePattern-count-"sv;
119
120 if (type.starts_with(number_key)) {
121 auto number = type.substring_view(number_key.length());
122 parse_pattern(unit, style, "Other"sv, number, pattern_value);
123 } else if (type.starts_with(tense_key)) {
124 pattern_value.as_object().for_each_member([&](auto const& key, auto const& pattern) {
125 VERIFY(key.starts_with(plurality_key));
126 auto plurality = key.substring_view(plurality_key.length());
127 auto tense = type.substring_view(tense_key.length());
128
129 parse_pattern(unit, style, plurality, tense, pattern);
130 });
131 }
132 });
133 });
134
135 return {};
136}
137
138static ErrorOr<void> parse_all_locales(DeprecatedString dates_path, CLDR& cldr)
139{
140 auto dates_iterator = TRY(path_to_dir_iterator(move(dates_path)));
141
142 auto remove_variants_from_path = [&](DeprecatedString path) -> ErrorOr<DeprecatedString> {
143 auto parsed_locale = TRY(CanonicalLanguageID::parse(cldr.unique_strings, LexicalPath::basename(path)));
144
145 StringBuilder builder;
146 builder.append(cldr.unique_strings.get(parsed_locale.language));
147 if (auto script = cldr.unique_strings.get(parsed_locale.script); !script.is_empty())
148 builder.appendff("-{}", script);
149 if (auto region = cldr.unique_strings.get(parsed_locale.region); !region.is_empty())
150 builder.appendff("-{}", region);
151
152 return builder.to_deprecated_string();
153 };
154
155 while (dates_iterator.has_next()) {
156 auto dates_path = TRY(next_path_from_dir_iterator(dates_iterator));
157 auto language = TRY(remove_variants_from_path(dates_path));
158
159 auto& locale = cldr.locales.ensure(language);
160 TRY(parse_date_fields(move(dates_path), cldr, locale));
161 }
162
163 return {};
164}
165
166static ErrorOr<void> generate_unicode_locale_header(Core::BufferedFile& file, CLDR&)
167{
168 StringBuilder builder;
169 SourceGenerator generator { builder };
170
171 generator.append(R"~~~(
172#pragma once
173
174#include <LibLocale/Forward.h>
175
176namespace Locale {
177)~~~");
178
179 generator.append(R"~~~(
180}
181)~~~");
182
183 TRY(file.write_until_depleted(generator.as_string_view().bytes()));
184 return {};
185}
186
187static ErrorOr<void> generate_unicode_locale_implementation(Core::BufferedFile& file, CLDR& cldr)
188{
189 StringBuilder builder;
190 SourceGenerator generator { builder };
191 generator.set("string_index_type"sv, cldr.unique_strings.type_that_fits());
192 generator.set("relative_time_format_index_type"sv, cldr.unique_formats.type_that_fits());
193
194 generator.append(R"~~~(
195#include <AK/Array.h>
196#include <AK/StringView.h>
197#include <AK/Vector.h>
198#include <LibLocale/Locale.h>
199#include <LibLocale/PluralRules.h>
200#include <LibLocale/RelativeTimeFormat.h>
201#include <LibLocale/RelativeTimeFormatData.h>
202
203namespace Locale {
204)~~~");
205
206 cldr.unique_strings.generate(generator);
207
208 generator.append(R"~~~(
209struct RelativeTimeFormatImpl {
210 RelativeTimeFormat to_relative_time_format() const
211 {
212 RelativeTimeFormat relative_time_format {};
213 relative_time_format.plurality = plurality;
214 relative_time_format.pattern = decode_string(pattern);
215
216 return relative_time_format;
217 }
218
219 TimeUnit time_unit;
220 Style style;
221 PluralCategory plurality;
222 @string_index_type@ tense_or_number { 0 };
223 @string_index_type@ pattern { 0 };
224};
225)~~~");
226
227 cldr.unique_formats.generate(generator, "RelativeTimeFormatImpl"sv, "s_relative_time_formats"sv, 10);
228
229 auto append_list = [&](DeprecatedString name, auto const& list) {
230 generator.set("name", name);
231 generator.set("size", DeprecatedString::number(list.size()));
232
233 generator.append(R"~~~(
234static constexpr Array<@relative_time_format_index_type@, @size@> @name@ { {)~~~");
235
236 bool first = true;
237 for (auto index : list) {
238 generator.append(first ? " "sv : ", "sv);
239 generator.append(DeprecatedString::number(index));
240 first = false;
241 }
242
243 generator.append(" } };");
244 };
245
246 generate_mapping(generator, cldr.locales, cldr.unique_formats.type_that_fits(), "s_locale_relative_time_formats"sv, "s_number_systems_digits_{}"sv, nullptr, [&](auto const& name, auto const& value) { append_list(name, value.time_units); });
247
248 generator.append(R"~~~(
249ErrorOr<Vector<RelativeTimeFormat>> get_relative_time_format_patterns(StringView locale, TimeUnit time_unit, StringView tense_or_number, Style style)
250{
251 Vector<RelativeTimeFormat> formats;
252
253 auto locale_value = locale_from_string(locale);
254 if (!locale_value.has_value())
255 return formats;
256
257 auto locale_index = to_underlying(*locale_value) - 1; // Subtract 1 because 0 == Locale::None.
258 auto const& locale_formats = s_locale_relative_time_formats.at(locale_index);
259
260 for (auto const& locale_format_index : locale_formats) {
261 auto const& locale_format = s_relative_time_formats.at(locale_format_index);
262
263 if (locale_format.time_unit != time_unit)
264 continue;
265 if (locale_format.style != style)
266 continue;
267 if (decode_string(locale_format.tense_or_number) != tense_or_number)
268 continue;
269
270 TRY(formats.try_append(locale_format.to_relative_time_format()));
271 }
272
273 return formats;
274}
275
276}
277)~~~");
278
279 TRY(file.write_until_depleted(generator.as_string_view().bytes()));
280 return {};
281}
282
283ErrorOr<int> serenity_main(Main::Arguments arguments)
284{
285 StringView generated_header_path;
286 StringView generated_implementation_path;
287 StringView dates_path;
288
289 Core::ArgsParser args_parser;
290 args_parser.add_option(generated_header_path, "Path to the Unicode locale header file to generate", "generated-header-path", 'h', "generated-header-path");
291 args_parser.add_option(generated_implementation_path, "Path to the Unicode locale implementation file to generate", "generated-implementation-path", 'c', "generated-implementation-path");
292 args_parser.add_option(dates_path, "Path to cldr-dates directory", "dates-path", 'd', "dates-path");
293 args_parser.parse(arguments);
294
295 auto generated_header_file = TRY(open_file(generated_header_path, Core::File::OpenMode::Write));
296 auto generated_implementation_file = TRY(open_file(generated_implementation_path, Core::File::OpenMode::Write));
297
298 CLDR cldr;
299 TRY(parse_all_locales(dates_path, cldr));
300
301 TRY(generate_unicode_locale_header(*generated_header_file, cldr));
302 TRY(generate_unicode_locale_implementation(*generated_implementation_file, cldr));
303
304 return 0;
305}