Serenity Operating System
1/*
2 * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/Types.h>
8#include <LibCompress/BrotliDictionary.h>
9
10// Include the 119.9 KiB of dictionary data from a binary file
11extern u8 const brotli_dictionary_data[];
12#if defined(AK_OS_MACOS)
13asm(".const_data\n"
14 ".globl _brotli_dictionary_data\n"
15 "_brotli_dictionary_data:\n");
16#elif defined(AK_OS_EMSCRIPTEN)
17asm(".section .data, \"\",@\n"
18 ".global brotli_dictionary_data\n"
19 "brotli_dictionary_data:\n");
20#else
21asm(".section .rodata\n"
22 ".global brotli_dictionary_data\n"
23 "brotli_dictionary_data:\n");
24#endif
25asm(".incbin \"LibCompress/BrotliDictionaryData.bin\"\n"
26#if (!defined(AK_OS_WINDOWS) && !defined(AK_OS_EMSCRIPTEN))
27 ".previous\n");
28#else
29);
30#endif
31
32namespace Compress {
33
34static size_t const bits_by_length[25] {
35 0, 0, 0, 0, 10, 10, 11, 11, 10, 10, 10, 10, 10, 9, 9, 8, 7, 7, 8, 7, 7, 6, 6, 5, 5
36};
37
38static size_t const offset_by_length[25] {
39 0, 0, 0, 0, 0, 4096, 9216, 21504, 35840, 44032, 53248, 63488, 74752, 87040, 93696, 100864,
40 104704, 106752, 108928, 113536, 115968, 118528, 119872, 121280, 122016
41};
42
43static int ferment(Bytes word, size_t pos)
44{
45 if (word[pos] < 192) {
46 if (word[pos] >= 97 && word[pos] <= 122) {
47 word[pos] = word[pos] ^ 32;
48 }
49 return 1;
50 } else if (word[pos] < 224) {
51 if (pos + 1 < word.size()) {
52 word[pos + 1] = word[pos + 1] ^ 32;
53 }
54 return 2;
55 } else {
56 if (pos + 2 < word.size()) {
57 word[pos + 2] = word[pos + 2] ^ 5;
58 }
59 return 3;
60 }
61}
62
63static void ferment_first(Bytes word)
64{
65 if (word.size() > 0) {
66 ferment(word, 0);
67 }
68}
69
70[[maybe_unused]] static void ferment_all(Bytes word)
71{
72 size_t i = 0;
73 while (i < word.size()) {
74 i += ferment(word, i);
75 }
76}
77
78using BrotliDictionary::TransformationOperation::FermentAll;
79using BrotliDictionary::TransformationOperation::FermentFirst;
80using BrotliDictionary::TransformationOperation::Identity;
81using BrotliDictionary::TransformationOperation::OmitFirst;
82using BrotliDictionary::TransformationOperation::OmitLast;
83constexpr static BrotliDictionary::Transformation transformations[121] {
84 // ID Prefix Transform Suffix
85 // -- ------ --------- ------
86 { ""sv, Identity, 0, ""sv }, // 0 "" Identity ""
87 { ""sv, Identity, 0, " "sv }, // 1 "" Identity " "
88 { " "sv, Identity, 0, " "sv }, // 2 " " Identity " "
89 { ""sv, OmitFirst, 1, ""sv }, // 3 "" OmitFirst1 ""
90 { ""sv, FermentFirst, 0, " "sv }, // 4 "" FermentFirst " "
91 { ""sv, Identity, 0, " the "sv }, // 5 "" Identity " the "
92 { " "sv, Identity, 0, ""sv }, // 6 " " Identity ""
93 { "s "sv, Identity, 0, " "sv }, // 7 "s " Identity " "
94 { ""sv, Identity, 0, " of "sv }, // 8 "" Identity " of "
95 { ""sv, FermentFirst, 0, ""sv }, // 9 "" FermentFirst ""
96 { ""sv, Identity, 0, " and "sv }, // 10 "" Identity " and "
97 { ""sv, OmitFirst, 2, ""sv }, // 11 "" OmitFirst2 ""
98 { ""sv, OmitLast, 1, ""sv }, // 12 "" OmitLast1 ""
99 { ", "sv, Identity, 0, " "sv }, // 13 ", " Identity " "
100 { ""sv, Identity, 0, ", "sv }, // 14 "" Identity ", "
101 { " "sv, FermentFirst, 0, " "sv }, // 15 " " FermentFirst " "
102 { ""sv, Identity, 0, " in "sv }, // 16 "" Identity " in "
103 { ""sv, Identity, 0, " to "sv }, // 17 "" Identity " to "
104 { "e "sv, Identity, 0, " "sv }, // 18 "e " Identity " "
105 { ""sv, Identity, 0, "\""sv }, // 19 "" Identity "\""
106 { ""sv, Identity, 0, "."sv }, // 20 "" Identity "."
107 { ""sv, Identity, 0, "\">"sv }, // 21 "" Identity "\">"
108 { ""sv, Identity, 0, "\n"sv }, // 22 "" Identity "\n"
109 { ""sv, OmitLast, 3, ""sv }, // 23 "" OmitLast3 ""
110 { ""sv, Identity, 0, "]"sv }, // 24 "" Identity "]"
111 { ""sv, Identity, 0, " for "sv }, // 25 "" Identity " for "
112 { ""sv, OmitFirst, 3, ""sv }, // 26 "" OmitFirst3 ""
113 { ""sv, OmitLast, 2, ""sv }, // 27 "" OmitLast2 ""
114 { ""sv, Identity, 0, " a "sv }, // 28 "" Identity " a "
115 { ""sv, Identity, 0, " that "sv }, // 29 "" Identity " that "
116 { " "sv, FermentFirst, 0, ""sv }, // 30 " " FermentFirst ""
117 { ""sv, Identity, 0, ". "sv }, // 31 "" Identity ". "
118 { "."sv, Identity, 0, ""sv }, // 32 "." Identity ""
119 { " "sv, Identity, 0, ", "sv }, // 33 " " Identity ", "
120 { ""sv, OmitFirst, 4, ""sv }, // 34 "" OmitFirst4 ""
121 { ""sv, Identity, 0, " with "sv }, // 35 "" Identity " with "
122 { ""sv, Identity, 0, "'"sv }, // 36 "" Identity "'"
123 { ""sv, Identity, 0, " from "sv }, // 37 "" Identity " from "
124 { ""sv, Identity, 0, " by "sv }, // 38 "" Identity " by "
125 { ""sv, OmitFirst, 5, ""sv }, // 39 "" OmitFirst5 ""
126 { ""sv, OmitFirst, 6, ""sv }, // 40 "" OmitFirst6 ""
127 { " the "sv, Identity, 0, ""sv }, // 41 " the " Identity ""
128 { ""sv, OmitLast, 4, ""sv }, // 42 "" OmitLast4 ""
129 { ""sv, Identity, 0, ". The "sv }, // 43 "" Identity ". The "
130 { ""sv, FermentAll, 0, ""sv }, // 44 "" FermentAll ""
131 { ""sv, Identity, 0, " on "sv }, // 45 "" Identity " on "
132 { ""sv, Identity, 0, " as "sv }, // 46 "" Identity " as "
133 { ""sv, Identity, 0, " is "sv }, // 47 "" Identity " is "
134 { ""sv, OmitLast, 7, ""sv }, // 48 "" OmitLast7 ""
135 { ""sv, OmitLast, 1, "ing "sv }, // 49 "" OmitLast1 "ing "
136 { ""sv, Identity, 0, "\n\t"sv }, // 50 "" Identity "\n\t"
137 { ""sv, Identity, 0, ":"sv }, // 51 "" Identity ":"
138 { " "sv, Identity, 0, ". "sv }, // 52 " " Identity ". "
139 { ""sv, Identity, 0, "ed "sv }, // 53 "" Identity "ed "
140 { ""sv, OmitFirst, 9, ""sv }, // 54 "" OmitFirst9 ""
141 { ""sv, OmitFirst, 7, ""sv }, // 55 "" OmitFirst7 ""
142 { ""sv, OmitLast, 6, ""sv }, // 56 "" OmitLast6 ""
143 { ""sv, Identity, 0, "("sv }, // 57 "" Identity "("
144 { ""sv, FermentFirst, 0, ", "sv }, // 58 "" FermentFirst ", "
145 { ""sv, OmitLast, 8, ""sv }, // 59 "" OmitLast8 ""
146 { ""sv, Identity, 0, " at "sv }, // 60 "" Identity " at "
147 { ""sv, Identity, 0, "ly "sv }, // 61 "" Identity "ly "
148 { " the "sv, Identity, 0, " of "sv }, // 62 " the " Identity " of "
149 { ""sv, OmitLast, 5, ""sv }, // 63 "" OmitLast5 ""
150 { ""sv, OmitLast, 9, ""sv }, // 64 "" OmitLast9 ""
151 { " "sv, FermentFirst, 0, ", "sv }, // 65 " " FermentFirst ", "
152 { ""sv, FermentFirst, 0, "\""sv }, // 66 "" FermentFirst "\""
153 { "."sv, Identity, 0, "("sv }, // 67 "." Identity "("
154 { ""sv, FermentAll, 0, " "sv }, // 68 "" FermentAll " "
155 { ""sv, FermentFirst, 0, "\">"sv }, // 69 "" FermentFirst "\">"
156 { ""sv, Identity, 0, "=\""sv }, // 70 "" Identity "=\""
157 { " "sv, Identity, 0, "."sv }, // 71 " " Identity "."
158 { ".com/"sv, Identity, 0, ""sv }, // 72 ".com/" Identity ""
159 { " the "sv, Identity, 0, " of the "sv }, // 73 " the " Identity " of the "
160 { ""sv, FermentFirst, 0, "'"sv }, // 74 "" FermentFirst "'"
161 { ""sv, Identity, 0, ". This "sv }, // 75 "" Identity ". This "
162 { ""sv, Identity, 0, ","sv }, // 76 "" Identity ","
163 { "."sv, Identity, 0, " "sv }, // 77 "." Identity " "
164 { ""sv, FermentFirst, 0, "("sv }, // 78 "" FermentFirst "("
165 { ""sv, FermentFirst, 0, "."sv }, // 79 "" FermentFirst "."
166 { ""sv, Identity, 0, " not "sv }, // 80 "" Identity " not "
167 { " "sv, Identity, 0, "=\""sv }, // 81 " " Identity "=\""
168 { ""sv, Identity, 0, "er "sv }, // 82 "" Identity "er "
169 { " "sv, FermentAll, 0, " "sv }, // 83 " " FermentAll " "
170 { ""sv, Identity, 0, "al "sv }, // 84 "" Identity "al "
171 { " "sv, FermentAll, 0, ""sv }, // 85 " " FermentAll ""
172 { ""sv, Identity, 0, "='"sv }, // 86 "" Identity "='"
173 { ""sv, FermentAll, 0, "\""sv }, // 87 "" FermentAll "\""
174 { ""sv, FermentFirst, 0, ". "sv }, // 88 "" FermentFirst ". "
175 { " "sv, Identity, 0, "("sv }, // 89 " " Identity "("
176 { ""sv, Identity, 0, "ful "sv }, // 90 "" Identity "ful "
177 { " "sv, FermentFirst, 0, ". "sv }, // 91 " " FermentFirst ". "
178 { ""sv, Identity, 0, "ive "sv }, // 92 "" Identity "ive "
179 { ""sv, Identity, 0, "less "sv }, // 93 "" Identity "less "
180 { ""sv, FermentAll, 0, "'"sv }, // 94 "" FermentAll "'"
181 { ""sv, Identity, 0, "est "sv }, // 95 "" Identity "est "
182 { " "sv, FermentFirst, 0, "."sv }, // 96 " " FermentFirst "."
183 { ""sv, FermentAll, 0, "\">"sv }, // 97 "" FermentAll "\">"
184 { " "sv, Identity, 0, "='"sv }, // 98 " " Identity "='"
185 { ""sv, FermentFirst, 0, ","sv }, // 99 "" FermentFirst ","
186 { ""sv, Identity, 0, "ize "sv }, // 100 "" Identity "ize "
187 { ""sv, FermentAll, 0, "."sv }, // 101 "" FermentAll "."
188 { "\xc2\xa0"sv, Identity, 0, ""sv }, // 102 "\xc2\xa0" Identity ""
189 { " "sv, Identity, 0, ","sv }, // 103 " " Identity ","
190 { ""sv, FermentFirst, 0, "=\""sv }, // 104 "" FermentFirst "=\""
191 { ""sv, FermentAll, 0, "=\""sv }, // 105 "" FermentAll "=\""
192 { ""sv, Identity, 0, "ous "sv }, // 106 "" Identity "ous "
193 { ""sv, FermentAll, 0, ", "sv }, // 107 "" FermentAll ", "
194 { ""sv, FermentFirst, 0, "='"sv }, // 108 "" FermentFirst "='"
195 { " "sv, FermentFirst, 0, ","sv }, // 109 " " FermentFirst ","
196 { " "sv, FermentAll, 0, "=\""sv }, // 110 " " FermentAll "=\""
197 { " "sv, FermentAll, 0, ", "sv }, // 111 " " FermentAll ", "
198 { ""sv, FermentAll, 0, ","sv }, // 112 "" FermentAll ","
199 { ""sv, FermentAll, 0, "("sv }, // 113 "" FermentAll "("
200 { ""sv, FermentAll, 0, ". "sv }, // 114 "" FermentAll ". "
201 { " "sv, FermentAll, 0, "."sv }, // 115 " " FermentAll "."
202 { ""sv, FermentAll, 0, "='"sv }, // 116 "" FermentAll "='"
203 { " "sv, FermentAll, 0, ". "sv }, // 117 " " FermentAll ". "
204 { " "sv, FermentFirst, 0, "=\""sv }, // 118 " " FermentFirst "=\""
205 { " "sv, FermentAll, 0, "='"sv }, // 119 " " FermentAll "='"
206 { " "sv, FermentFirst, 0, "='"sv }, // 120 " " FermentFirst "='"
207};
208
209ErrorOr<ByteBuffer> BrotliDictionary::lookup_word(size_t index, size_t length)
210{
211 if (length < 4 || length > 24)
212 return Error::from_string_literal("invalid dictionary lookup length");
213
214 size_t word_index = index % (1 << bits_by_length[length]);
215 ReadonlyBytes base_word { brotli_dictionary_data + offset_by_length[length] + (word_index * length), length };
216 size_t transform_id = index >> bits_by_length[length];
217
218 if (transform_id >= 121)
219 return Error::from_string_literal("invalid dictionary transformation");
220
221 auto transformation = transformations[transform_id];
222 ByteBuffer bb;
223 bb.append(transformation.prefix.bytes());
224 size_t prefix_length = bb.size();
225
226 switch (transformation.operation) {
227 case TransformationOperation::Identity:
228 bb.append(base_word);
229 break;
230 case TransformationOperation::FermentFirst:
231 bb.append(base_word);
232 ferment_first(bb.bytes().slice(prefix_length));
233 break;
234 case TransformationOperation::FermentAll:
235 bb.append(base_word);
236 ferment_all(bb.bytes().slice(prefix_length));
237 break;
238 case TransformationOperation::OmitFirst:
239 if (transformation.operation_data < base_word.size())
240 bb.append(base_word.slice(transformation.operation_data));
241 break;
242 case TransformationOperation::OmitLast:
243 if (transformation.operation_data < base_word.size())
244 bb.append(base_word.slice(0, base_word.size() - transformation.operation_data));
245 break;
246 }
247
248 bb.append(transformation.suffix.bytes());
249 return bb;
250}
251
252}