Serenity Operating System
1/*
2 * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl>
3 *
4 * SPDX-License-Identifier: BSD-2-Clause
5 */
6
7#include <AK/BinarySearch.h>
8#include <AK/QuickSort.h>
9#include <LibCompress/Brotli.h>
10#include <LibCompress/BrotliDictionary.h>
11
12namespace Compress {
13
14ErrorOr<size_t> BrotliDecompressionStream::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream)
15{
16 size_t code_bits = 1;
17
18 while (code_bits < (1 << 16)) {
19 // FIXME: This is very inefficient and could greatly be improved by implementing this
20 // algorithm: https://www.hanshq.net/zip.html#huffdec
21 size_t index;
22 if (binary_search(m_symbol_codes.span(), code_bits, &index))
23 return m_symbol_values[index];
24
25 code_bits = (code_bits << 1) | TRY(input_stream.read_bit());
26 }
27
28 return Error::from_string_literal("no matching code found");
29}
30
31BrotliDecompressionStream::BrotliDecompressionStream(Stream& stream)
32 : m_input_stream(MaybeOwned(stream))
33{
34}
35
36ErrorOr<size_t> BrotliDecompressionStream::read_window_length()
37{
38 if (TRY(m_input_stream.read_bit())) {
39 switch (TRY(m_input_stream.read_bits(3))) {
40 case 0: {
41 switch (TRY(m_input_stream.read_bits(3))) {
42 case 0:
43 return 17;
44 case 1:
45 return Error::from_string_literal("invalid window length");
46 case 2:
47 return 10;
48 case 3:
49 return 11;
50 case 4:
51 return 12;
52 case 5:
53 return 13;
54 case 6:
55 return 14;
56 case 7:
57 return 15;
58 default:
59 VERIFY_NOT_REACHED();
60 }
61 }
62 case 1:
63 return 18;
64 case 2:
65 return 19;
66 case 3:
67 return 20;
68 case 4:
69 return 21;
70 case 5:
71 return 22;
72 case 6:
73 return 23;
74 case 7:
75 return 24;
76 default:
77 VERIFY_NOT_REACHED();
78 }
79 } else {
80 return 16;
81 }
82}
83
84ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles()
85{
86 switch (TRY(m_input_stream.read_bits(2))) {
87 case 0:
88 return 4;
89 case 1:
90 return 5;
91 case 2:
92 return 6;
93 case 3:
94 return 0;
95 default:
96 VERIFY_NOT_REACHED();
97 }
98}
99
100ErrorOr<size_t> BrotliDecompressionStream::read_variable_length()
101{
102 // Value Bit Pattern
103 // ----- -----------
104 // 1 0
105 // 2 0001
106 // 3..4 x0011
107 // 5..8 xx0101
108 // 9..16 xxx0111
109 // 17..32 xxxx1001
110 // 33..64 xxxxx1011
111 // 65..128 xxxxxx1101
112 // 129..256 xxxxxxx1111
113
114 if (TRY(m_input_stream.read_bit())) {
115 switch (TRY(m_input_stream.read_bits(3))) {
116 case 0:
117 return 2;
118 case 1:
119 return 3 + TRY(m_input_stream.read_bits(1));
120 case 2:
121 return 5 + TRY(m_input_stream.read_bits(2));
122 case 3:
123 return 9 + TRY(m_input_stream.read_bits(3));
124 case 4:
125 return 17 + TRY(m_input_stream.read_bits(4));
126 case 5:
127 return 33 + TRY(m_input_stream.read_bits(5));
128 case 6:
129 return 65 + TRY(m_input_stream.read_bits(6));
130 case 7:
131 return 129 + TRY(m_input_stream.read_bits(7));
132 default:
133 VERIFY_NOT_REACHED();
134 }
135 } else {
136 return 1;
137 }
138}
139
140ErrorOr<size_t> BrotliDecompressionStream::read_complex_prefix_code_length()
141{
142 // Symbol Code
143 // ------ ----
144 // 0 00
145 // 1 0111
146 // 2 011
147 // 3 10
148 // 4 01
149 // 5 1111
150
151 switch (TRY(m_input_stream.read_bits(2))) {
152 case 0:
153 return 0;
154 case 1:
155 return 4;
156 case 2:
157 return 3;
158 case 3: {
159 if (TRY(m_input_stream.read_bit()) == 0) {
160 return 2;
161 } else {
162 if (TRY(m_input_stream.read_bit()) == 0) {
163 return 1;
164 } else {
165 return 5;
166 }
167 }
168 }
169 default:
170 VERIFY_NOT_REACHED();
171 }
172}
173
174ErrorOr<void> BrotliDecompressionStream::read_prefix_code(CanonicalCode& code, size_t alphabet_size)
175{
176 size_t hskip = TRY(m_input_stream.read_bits(2));
177
178 if (hskip == 1) {
179 TRY(read_simple_prefix_code(code, alphabet_size));
180 } else {
181 TRY(read_complex_prefix_code(code, alphabet_size, hskip));
182 }
183
184 return {};
185}
186
187ErrorOr<void> BrotliDecompressionStream::read_simple_prefix_code(CanonicalCode& code, size_t alphabet_size)
188{
189 VERIFY(code.m_symbol_codes.is_empty());
190 VERIFY(code.m_symbol_values.is_empty());
191
192 size_t number_of_symbols = 1 + TRY(m_input_stream.read_bits(2));
193
194 size_t symbol_size = 0;
195 while ((1u << symbol_size) < alphabet_size)
196 symbol_size++;
197
198 Vector<size_t> symbols;
199 for (size_t i = 0; i < number_of_symbols; i++) {
200 size_t symbol = TRY(m_input_stream.read_bits(symbol_size));
201 symbols.append(symbol);
202
203 if (symbol >= alphabet_size)
204 return Error::from_string_literal("symbol larger than alphabet");
205 }
206
207 if (number_of_symbols == 1) {
208 code.m_symbol_codes.append(0b1);
209 code.m_symbol_values = move(symbols);
210 } else if (number_of_symbols == 2) {
211 code.m_symbol_codes.extend({ 0b10, 0b11 });
212 if (symbols[0] > symbols[1])
213 swap(symbols[0], symbols[1]);
214 code.m_symbol_values = move(symbols);
215 } else if (number_of_symbols == 3) {
216 code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 });
217 if (symbols[1] > symbols[2])
218 swap(symbols[1], symbols[2]);
219 code.m_symbol_values = move(symbols);
220 } else if (number_of_symbols == 4) {
221 bool tree_select = TRY(m_input_stream.read_bit());
222 if (tree_select) {
223 code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 });
224 if (symbols[2] > symbols[3])
225 swap(symbols[2], symbols[3]);
226 code.m_symbol_values = move(symbols);
227 } else {
228 code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 });
229 quick_sort(symbols);
230 code.m_symbol_values = move(symbols);
231 }
232 }
233
234 return {};
235}
236
237ErrorOr<void> BrotliDecompressionStream::read_complex_prefix_code(CanonicalCode& code, size_t alphabet_size, size_t hskip)
238{
239 // hskip should only be 0, 2 or 3
240 VERIFY(hskip != 1);
241 VERIFY(hskip <= 3);
242
243 // Read the prefix code_value that is used to encode the actual prefix code_value
244 size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 };
245 size_t code_length[18] { 0 };
246 size_t code_length_counts[6] { 0 };
247
248 size_t sum = 0;
249 size_t number_of_non_zero_symbols = 0;
250 for (size_t i = hskip; i < 18; i++) {
251 size_t len = TRY(read_complex_prefix_code_length());
252 code_length[symbol_mapping[i]] = len;
253
254 if (len != 0) {
255 code_length_counts[len]++;
256 sum += (32 >> len);
257 number_of_non_zero_symbols++;
258 }
259
260 if (sum == 32)
261 break;
262 else if (sum > 32)
263 return Error::from_string_literal("invalid prefix code");
264 }
265
266 BrotliDecompressionStream::CanonicalCode temp_code;
267 if (number_of_non_zero_symbols > 1) {
268 size_t code_value = 0;
269 for (size_t bits = 1; bits <= 5; bits++) {
270 code_value = (code_value + code_length_counts[bits - 1]) << 1;
271 size_t current_code_value = code_value;
272
273 for (size_t i = 0; i < 18; i++) {
274 size_t len = code_length[i];
275 if (len == bits) {
276 temp_code.m_symbol_codes.append((1 << bits) | current_code_value);
277 temp_code.m_symbol_values.append(i);
278 current_code_value++;
279 }
280 }
281 }
282 } else {
283 for (size_t i = 0; i < 18; i++) {
284 size_t len = code_length[i];
285 if (len != 0) {
286 temp_code.m_symbol_codes.append(1);
287 temp_code.m_symbol_values.append(i);
288 break;
289 }
290 }
291 }
292
293 // Read the actual prefix code_value
294 sum = 0;
295 size_t i = 0;
296
297 size_t previous_non_zero_code_length = 8;
298 size_t last_symbol = 0;
299 size_t last_repeat = 0;
300
301 Vector<size_t> result_symbols;
302 Vector<size_t> result_lengths;
303 size_t result_lengths_count[16] { 0 };
304 while (i < alphabet_size) {
305 auto symbol = TRY(temp_code.read_symbol(m_input_stream));
306
307 if (symbol < 16) {
308 result_symbols.append(i);
309 result_lengths.append(symbol);
310 result_lengths_count[symbol]++;
311
312 if (symbol != 0) {
313 previous_non_zero_code_length = symbol;
314 sum += (32768 >> symbol);
315 if (sum == 32768)
316 break;
317 else if (sum > 32768)
318 return Error::from_string_literal("invalid prefix code");
319 }
320
321 last_repeat = 0;
322 i++;
323 } else if (symbol == 16) {
324 size_t repeat_count = 0;
325 if (last_symbol == 16 && last_repeat != 0) {
326 repeat_count = (4 * (last_repeat - 2));
327 } else {
328 last_repeat = 0;
329 }
330 repeat_count += 3 + TRY(m_input_stream.read_bits(2));
331
332 for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) {
333 result_symbols.append(i);
334 result_lengths.append(previous_non_zero_code_length);
335 result_lengths_count[previous_non_zero_code_length]++;
336
337 if (previous_non_zero_code_length != 0) {
338 sum += (32768 >> previous_non_zero_code_length);
339 if (sum == 32768)
340 break;
341 else if (sum > 32768)
342 return Error::from_string_literal("invalid prefix code");
343 }
344
345 i++;
346 if (i >= alphabet_size)
347 break;
348 }
349 if (sum == 32768)
350 break;
351 VERIFY(sum < 32768);
352
353 last_repeat = repeat_count;
354 } else if (symbol == 17) {
355 size_t repeat_count = 0;
356 if (last_symbol == 17 && last_repeat != 0) {
357 repeat_count = (8 * (last_repeat - 2));
358 } else {
359 last_repeat = 0;
360 }
361 repeat_count += 3 + TRY(m_input_stream.read_bits(3));
362
363 i += (repeat_count - last_repeat);
364 last_repeat = repeat_count;
365 }
366
367 last_symbol = symbol;
368 }
369 result_lengths_count[0] = 0;
370
371 size_t code_value = 0;
372 for (size_t bits = 1; bits < 16; bits++) {
373 code_value = (code_value + result_lengths_count[bits - 1]) << 1;
374 size_t current_code_value = code_value;
375
376 for (size_t n = 0; n < result_symbols.size(); n++) {
377 size_t len = result_lengths[n];
378 if (len == bits) {
379 code.m_symbol_codes.append((1 << bits) | current_code_value);
380 code.m_symbol_values.append(result_symbols[n]);
381 current_code_value++;
382 }
383 }
384 }
385
386 return {};
387}
388
389static void inverse_move_to_front_transform(Span<u8> v)
390{
391 // RFC 7932 section 7.3
392 u8 mtf[256];
393 for (size_t i = 0; i < 256; ++i) {
394 mtf[i] = (u8)i;
395 }
396 for (size_t i = 0; i < v.size(); ++i) {
397 u8 index = v[i];
398 u8 value = mtf[index];
399 v[i] = value;
400 for (; index; --index) {
401 mtf[index] = mtf[index - 1];
402 }
403 mtf[0] = value;
404 }
405}
406
407ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size)
408{
409 bool use_run_length_encoding = TRY(m_input_stream.read_bit());
410 size_t run_length_encoding_max = 0;
411 if (use_run_length_encoding) {
412 run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4));
413 }
414
415 BrotliDecompressionStream::CanonicalCode code;
416 TRY(read_prefix_code(code, number_of_codes + run_length_encoding_max));
417
418 size_t i = 0;
419 while (i < context_map_size) {
420 size_t symbol = TRY(code.read_symbol(m_input_stream));
421
422 if (symbol <= run_length_encoding_max) {
423 size_t repeat_base = 1 << symbol;
424 size_t repeat_additional = TRY(m_input_stream.read_bits(symbol));
425 size_t repeat_count = repeat_base + repeat_additional;
426 while (repeat_count--) {
427 context_map.append(0);
428 i++;
429 }
430 } else {
431 size_t value = symbol - run_length_encoding_max;
432 context_map.append(value);
433 i++;
434 }
435 }
436
437 bool inverse_move_to_front = TRY(m_input_stream.read_bit());
438 if (inverse_move_to_front)
439 inverse_move_to_front_transform(context_map.span());
440
441 return {};
442}
443
444ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block)
445{
446 size_t blocks_of_type = TRY(read_variable_length());
447
448 block.type = 0;
449 block.type_previous = 1;
450 block.number_of_types = blocks_of_type;
451
452 block.type_code.clear();
453 block.length_code.clear();
454
455 if (blocks_of_type == 1) {
456 block.length = 16 * MiB;
457 } else {
458 TRY(read_prefix_code(block.type_code, 2 + blocks_of_type));
459 TRY(read_prefix_code(block.length_code, 26));
460 TRY(block_update_length(block));
461 }
462
463 return {};
464}
465
466ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block)
467{
468 size_t const block_length_code_base[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 };
469 size_t const block_length_code_extra[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 };
470
471 size_t symbol = TRY(block.length_code.read_symbol(m_input_stream));
472 size_t block_length = block_length_code_base[symbol] + TRY(m_input_stream.read_bits(block_length_code_extra[symbol]));
473
474 block.length = block_length;
475 return {};
476}
477
478ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block)
479{
480 size_t block_type_symbol = TRY(block.type_code.read_symbol(m_input_stream));
481 TRY(block_update_length(block));
482
483 if (block_type_symbol == 0) {
484 swap(block.type, block.type_previous);
485 } else if (block_type_symbol == 1) {
486 block.type_previous = block.type;
487 block.type = (block.type + 1) % block.number_of_types;
488 } else {
489 block.type_previous = block.type;
490 block.type = block_type_symbol - 2;
491 }
492
493 return {};
494}
495
496size_t BrotliDecompressionStream::literal_code_index_from_context()
497{
498 size_t const context_id_lut0[256] {
499 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0,
500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
501 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12,
502 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12,
503 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48,
504 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12,
505 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56,
506 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0,
507 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
508 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
509 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
510 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
511 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
512 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
513 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3,
514 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3
515 };
516 size_t const context_id_lut1[256] {
517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
519 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
520 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1,
521 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
522 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1,
523 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
524 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0,
525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
531 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
532 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2
533 };
534 size_t const context_id_lut2[256] {
535 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
536 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
537 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
538 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
539 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
540 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
541 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
542 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
543 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
544 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
545 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
546 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
547 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
548 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
549 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
550 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7
551 };
552
553 size_t context_mode = m_literal_context_modes[m_literal_block.type];
554 size_t context_id;
555 switch (context_mode) {
556 case 0:
557 context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f;
558 break;
559 case 1:
560 context_id = m_lookback_buffer.value().lookback(1, 0) >> 2;
561 break;
562 case 2:
563 context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)];
564 break;
565 case 3:
566 context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)];
567 break;
568 default:
569 VERIFY_NOT_REACHED();
570 }
571
572 size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id];
573 return literal_code_index;
574}
575
576ErrorOr<Bytes> BrotliDecompressionStream::read_some(Bytes output_buffer)
577{
578 size_t bytes_read = 0;
579 while (bytes_read < output_buffer.size()) {
580 if (m_current_state == State::WindowSize) {
581 size_t window_bits = TRY(read_window_length());
582 m_window_size = (1 << window_bits) - 16;
583
584 m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size));
585
586 m_current_state = State::Idle;
587 } else if (m_current_state == State::Idle) {
588 // If the final block was read, we are done decompressing
589 if (m_read_final_block)
590 break;
591
592 // RFC 7932 section 9.1
593 //
594 // 1 bit: ISLAST, set to 1 if this is the last meta-block
595 m_read_final_block = TRY(m_input_stream.read_bit());
596 if (m_read_final_block) {
597 // 1 bit: ISLASTEMPTY, if set to 1, the meta-block is empty; this
598 // field is only present if ISLAST bit is set -- if it is 1,
599 // then the meta-block and the brotli stream ends at that
600 // bit, with any remaining bits in the last byte of the
601 // compressed stream filled with zeros (if the fill bits are
602 // not zero, then the stream should be rejected as invalid)
603 bool is_last_block_empty = TRY(m_input_stream.read_bit());
604 // If the last block is empty we are done decompressing
605 if (is_last_block_empty)
606 break;
607 }
608
609 // 2 bits: MNIBBLES, number of nibbles to represent the uncompressed
610 // length
611 size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles());
612
613 // If MNIBBLES is 0, the meta-block is empty, i.e., it does
614 // not generate any uncompressed data. In this case, the
615 // rest of the meta-block has the following format:
616 if (size_number_of_nibbles == 0) {
617
618 // 1 bit: reserved, must be zero
619 bool reserved = TRY(m_input_stream.read_bit());
620 if (reserved)
621 return Error::from_string_literal("invalid reserved bit");
622
623 // 2 bits: MSKIPBYTES, number of bytes to represent
624 // metadata length
625 //
626 // MSKIPBYTES * 8 bits: MSKIPLEN - 1, where MSKIPLEN is
627 // the number of metadata bytes; this field is
628 // only present if MSKIPBYTES is positive;
629 // otherwise, MSKIPLEN is 0 (if MSKIPBYTES is
630 // greater than 1, and the last byte is all
631 // zeros, then the stream should be rejected as
632 // invalid)
633 size_t skip_bytes = TRY(m_input_stream.read_bits(2));
634 if (skip_bytes == 0) {
635 // 0..7 bits: fill bits until the next byte boundary,
636 // must be all zeros
637 u8 remainder = m_input_stream.align_to_byte_boundary();
638 if (remainder != 0)
639 return Error::from_string_literal("remainder bits are non-zero");
640 continue;
641 }
642
643 // MSKIPLEN bytes of metadata, not part of the
644 // uncompressed data or the sliding window
645 size_t skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes));
646
647 u8 remainder = m_input_stream.align_to_byte_boundary();
648 if (remainder != 0)
649 return Error::from_string_literal("remainder bits are non-zero");
650
651 // Discard meta-data bytes
652 u8 temp_buffer[4096];
653 Bytes temp_bytes { temp_buffer, 4096 };
654 while (skip_length > 0) {
655 Bytes temp_bytes_slice = temp_bytes.slice(0, min(4096, skip_length));
656 auto metadata_bytes = TRY(m_input_stream.read_some(temp_bytes_slice));
657 if (metadata_bytes.is_empty())
658 return Error::from_string_literal("eof");
659 if (metadata_bytes.last() == 0)
660 return Error::from_string_literal("invalid stream");
661 skip_length -= metadata_bytes.size();
662 }
663
664 continue;
665 }
666
667 size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles));
668
669 // 1 bit: ISUNCOMPRESSED, if set to 1, any bits of compressed data
670 // up to the next byte boundary are ignored, and the rest of
671 // the meta-block contains MLEN bytes of literal data; this
672 // field is only present if the ISLAST bit is not set (if the
673 // ignored bits are not all zeros, the stream should be
674 // rejected as invalid)
675 bool is_uncompressed = false;
676 if (!m_read_final_block)
677 is_uncompressed = TRY(m_input_stream.read_bit());
678
679 m_bytes_left = uncompressed_size;
680 if (is_uncompressed) {
681 u8 remainder = m_input_stream.align_to_byte_boundary();
682 if (remainder != 0)
683 return Error::from_string_literal("remainder is non-zero");
684 m_current_state = State::UncompressedData;
685 } else {
686 TRY(read_block_configuration(m_literal_block));
687 TRY(read_block_configuration(m_insert_and_copy_block));
688 TRY(read_block_configuration(m_distance_block));
689
690 m_postfix_bits = TRY(m_input_stream.read_bits(2));
691 m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits;
692
693 m_literal_context_modes.clear();
694 for (size_t i = 0; i < m_literal_block.number_of_types; i++) {
695 size_t context_mode = TRY(m_input_stream.read_bits(2));
696 m_literal_context_modes.append(context_mode);
697 }
698
699 m_context_mapping_literal.clear();
700 size_t number_of_literal_codes = TRY(read_variable_length());
701 if (number_of_literal_codes == 1) {
702 for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++)
703 m_context_mapping_literal.append(0);
704 } else {
705 TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types));
706 }
707
708 m_context_mapping_distance.clear();
709 size_t number_of_distance_codes = TRY(read_variable_length());
710 if (number_of_distance_codes == 1) {
711 for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++)
712 m_context_mapping_distance.append(0);
713 } else {
714 TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types));
715 }
716
717 m_literal_codes.clear();
718 for (size_t i = 0; i < number_of_literal_codes; i++) {
719 CanonicalCode code;
720 TRY(read_prefix_code(code, 256));
721 m_literal_codes.append(move(code));
722 }
723
724 m_insert_and_copy_codes.clear();
725 for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) {
726 CanonicalCode code;
727 TRY(read_prefix_code(code, 704));
728 m_insert_and_copy_codes.append(move(code));
729 }
730
731 m_distance_codes.clear();
732 for (size_t i = 0; i < number_of_distance_codes; i++) {
733 CanonicalCode code;
734 TRY(read_prefix_code(code, 16 + m_direct_distances + (48 << m_postfix_bits)));
735 m_distance_codes.append(move(code));
736 }
737
738 m_current_state = State::CompressedCommand;
739 }
740 } else if (m_current_state == State::UncompressedData) {
741 size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left);
742 VERIFY(number_of_fitting_bytes > 0);
743
744 auto uncompressed_bytes = TRY(m_input_stream.read_some(output_buffer.slice(bytes_read, number_of_fitting_bytes)));
745 if (uncompressed_bytes.is_empty())
746 return Error::from_string_literal("eof");
747
748 m_bytes_left -= uncompressed_bytes.size();
749 bytes_read += uncompressed_bytes.size();
750
751 // If all bytes were read, return to the idle state
752 if (m_bytes_left == 0)
753 m_current_state = State::Idle;
754 } else if (m_current_state == State::CompressedCommand) {
755 if (m_insert_and_copy_block.length == 0) {
756 TRY(block_read_new_state(m_insert_and_copy_block));
757 }
758 m_insert_and_copy_block.length--;
759
760 size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream));
761
762 size_t const insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 };
763 size_t const copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 };
764 bool const implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false };
765
766 size_t insert_and_copy_index = insert_and_copy_symbol >> 6;
767 size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111;
768 size_t copy_length_code_offset = insert_and_copy_symbol & 0b111;
769
770 size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset;
771 size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset;
772
773 m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index];
774
775 size_t const insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 };
776 size_t const insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 };
777 size_t const copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 };
778 size_t const copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 };
779
780 m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code]));
781 m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code]));
782
783 if (m_insert_length > 0) {
784 m_current_state = State::CompressedLiteral;
785 } else {
786 m_current_state = State::CompressedDistance;
787 }
788 } else if (m_current_state == State::CompressedLiteral) {
789 if (m_literal_block.length == 0) {
790 TRY(block_read_new_state(m_literal_block));
791 }
792 m_literal_block.length--;
793
794 size_t literal_code_index = literal_code_index_from_context();
795 size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream));
796
797 output_buffer[bytes_read] = literal_value;
798 m_lookback_buffer.value().write(literal_value);
799 bytes_read++;
800 m_insert_length--;
801 m_bytes_left--;
802
803 if (m_bytes_left == 0)
804 m_current_state = State::Idle;
805 else if (m_insert_length == 0)
806 m_current_state = State::CompressedDistance;
807 } else if (m_current_state == State::CompressedDistance) {
808 size_t distance_symbol;
809 if (m_implicit_zero_distance) {
810 distance_symbol = 0;
811 } else {
812 if (m_distance_block.length == 0) {
813 TRY(block_read_new_state(m_distance_block));
814 }
815 m_distance_block.length--;
816
817 size_t context_id = clamp(m_copy_length - 2, 0, 3);
818 size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id];
819
820 distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream));
821 }
822
823 size_t distance;
824 bool reuse_previous_distance = false;
825 if (distance_symbol < 16) {
826 switch (distance_symbol) {
827 case 0:
828 distance = m_distances[0];
829 reuse_previous_distance = true;
830 break;
831 case 1:
832 distance = m_distances[1];
833 break;
834 case 2:
835 distance = m_distances[2];
836 break;
837 case 3:
838 distance = m_distances[3];
839 break;
840 case 4:
841 distance = m_distances[0] - 1;
842 break;
843 case 5:
844 distance = m_distances[0] + 1;
845 break;
846 case 6:
847 distance = m_distances[0] - 2;
848 break;
849 case 7:
850 distance = m_distances[0] + 2;
851 break;
852 case 8:
853 distance = m_distances[0] - 3;
854 break;
855 case 9:
856 distance = m_distances[0] + 3;
857 break;
858 case 10:
859 distance = m_distances[1] - 1;
860 break;
861 case 11:
862 distance = m_distances[1] + 1;
863 break;
864 case 12:
865 distance = m_distances[1] - 2;
866 break;
867 case 13:
868 distance = m_distances[1] + 2;
869 break;
870 case 14:
871 distance = m_distances[1] - 3;
872 break;
873 case 15:
874 distance = m_distances[1] + 3;
875 break;
876 }
877 } else if (distance_symbol < 16 + m_direct_distances) {
878 distance = distance_symbol - 15;
879 } else {
880 size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1;
881
882 size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1));
883 size_t dextra = TRY(m_input_stream.read_bits(ndistbits));
884
885 size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits;
886 size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK;
887 size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4;
888 distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1;
889 }
890 m_distance = distance;
891
892 size_t total_written = m_lookback_buffer.value().total_written();
893 size_t max_lookback = min(total_written, m_window_size);
894
895 if (distance > max_lookback) {
896 size_t word_index = distance - (max_lookback + 1);
897 m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length));
898 m_copy_length = m_dictionary_data.size();
899
900 if (m_copy_length == 0)
901 m_current_state = State::CompressedCommand;
902 else
903 m_current_state = State::CompressedDictionary;
904 } else {
905 if (!reuse_previous_distance) {
906 m_distances[3] = m_distances[2];
907 m_distances[2] = m_distances[1];
908 m_distances[1] = m_distances[0];
909 m_distances[0] = distance;
910 }
911
912 m_current_state = State::CompressedCopy;
913 }
914 } else if (m_current_state == State::CompressedCopy) {
915 u8 copy_value = m_lookback_buffer.value().lookback(m_distance);
916
917 output_buffer[bytes_read] = copy_value;
918 m_lookback_buffer.value().write(copy_value);
919 bytes_read++;
920 m_copy_length--;
921 m_bytes_left--;
922
923 if (m_bytes_left == 0)
924 m_current_state = State::Idle;
925 else if (m_copy_length == 0)
926 m_current_state = State::CompressedCommand;
927 } else if (m_current_state == State::CompressedDictionary) {
928 size_t offset = m_dictionary_data.size() - m_copy_length;
929 u8 dictionary_value = m_dictionary_data[offset];
930
931 output_buffer[bytes_read] = dictionary_value;
932 m_lookback_buffer.value().write(dictionary_value);
933 bytes_read++;
934 m_copy_length--;
935 m_bytes_left--;
936
937 if (m_bytes_left == 0)
938 m_current_state = State::Idle;
939 else if (m_copy_length == 0)
940 m_current_state = State::CompressedCommand;
941 }
942 }
943
944 return output_buffer.slice(0, bytes_read);
945}
946
947bool BrotliDecompressionStream::is_eof() const
948{
949 return m_read_final_block && m_current_state == State::Idle;
950}
951
952}