Serenity Operating System
at master 952 lines 36 kB view raw
1/* 2 * Copyright (c) 2022, Michiel Visser <opensource@webmichiel.nl> 3 * 4 * SPDX-License-Identifier: BSD-2-Clause 5 */ 6 7#include <AK/BinarySearch.h> 8#include <AK/QuickSort.h> 9#include <LibCompress/Brotli.h> 10#include <LibCompress/BrotliDictionary.h> 11 12namespace Compress { 13 14ErrorOr<size_t> BrotliDecompressionStream::CanonicalCode::read_symbol(LittleEndianInputBitStream& input_stream) 15{ 16 size_t code_bits = 1; 17 18 while (code_bits < (1 << 16)) { 19 // FIXME: This is very inefficient and could greatly be improved by implementing this 20 // algorithm: https://www.hanshq.net/zip.html#huffdec 21 size_t index; 22 if (binary_search(m_symbol_codes.span(), code_bits, &index)) 23 return m_symbol_values[index]; 24 25 code_bits = (code_bits << 1) | TRY(input_stream.read_bit()); 26 } 27 28 return Error::from_string_literal("no matching code found"); 29} 30 31BrotliDecompressionStream::BrotliDecompressionStream(Stream& stream) 32 : m_input_stream(MaybeOwned(stream)) 33{ 34} 35 36ErrorOr<size_t> BrotliDecompressionStream::read_window_length() 37{ 38 if (TRY(m_input_stream.read_bit())) { 39 switch (TRY(m_input_stream.read_bits(3))) { 40 case 0: { 41 switch (TRY(m_input_stream.read_bits(3))) { 42 case 0: 43 return 17; 44 case 1: 45 return Error::from_string_literal("invalid window length"); 46 case 2: 47 return 10; 48 case 3: 49 return 11; 50 case 4: 51 return 12; 52 case 5: 53 return 13; 54 case 6: 55 return 14; 56 case 7: 57 return 15; 58 default: 59 VERIFY_NOT_REACHED(); 60 } 61 } 62 case 1: 63 return 18; 64 case 2: 65 return 19; 66 case 3: 67 return 20; 68 case 4: 69 return 21; 70 case 5: 71 return 22; 72 case 6: 73 return 23; 74 case 7: 75 return 24; 76 default: 77 VERIFY_NOT_REACHED(); 78 } 79 } else { 80 return 16; 81 } 82} 83 84ErrorOr<size_t> BrotliDecompressionStream::read_size_number_of_nibbles() 85{ 86 switch (TRY(m_input_stream.read_bits(2))) { 87 case 0: 88 return 4; 89 case 1: 90 return 5; 91 case 2: 92 return 6; 93 case 3: 94 return 0; 95 default: 96 VERIFY_NOT_REACHED(); 97 } 98} 99 100ErrorOr<size_t> BrotliDecompressionStream::read_variable_length() 101{ 102 // Value Bit Pattern 103 // ----- ----------- 104 // 1 0 105 // 2 0001 106 // 3..4 x0011 107 // 5..8 xx0101 108 // 9..16 xxx0111 109 // 17..32 xxxx1001 110 // 33..64 xxxxx1011 111 // 65..128 xxxxxx1101 112 // 129..256 xxxxxxx1111 113 114 if (TRY(m_input_stream.read_bit())) { 115 switch (TRY(m_input_stream.read_bits(3))) { 116 case 0: 117 return 2; 118 case 1: 119 return 3 + TRY(m_input_stream.read_bits(1)); 120 case 2: 121 return 5 + TRY(m_input_stream.read_bits(2)); 122 case 3: 123 return 9 + TRY(m_input_stream.read_bits(3)); 124 case 4: 125 return 17 + TRY(m_input_stream.read_bits(4)); 126 case 5: 127 return 33 + TRY(m_input_stream.read_bits(5)); 128 case 6: 129 return 65 + TRY(m_input_stream.read_bits(6)); 130 case 7: 131 return 129 + TRY(m_input_stream.read_bits(7)); 132 default: 133 VERIFY_NOT_REACHED(); 134 } 135 } else { 136 return 1; 137 } 138} 139 140ErrorOr<size_t> BrotliDecompressionStream::read_complex_prefix_code_length() 141{ 142 // Symbol Code 143 // ------ ---- 144 // 0 00 145 // 1 0111 146 // 2 011 147 // 3 10 148 // 4 01 149 // 5 1111 150 151 switch (TRY(m_input_stream.read_bits(2))) { 152 case 0: 153 return 0; 154 case 1: 155 return 4; 156 case 2: 157 return 3; 158 case 3: { 159 if (TRY(m_input_stream.read_bit()) == 0) { 160 return 2; 161 } else { 162 if (TRY(m_input_stream.read_bit()) == 0) { 163 return 1; 164 } else { 165 return 5; 166 } 167 } 168 } 169 default: 170 VERIFY_NOT_REACHED(); 171 } 172} 173 174ErrorOr<void> BrotliDecompressionStream::read_prefix_code(CanonicalCode& code, size_t alphabet_size) 175{ 176 size_t hskip = TRY(m_input_stream.read_bits(2)); 177 178 if (hskip == 1) { 179 TRY(read_simple_prefix_code(code, alphabet_size)); 180 } else { 181 TRY(read_complex_prefix_code(code, alphabet_size, hskip)); 182 } 183 184 return {}; 185} 186 187ErrorOr<void> BrotliDecompressionStream::read_simple_prefix_code(CanonicalCode& code, size_t alphabet_size) 188{ 189 VERIFY(code.m_symbol_codes.is_empty()); 190 VERIFY(code.m_symbol_values.is_empty()); 191 192 size_t number_of_symbols = 1 + TRY(m_input_stream.read_bits(2)); 193 194 size_t symbol_size = 0; 195 while ((1u << symbol_size) < alphabet_size) 196 symbol_size++; 197 198 Vector<size_t> symbols; 199 for (size_t i = 0; i < number_of_symbols; i++) { 200 size_t symbol = TRY(m_input_stream.read_bits(symbol_size)); 201 symbols.append(symbol); 202 203 if (symbol >= alphabet_size) 204 return Error::from_string_literal("symbol larger than alphabet"); 205 } 206 207 if (number_of_symbols == 1) { 208 code.m_symbol_codes.append(0b1); 209 code.m_symbol_values = move(symbols); 210 } else if (number_of_symbols == 2) { 211 code.m_symbol_codes.extend({ 0b10, 0b11 }); 212 if (symbols[0] > symbols[1]) 213 swap(symbols[0], symbols[1]); 214 code.m_symbol_values = move(symbols); 215 } else if (number_of_symbols == 3) { 216 code.m_symbol_codes.extend({ 0b10, 0b110, 0b111 }); 217 if (symbols[1] > symbols[2]) 218 swap(symbols[1], symbols[2]); 219 code.m_symbol_values = move(symbols); 220 } else if (number_of_symbols == 4) { 221 bool tree_select = TRY(m_input_stream.read_bit()); 222 if (tree_select) { 223 code.m_symbol_codes.extend({ 0b10, 0b110, 0b1110, 0b1111 }); 224 if (symbols[2] > symbols[3]) 225 swap(symbols[2], symbols[3]); 226 code.m_symbol_values = move(symbols); 227 } else { 228 code.m_symbol_codes.extend({ 0b100, 0b101, 0b110, 0b111 }); 229 quick_sort(symbols); 230 code.m_symbol_values = move(symbols); 231 } 232 } 233 234 return {}; 235} 236 237ErrorOr<void> BrotliDecompressionStream::read_complex_prefix_code(CanonicalCode& code, size_t alphabet_size, size_t hskip) 238{ 239 // hskip should only be 0, 2 or 3 240 VERIFY(hskip != 1); 241 VERIFY(hskip <= 3); 242 243 // Read the prefix code_value that is used to encode the actual prefix code_value 244 size_t const symbol_mapping[18] = { 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; 245 size_t code_length[18] { 0 }; 246 size_t code_length_counts[6] { 0 }; 247 248 size_t sum = 0; 249 size_t number_of_non_zero_symbols = 0; 250 for (size_t i = hskip; i < 18; i++) { 251 size_t len = TRY(read_complex_prefix_code_length()); 252 code_length[symbol_mapping[i]] = len; 253 254 if (len != 0) { 255 code_length_counts[len]++; 256 sum += (32 >> len); 257 number_of_non_zero_symbols++; 258 } 259 260 if (sum == 32) 261 break; 262 else if (sum > 32) 263 return Error::from_string_literal("invalid prefix code"); 264 } 265 266 BrotliDecompressionStream::CanonicalCode temp_code; 267 if (number_of_non_zero_symbols > 1) { 268 size_t code_value = 0; 269 for (size_t bits = 1; bits <= 5; bits++) { 270 code_value = (code_value + code_length_counts[bits - 1]) << 1; 271 size_t current_code_value = code_value; 272 273 for (size_t i = 0; i < 18; i++) { 274 size_t len = code_length[i]; 275 if (len == bits) { 276 temp_code.m_symbol_codes.append((1 << bits) | current_code_value); 277 temp_code.m_symbol_values.append(i); 278 current_code_value++; 279 } 280 } 281 } 282 } else { 283 for (size_t i = 0; i < 18; i++) { 284 size_t len = code_length[i]; 285 if (len != 0) { 286 temp_code.m_symbol_codes.append(1); 287 temp_code.m_symbol_values.append(i); 288 break; 289 } 290 } 291 } 292 293 // Read the actual prefix code_value 294 sum = 0; 295 size_t i = 0; 296 297 size_t previous_non_zero_code_length = 8; 298 size_t last_symbol = 0; 299 size_t last_repeat = 0; 300 301 Vector<size_t> result_symbols; 302 Vector<size_t> result_lengths; 303 size_t result_lengths_count[16] { 0 }; 304 while (i < alphabet_size) { 305 auto symbol = TRY(temp_code.read_symbol(m_input_stream)); 306 307 if (symbol < 16) { 308 result_symbols.append(i); 309 result_lengths.append(symbol); 310 result_lengths_count[symbol]++; 311 312 if (symbol != 0) { 313 previous_non_zero_code_length = symbol; 314 sum += (32768 >> symbol); 315 if (sum == 32768) 316 break; 317 else if (sum > 32768) 318 return Error::from_string_literal("invalid prefix code"); 319 } 320 321 last_repeat = 0; 322 i++; 323 } else if (symbol == 16) { 324 size_t repeat_count = 0; 325 if (last_symbol == 16 && last_repeat != 0) { 326 repeat_count = (4 * (last_repeat - 2)); 327 } else { 328 last_repeat = 0; 329 } 330 repeat_count += 3 + TRY(m_input_stream.read_bits(2)); 331 332 for (size_t rep = 0; rep < (repeat_count - last_repeat); rep++) { 333 result_symbols.append(i); 334 result_lengths.append(previous_non_zero_code_length); 335 result_lengths_count[previous_non_zero_code_length]++; 336 337 if (previous_non_zero_code_length != 0) { 338 sum += (32768 >> previous_non_zero_code_length); 339 if (sum == 32768) 340 break; 341 else if (sum > 32768) 342 return Error::from_string_literal("invalid prefix code"); 343 } 344 345 i++; 346 if (i >= alphabet_size) 347 break; 348 } 349 if (sum == 32768) 350 break; 351 VERIFY(sum < 32768); 352 353 last_repeat = repeat_count; 354 } else if (symbol == 17) { 355 size_t repeat_count = 0; 356 if (last_symbol == 17 && last_repeat != 0) { 357 repeat_count = (8 * (last_repeat - 2)); 358 } else { 359 last_repeat = 0; 360 } 361 repeat_count += 3 + TRY(m_input_stream.read_bits(3)); 362 363 i += (repeat_count - last_repeat); 364 last_repeat = repeat_count; 365 } 366 367 last_symbol = symbol; 368 } 369 result_lengths_count[0] = 0; 370 371 size_t code_value = 0; 372 for (size_t bits = 1; bits < 16; bits++) { 373 code_value = (code_value + result_lengths_count[bits - 1]) << 1; 374 size_t current_code_value = code_value; 375 376 for (size_t n = 0; n < result_symbols.size(); n++) { 377 size_t len = result_lengths[n]; 378 if (len == bits) { 379 code.m_symbol_codes.append((1 << bits) | current_code_value); 380 code.m_symbol_values.append(result_symbols[n]); 381 current_code_value++; 382 } 383 } 384 } 385 386 return {}; 387} 388 389static void inverse_move_to_front_transform(Span<u8> v) 390{ 391 // RFC 7932 section 7.3 392 u8 mtf[256]; 393 for (size_t i = 0; i < 256; ++i) { 394 mtf[i] = (u8)i; 395 } 396 for (size_t i = 0; i < v.size(); ++i) { 397 u8 index = v[i]; 398 u8 value = mtf[index]; 399 v[i] = value; 400 for (; index; --index) { 401 mtf[index] = mtf[index - 1]; 402 } 403 mtf[0] = value; 404 } 405} 406 407ErrorOr<void> BrotliDecompressionStream::read_context_map(size_t number_of_codes, Vector<u8>& context_map, size_t context_map_size) 408{ 409 bool use_run_length_encoding = TRY(m_input_stream.read_bit()); 410 size_t run_length_encoding_max = 0; 411 if (use_run_length_encoding) { 412 run_length_encoding_max = 1 + TRY(m_input_stream.read_bits(4)); 413 } 414 415 BrotliDecompressionStream::CanonicalCode code; 416 TRY(read_prefix_code(code, number_of_codes + run_length_encoding_max)); 417 418 size_t i = 0; 419 while (i < context_map_size) { 420 size_t symbol = TRY(code.read_symbol(m_input_stream)); 421 422 if (symbol <= run_length_encoding_max) { 423 size_t repeat_base = 1 << symbol; 424 size_t repeat_additional = TRY(m_input_stream.read_bits(symbol)); 425 size_t repeat_count = repeat_base + repeat_additional; 426 while (repeat_count--) { 427 context_map.append(0); 428 i++; 429 } 430 } else { 431 size_t value = symbol - run_length_encoding_max; 432 context_map.append(value); 433 i++; 434 } 435 } 436 437 bool inverse_move_to_front = TRY(m_input_stream.read_bit()); 438 if (inverse_move_to_front) 439 inverse_move_to_front_transform(context_map.span()); 440 441 return {}; 442} 443 444ErrorOr<void> BrotliDecompressionStream::read_block_configuration(Block& block) 445{ 446 size_t blocks_of_type = TRY(read_variable_length()); 447 448 block.type = 0; 449 block.type_previous = 1; 450 block.number_of_types = blocks_of_type; 451 452 block.type_code.clear(); 453 block.length_code.clear(); 454 455 if (blocks_of_type == 1) { 456 block.length = 16 * MiB; 457 } else { 458 TRY(read_prefix_code(block.type_code, 2 + blocks_of_type)); 459 TRY(read_prefix_code(block.length_code, 26)); 460 TRY(block_update_length(block)); 461 } 462 463 return {}; 464} 465 466ErrorOr<void> BrotliDecompressionStream::block_update_length(Block& block) 467{ 468 size_t const block_length_code_base[26] { 1, 5, 9, 13, 17, 25, 33, 41, 49, 65, 81, 97, 113, 145, 177, 209, 241, 305, 369, 497, 753, 1265, 2289, 4337, 8433, 16625 }; 469 size_t const block_length_code_extra[26] { 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 7, 8, 9, 10, 11, 12, 13, 24 }; 470 471 size_t symbol = TRY(block.length_code.read_symbol(m_input_stream)); 472 size_t block_length = block_length_code_base[symbol] + TRY(m_input_stream.read_bits(block_length_code_extra[symbol])); 473 474 block.length = block_length; 475 return {}; 476} 477 478ErrorOr<void> BrotliDecompressionStream::block_read_new_state(Block& block) 479{ 480 size_t block_type_symbol = TRY(block.type_code.read_symbol(m_input_stream)); 481 TRY(block_update_length(block)); 482 483 if (block_type_symbol == 0) { 484 swap(block.type, block.type_previous); 485 } else if (block_type_symbol == 1) { 486 block.type_previous = block.type; 487 block.type = (block.type + 1) % block.number_of_types; 488 } else { 489 block.type_previous = block.type; 490 block.type = block_type_symbol - 2; 491 } 492 493 return {}; 494} 495 496size_t BrotliDecompressionStream::literal_code_index_from_context() 497{ 498 size_t const context_id_lut0[256] { 499 0, 0, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0, 0, 4, 0, 0, 500 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 501 8, 12, 16, 12, 12, 20, 12, 16, 24, 28, 12, 12, 32, 12, 36, 12, 502 44, 44, 44, 44, 44, 44, 44, 44, 44, 44, 32, 32, 24, 40, 28, 12, 503 12, 48, 52, 52, 52, 48, 52, 52, 52, 48, 52, 52, 52, 52, 52, 48, 504 52, 52, 52, 52, 52, 48, 52, 52, 52, 52, 52, 24, 12, 28, 12, 12, 505 12, 56, 60, 60, 60, 56, 60, 60, 60, 56, 60, 60, 60, 60, 60, 56, 506 60, 60, 60, 60, 60, 56, 60, 60, 60, 60, 60, 24, 12, 28, 12, 0, 507 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 508 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 509 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 510 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 511 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 512 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 513 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 514 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3, 2, 3 515 }; 516 size_t const context_id_lut1[256] { 517 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 518 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 519 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 520 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 521 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 522 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 523 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 524 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 1, 1, 1, 1, 0, 525 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 526 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 527 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 528 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 529 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 530 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 531 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 532 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2 533 }; 534 size_t const context_id_lut2[256] { 535 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 536 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 537 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 538 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 539 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 540 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 541 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 542 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 543 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 544 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 545 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 546 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 547 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 548 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 549 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 550 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7 551 }; 552 553 size_t context_mode = m_literal_context_modes[m_literal_block.type]; 554 size_t context_id; 555 switch (context_mode) { 556 case 0: 557 context_id = m_lookback_buffer.value().lookback(1, 0) & 0x3f; 558 break; 559 case 1: 560 context_id = m_lookback_buffer.value().lookback(1, 0) >> 2; 561 break; 562 case 2: 563 context_id = context_id_lut0[m_lookback_buffer.value().lookback(1, 0)] | context_id_lut1[m_lookback_buffer.value().lookback(2, 0)]; 564 break; 565 case 3: 566 context_id = (context_id_lut2[m_lookback_buffer.value().lookback(1, 0)] << 3) | context_id_lut2[m_lookback_buffer.value().lookback(2, 0)]; 567 break; 568 default: 569 VERIFY_NOT_REACHED(); 570 } 571 572 size_t literal_code_index = m_context_mapping_literal[64 * m_literal_block.type + context_id]; 573 return literal_code_index; 574} 575 576ErrorOr<Bytes> BrotliDecompressionStream::read_some(Bytes output_buffer) 577{ 578 size_t bytes_read = 0; 579 while (bytes_read < output_buffer.size()) { 580 if (m_current_state == State::WindowSize) { 581 size_t window_bits = TRY(read_window_length()); 582 m_window_size = (1 << window_bits) - 16; 583 584 m_lookback_buffer = TRY(LookbackBuffer::try_create(m_window_size)); 585 586 m_current_state = State::Idle; 587 } else if (m_current_state == State::Idle) { 588 // If the final block was read, we are done decompressing 589 if (m_read_final_block) 590 break; 591 592 // RFC 7932 section 9.1 593 // 594 // 1 bit: ISLAST, set to 1 if this is the last meta-block 595 m_read_final_block = TRY(m_input_stream.read_bit()); 596 if (m_read_final_block) { 597 // 1 bit: ISLASTEMPTY, if set to 1, the meta-block is empty; this 598 // field is only present if ISLAST bit is set -- if it is 1, 599 // then the meta-block and the brotli stream ends at that 600 // bit, with any remaining bits in the last byte of the 601 // compressed stream filled with zeros (if the fill bits are 602 // not zero, then the stream should be rejected as invalid) 603 bool is_last_block_empty = TRY(m_input_stream.read_bit()); 604 // If the last block is empty we are done decompressing 605 if (is_last_block_empty) 606 break; 607 } 608 609 // 2 bits: MNIBBLES, number of nibbles to represent the uncompressed 610 // length 611 size_t size_number_of_nibbles = TRY(read_size_number_of_nibbles()); 612 613 // If MNIBBLES is 0, the meta-block is empty, i.e., it does 614 // not generate any uncompressed data. In this case, the 615 // rest of the meta-block has the following format: 616 if (size_number_of_nibbles == 0) { 617 618 // 1 bit: reserved, must be zero 619 bool reserved = TRY(m_input_stream.read_bit()); 620 if (reserved) 621 return Error::from_string_literal("invalid reserved bit"); 622 623 // 2 bits: MSKIPBYTES, number of bytes to represent 624 // metadata length 625 // 626 // MSKIPBYTES * 8 bits: MSKIPLEN - 1, where MSKIPLEN is 627 // the number of metadata bytes; this field is 628 // only present if MSKIPBYTES is positive; 629 // otherwise, MSKIPLEN is 0 (if MSKIPBYTES is 630 // greater than 1, and the last byte is all 631 // zeros, then the stream should be rejected as 632 // invalid) 633 size_t skip_bytes = TRY(m_input_stream.read_bits(2)); 634 if (skip_bytes == 0) { 635 // 0..7 bits: fill bits until the next byte boundary, 636 // must be all zeros 637 u8 remainder = m_input_stream.align_to_byte_boundary(); 638 if (remainder != 0) 639 return Error::from_string_literal("remainder bits are non-zero"); 640 continue; 641 } 642 643 // MSKIPLEN bytes of metadata, not part of the 644 // uncompressed data or the sliding window 645 size_t skip_length = 1 + TRY(m_input_stream.read_bits(8 * skip_bytes)); 646 647 u8 remainder = m_input_stream.align_to_byte_boundary(); 648 if (remainder != 0) 649 return Error::from_string_literal("remainder bits are non-zero"); 650 651 // Discard meta-data bytes 652 u8 temp_buffer[4096]; 653 Bytes temp_bytes { temp_buffer, 4096 }; 654 while (skip_length > 0) { 655 Bytes temp_bytes_slice = temp_bytes.slice(0, min(4096, skip_length)); 656 auto metadata_bytes = TRY(m_input_stream.read_some(temp_bytes_slice)); 657 if (metadata_bytes.is_empty()) 658 return Error::from_string_literal("eof"); 659 if (metadata_bytes.last() == 0) 660 return Error::from_string_literal("invalid stream"); 661 skip_length -= metadata_bytes.size(); 662 } 663 664 continue; 665 } 666 667 size_t uncompressed_size = 1 + TRY(m_input_stream.read_bits(4 * size_number_of_nibbles)); 668 669 // 1 bit: ISUNCOMPRESSED, if set to 1, any bits of compressed data 670 // up to the next byte boundary are ignored, and the rest of 671 // the meta-block contains MLEN bytes of literal data; this 672 // field is only present if the ISLAST bit is not set (if the 673 // ignored bits are not all zeros, the stream should be 674 // rejected as invalid) 675 bool is_uncompressed = false; 676 if (!m_read_final_block) 677 is_uncompressed = TRY(m_input_stream.read_bit()); 678 679 m_bytes_left = uncompressed_size; 680 if (is_uncompressed) { 681 u8 remainder = m_input_stream.align_to_byte_boundary(); 682 if (remainder != 0) 683 return Error::from_string_literal("remainder is non-zero"); 684 m_current_state = State::UncompressedData; 685 } else { 686 TRY(read_block_configuration(m_literal_block)); 687 TRY(read_block_configuration(m_insert_and_copy_block)); 688 TRY(read_block_configuration(m_distance_block)); 689 690 m_postfix_bits = TRY(m_input_stream.read_bits(2)); 691 m_direct_distances = TRY(m_input_stream.read_bits(4)) << m_postfix_bits; 692 693 m_literal_context_modes.clear(); 694 for (size_t i = 0; i < m_literal_block.number_of_types; i++) { 695 size_t context_mode = TRY(m_input_stream.read_bits(2)); 696 m_literal_context_modes.append(context_mode); 697 } 698 699 m_context_mapping_literal.clear(); 700 size_t number_of_literal_codes = TRY(read_variable_length()); 701 if (number_of_literal_codes == 1) { 702 for (size_t i = 0; i < 64 * m_literal_block.number_of_types; i++) 703 m_context_mapping_literal.append(0); 704 } else { 705 TRY(read_context_map(number_of_literal_codes, m_context_mapping_literal, 64 * m_literal_block.number_of_types)); 706 } 707 708 m_context_mapping_distance.clear(); 709 size_t number_of_distance_codes = TRY(read_variable_length()); 710 if (number_of_distance_codes == 1) { 711 for (size_t i = 0; i < 4 * m_distance_block.number_of_types; i++) 712 m_context_mapping_distance.append(0); 713 } else { 714 TRY(read_context_map(number_of_distance_codes, m_context_mapping_distance, 4 * m_distance_block.number_of_types)); 715 } 716 717 m_literal_codes.clear(); 718 for (size_t i = 0; i < number_of_literal_codes; i++) { 719 CanonicalCode code; 720 TRY(read_prefix_code(code, 256)); 721 m_literal_codes.append(move(code)); 722 } 723 724 m_insert_and_copy_codes.clear(); 725 for (size_t i = 0; i < m_insert_and_copy_block.number_of_types; i++) { 726 CanonicalCode code; 727 TRY(read_prefix_code(code, 704)); 728 m_insert_and_copy_codes.append(move(code)); 729 } 730 731 m_distance_codes.clear(); 732 for (size_t i = 0; i < number_of_distance_codes; i++) { 733 CanonicalCode code; 734 TRY(read_prefix_code(code, 16 + m_direct_distances + (48 << m_postfix_bits))); 735 m_distance_codes.append(move(code)); 736 } 737 738 m_current_state = State::CompressedCommand; 739 } 740 } else if (m_current_state == State::UncompressedData) { 741 size_t number_of_fitting_bytes = min(output_buffer.size() - bytes_read, m_bytes_left); 742 VERIFY(number_of_fitting_bytes > 0); 743 744 auto uncompressed_bytes = TRY(m_input_stream.read_some(output_buffer.slice(bytes_read, number_of_fitting_bytes))); 745 if (uncompressed_bytes.is_empty()) 746 return Error::from_string_literal("eof"); 747 748 m_bytes_left -= uncompressed_bytes.size(); 749 bytes_read += uncompressed_bytes.size(); 750 751 // If all bytes were read, return to the idle state 752 if (m_bytes_left == 0) 753 m_current_state = State::Idle; 754 } else if (m_current_state == State::CompressedCommand) { 755 if (m_insert_and_copy_block.length == 0) { 756 TRY(block_read_new_state(m_insert_and_copy_block)); 757 } 758 m_insert_and_copy_block.length--; 759 760 size_t insert_and_copy_symbol = TRY(m_insert_and_copy_codes[m_insert_and_copy_block.type].read_symbol(m_input_stream)); 761 762 size_t const insert_length_code_base[11] { 0, 0, 0, 0, 8, 8, 0, 16, 8, 16, 16 }; 763 size_t const copy_length_code_base[11] { 0, 8, 0, 8, 0, 8, 16, 0, 16, 8, 16 }; 764 bool const implicit_zero_distance[11] { true, true, false, false, false, false, false, false, false, false, false }; 765 766 size_t insert_and_copy_index = insert_and_copy_symbol >> 6; 767 size_t insert_length_code_offset = (insert_and_copy_symbol >> 3) & 0b111; 768 size_t copy_length_code_offset = insert_and_copy_symbol & 0b111; 769 770 size_t insert_length_code = insert_length_code_base[insert_and_copy_index] + insert_length_code_offset; 771 size_t copy_length_code = copy_length_code_base[insert_and_copy_index] + copy_length_code_offset; 772 773 m_implicit_zero_distance = implicit_zero_distance[insert_and_copy_index]; 774 775 size_t const insert_length_base[24] { 0, 1, 2, 3, 4, 5, 6, 8, 10, 14, 18, 26, 34, 50, 66, 98, 130, 194, 322, 578, 1090, 2114, 6210, 22594 }; 776 size_t const insert_length_extra[24] { 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 12, 14, 24 }; 777 size_t const copy_length_base[24] { 2, 3, 4, 5, 6, 7, 8, 9, 10, 12, 14, 18, 22, 30, 38, 54, 70, 102, 134, 198, 326, 582, 1094, 2118 }; 778 size_t const copy_length_extra[24] { 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 9, 10, 24 }; 779 780 m_insert_length = insert_length_base[insert_length_code] + TRY(m_input_stream.read_bits(insert_length_extra[insert_length_code])); 781 m_copy_length = copy_length_base[copy_length_code] + TRY(m_input_stream.read_bits(copy_length_extra[copy_length_code])); 782 783 if (m_insert_length > 0) { 784 m_current_state = State::CompressedLiteral; 785 } else { 786 m_current_state = State::CompressedDistance; 787 } 788 } else if (m_current_state == State::CompressedLiteral) { 789 if (m_literal_block.length == 0) { 790 TRY(block_read_new_state(m_literal_block)); 791 } 792 m_literal_block.length--; 793 794 size_t literal_code_index = literal_code_index_from_context(); 795 size_t literal_value = TRY(m_literal_codes[literal_code_index].read_symbol(m_input_stream)); 796 797 output_buffer[bytes_read] = literal_value; 798 m_lookback_buffer.value().write(literal_value); 799 bytes_read++; 800 m_insert_length--; 801 m_bytes_left--; 802 803 if (m_bytes_left == 0) 804 m_current_state = State::Idle; 805 else if (m_insert_length == 0) 806 m_current_state = State::CompressedDistance; 807 } else if (m_current_state == State::CompressedDistance) { 808 size_t distance_symbol; 809 if (m_implicit_zero_distance) { 810 distance_symbol = 0; 811 } else { 812 if (m_distance_block.length == 0) { 813 TRY(block_read_new_state(m_distance_block)); 814 } 815 m_distance_block.length--; 816 817 size_t context_id = clamp(m_copy_length - 2, 0, 3); 818 size_t distance_code_index = m_context_mapping_distance[4 * m_distance_block.type + context_id]; 819 820 distance_symbol = TRY(m_distance_codes[distance_code_index].read_symbol(m_input_stream)); 821 } 822 823 size_t distance; 824 bool reuse_previous_distance = false; 825 if (distance_symbol < 16) { 826 switch (distance_symbol) { 827 case 0: 828 distance = m_distances[0]; 829 reuse_previous_distance = true; 830 break; 831 case 1: 832 distance = m_distances[1]; 833 break; 834 case 2: 835 distance = m_distances[2]; 836 break; 837 case 3: 838 distance = m_distances[3]; 839 break; 840 case 4: 841 distance = m_distances[0] - 1; 842 break; 843 case 5: 844 distance = m_distances[0] + 1; 845 break; 846 case 6: 847 distance = m_distances[0] - 2; 848 break; 849 case 7: 850 distance = m_distances[0] + 2; 851 break; 852 case 8: 853 distance = m_distances[0] - 3; 854 break; 855 case 9: 856 distance = m_distances[0] + 3; 857 break; 858 case 10: 859 distance = m_distances[1] - 1; 860 break; 861 case 11: 862 distance = m_distances[1] + 1; 863 break; 864 case 12: 865 distance = m_distances[1] - 2; 866 break; 867 case 13: 868 distance = m_distances[1] + 2; 869 break; 870 case 14: 871 distance = m_distances[1] - 3; 872 break; 873 case 15: 874 distance = m_distances[1] + 3; 875 break; 876 } 877 } else if (distance_symbol < 16 + m_direct_distances) { 878 distance = distance_symbol - 15; 879 } else { 880 size_t POSTFIX_MASK = (1 << m_postfix_bits) - 1; 881 882 size_t ndistbits = 1 + ((distance_symbol - m_direct_distances - 16) >> (m_postfix_bits + 1)); 883 size_t dextra = TRY(m_input_stream.read_bits(ndistbits)); 884 885 size_t hcode = (distance_symbol - m_direct_distances - 16) >> m_postfix_bits; 886 size_t lcode = (distance_symbol - m_direct_distances - 16) & POSTFIX_MASK; 887 size_t offset = ((2 + (hcode & 1)) << ndistbits) - 4; 888 distance = ((offset + dextra) << m_postfix_bits) + lcode + m_direct_distances + 1; 889 } 890 m_distance = distance; 891 892 size_t total_written = m_lookback_buffer.value().total_written(); 893 size_t max_lookback = min(total_written, m_window_size); 894 895 if (distance > max_lookback) { 896 size_t word_index = distance - (max_lookback + 1); 897 m_dictionary_data = TRY(BrotliDictionary::lookup_word(word_index, m_copy_length)); 898 m_copy_length = m_dictionary_data.size(); 899 900 if (m_copy_length == 0) 901 m_current_state = State::CompressedCommand; 902 else 903 m_current_state = State::CompressedDictionary; 904 } else { 905 if (!reuse_previous_distance) { 906 m_distances[3] = m_distances[2]; 907 m_distances[2] = m_distances[1]; 908 m_distances[1] = m_distances[0]; 909 m_distances[0] = distance; 910 } 911 912 m_current_state = State::CompressedCopy; 913 } 914 } else if (m_current_state == State::CompressedCopy) { 915 u8 copy_value = m_lookback_buffer.value().lookback(m_distance); 916 917 output_buffer[bytes_read] = copy_value; 918 m_lookback_buffer.value().write(copy_value); 919 bytes_read++; 920 m_copy_length--; 921 m_bytes_left--; 922 923 if (m_bytes_left == 0) 924 m_current_state = State::Idle; 925 else if (m_copy_length == 0) 926 m_current_state = State::CompressedCommand; 927 } else if (m_current_state == State::CompressedDictionary) { 928 size_t offset = m_dictionary_data.size() - m_copy_length; 929 u8 dictionary_value = m_dictionary_data[offset]; 930 931 output_buffer[bytes_read] = dictionary_value; 932 m_lookback_buffer.value().write(dictionary_value); 933 bytes_read++; 934 m_copy_length--; 935 m_bytes_left--; 936 937 if (m_bytes_left == 0) 938 m_current_state = State::Idle; 939 else if (m_copy_length == 0) 940 m_current_state = State::CompressedCommand; 941 } 942 } 943 944 return output_buffer.slice(0, bytes_read); 945} 946 947bool BrotliDecompressionStream::is_eof() const 948{ 949 return m_read_final_block && m_current_state == State::Idle; 950} 951 952}