Serenity Operating System
at master 1808 lines 102 kB view raw
1/* 2 * Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com> 3 * Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com> 4 * 5 * SPDX-License-Identifier: BSD-2-Clause 6 */ 7 8#include <AK/IntegralMath.h> 9#include <LibGfx/Size.h> 10#include <LibVideo/Color/CodingIndependentCodePoints.h> 11 12#include "Context.h" 13#include "Decoder.h" 14#include "Utilities.h" 15 16#if defined(AK_COMPILER_GCC) 17# pragma GCC optimize("O3") 18#endif 19 20namespace Video::VP9 { 21 22Decoder::Decoder() 23 : m_parser(make<Parser>(*this)) 24{ 25} 26 27DecoderErrorOr<void> Decoder::receive_sample(ReadonlyBytes chunk_data) 28{ 29 auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data); 30 31 if (superframe_sizes.is_empty()) { 32 return decode_frame(chunk_data); 33 } 34 35 size_t offset = 0; 36 37 for (auto superframe_size : superframe_sizes) { 38 auto checked_size = Checked<size_t>(superframe_size); 39 checked_size += offset; 40 if (checked_size.has_overflow() || checked_size.value() > chunk_data.size()) 41 return DecoderError::with_description(DecoderErrorCategory::Corrupted, "Superframe size invalid"sv); 42 auto frame_data = chunk_data.slice(offset, superframe_size); 43 TRY(decode_frame(frame_data)); 44 offset = checked_size.value(); 45 } 46 47 return {}; 48} 49 50inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride) 51{ 52 return row * stride + column; 53} 54 55DecoderErrorOr<void> Decoder::decode_frame(ReadonlyBytes frame_data) 56{ 57 // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax 58 // tables include function calls indicating when the block decode processes should be triggered. 59 auto frame_context = TRY(m_parser->parse_frame(frame_data)); 60 61 // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the 62 // coded frame has been decoded. 63 // FIXME: Implement loop filtering. 64 65 // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to 66 // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1: 67 // − show_existing_frame is equal to 0, 68 // − segmentation_enabled is equal to 1, 69 // − segmentation_update_map is equal to 1. 70 // This is handled by update_reference_frames. 71 72 // 4. The output process as specified in section 8.9 is invoked. 73 if (frame_context.shows_a_frame()) 74 TRY(create_video_frame(frame_context)); 75 76 // 5. The reference frame update process as specified in section 8.10 is invoked. 77 TRY(update_reference_frames(frame_context)); 78 return {}; 79} 80 81inline CodingIndependentCodePoints get_cicp_color_space(FrameContext const& frame_context) 82{ 83 ColorPrimaries color_primaries; 84 TransferCharacteristics transfer_characteristics; 85 MatrixCoefficients matrix_coefficients; 86 87 switch (frame_context.color_config.color_space) { 88 case ColorSpace::Unknown: 89 color_primaries = ColorPrimaries::Unspecified; 90 transfer_characteristics = TransferCharacteristics::Unspecified; 91 matrix_coefficients = MatrixCoefficients::Unspecified; 92 break; 93 case ColorSpace::Bt601: 94 color_primaries = ColorPrimaries::BT601; 95 transfer_characteristics = TransferCharacteristics::BT601; 96 matrix_coefficients = MatrixCoefficients::BT601; 97 break; 98 case ColorSpace::Bt709: 99 color_primaries = ColorPrimaries::BT709; 100 transfer_characteristics = TransferCharacteristics::BT709; 101 matrix_coefficients = MatrixCoefficients::BT709; 102 break; 103 case ColorSpace::Smpte170: 104 // https://www.kernel.org/doc/html/v4.9/media/uapi/v4l/pixfmt-007.html#colorspace-smpte-170m-v4l2-colorspace-smpte170m 105 color_primaries = ColorPrimaries::BT601; 106 transfer_characteristics = TransferCharacteristics::BT709; 107 matrix_coefficients = MatrixCoefficients::BT601; 108 break; 109 case ColorSpace::Smpte240: 110 color_primaries = ColorPrimaries::SMPTE240; 111 transfer_characteristics = TransferCharacteristics::SMPTE240; 112 matrix_coefficients = MatrixCoefficients::SMPTE240; 113 break; 114 case ColorSpace::Bt2020: 115 color_primaries = ColorPrimaries::BT2020; 116 // Bit depth doesn't actually matter to our transfer functions since we 117 // convert in floats of range 0-1 (for now?), but just for correctness set 118 // the TC to match the bit depth here. 119 if (frame_context.color_config.bit_depth == 12) 120 transfer_characteristics = TransferCharacteristics::BT2020BitDepth12; 121 else if (frame_context.color_config.bit_depth == 10) 122 transfer_characteristics = TransferCharacteristics::BT2020BitDepth10; 123 else 124 transfer_characteristics = TransferCharacteristics::BT709; 125 matrix_coefficients = MatrixCoefficients::BT2020NonConstantLuminance; 126 break; 127 case ColorSpace::RGB: 128 color_primaries = ColorPrimaries::BT709; 129 transfer_characteristics = TransferCharacteristics::Linear; 130 matrix_coefficients = MatrixCoefficients::Identity; 131 break; 132 case ColorSpace::Reserved: 133 VERIFY_NOT_REACHED(); 134 break; 135 } 136 137 return { color_primaries, transfer_characteristics, matrix_coefficients, frame_context.color_config.color_range }; 138} 139 140DecoderErrorOr<void> Decoder::create_video_frame(FrameContext const& frame_context) 141{ 142 // (8.9) Output process 143 144 // FIXME: If show_existing_frame is set, output from FrameStore[frame_to_show_map_index] here instead. 145 146 // FIXME: The math isn't entirely accurate to spec. output_uv_size is probably incorrect for certain 147 // sizes, as the spec seems to prefer that the halved sizes be ceiled. 148 u32 decoded_y_width = frame_context.columns() * 8; 149 Gfx::Size<u32> output_y_size = frame_context.size(); 150 auto decoded_uv_width = decoded_y_width >> frame_context.color_config.subsampling_x; 151 Gfx::Size<u32> output_uv_size = { 152 output_y_size.width() >> frame_context.color_config.subsampling_x, 153 output_y_size.height() >> frame_context.color_config.subsampling_y, 154 }; 155 Array<FixedArray<u16>, 3> output_buffers = { 156 DECODER_TRY_ALLOC(FixedArray<u16>::create(output_y_size.width() * output_y_size.height())), 157 DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())), 158 DECODER_TRY_ALLOC(FixedArray<u16>::create(output_uv_size.width() * output_uv_size.height())), 159 }; 160 for (u8 plane = 0; plane < 3; plane++) { 161 auto& buffer = output_buffers[plane]; 162 auto decoded_width = plane == 0 ? decoded_y_width : decoded_uv_width; 163 auto output_size = plane == 0 ? output_y_size : output_uv_size; 164 auto const& decoded_buffer = get_output_buffer(plane); 165 166 for (u32 row = 0; row < output_size.height(); row++) { 167 memcpy( 168 buffer.data() + row * output_size.width(), 169 decoded_buffer.data() + row * decoded_width, 170 output_size.width() * sizeof(*buffer.data())); 171 } 172 } 173 174 auto frame = DECODER_TRY_ALLOC(adopt_nonnull_own_or_enomem(new (nothrow) SubsampledYUVFrame( 175 { output_y_size.width(), output_y_size.height() }, 176 frame_context.color_config.bit_depth, get_cicp_color_space(frame_context), 177 frame_context.color_config.subsampling_x, frame_context.color_config.subsampling_y, 178 output_buffers[0], output_buffers[1], output_buffers[2]))); 179 m_video_frame_queue.enqueue(move(frame)); 180 181 return {}; 182} 183 184inline size_t buffer_size(size_t width, size_t height) 185{ 186 return width * height; 187} 188 189inline size_t buffer_size(Gfx::Size<size_t> size) 190{ 191 return buffer_size(size.width(), size.height()); 192} 193 194DecoderErrorOr<void> Decoder::allocate_buffers(FrameContext const& frame_context) 195{ 196 for (size_t plane = 0; plane < 3; plane++) { 197 auto size = m_parser->get_decoded_size_for_plane(frame_context, plane); 198 199 auto& output_buffer = get_output_buffer(plane); 200 output_buffer.clear_with_capacity(); 201 DECODER_TRY_ALLOC(output_buffer.try_resize_and_keep_capacity(buffer_size(size))); 202 } 203 return {}; 204} 205 206Vector<u16>& Decoder::get_output_buffer(u8 plane) 207{ 208 return m_output_buffers[plane]; 209} 210 211DecoderErrorOr<NonnullOwnPtr<VideoFrame>> Decoder::get_decoded_frame() 212{ 213 if (m_video_frame_queue.is_empty()) 214 return DecoderError::format(DecoderErrorCategory::NeedsMoreInput, "No video frame in queue."); 215 216 return m_video_frame_queue.dequeue(); 217} 218 219u8 Decoder::merge_prob(u8 pre_prob, u32 count_0, u32 count_1, u8 count_sat, u8 max_update_factor) 220{ 221 auto total_decode_count = count_0 + count_1; 222 u8 prob = 128; 223 if (total_decode_count != 0) { 224 prob = static_cast<u8>(clip_3(1u, 255u, (count_0 * 256 + (total_decode_count >> 1)) / total_decode_count)); 225 } 226 auto count = min(total_decode_count, count_sat); 227 auto factor = (max_update_factor * count) / count_sat; 228 return rounded_right_shift(pre_prob * (256 - factor) + (prob * factor), 8); 229} 230 231u32 Decoder::merge_probs(int const* tree, int index, u8* probs, u32* counts, u8 count_sat, u8 max_update_factor) 232{ 233 auto s = tree[index]; 234 auto left_count = (s <= 0) ? counts[-s] : merge_probs(tree, s, probs, counts, count_sat, max_update_factor); 235 auto r = tree[index + 1]; 236 auto right_count = (r <= 0) ? counts[-r] : merge_probs(tree, r, probs, counts, count_sat, max_update_factor); 237 probs[index >> 1] = merge_prob(probs[index >> 1], left_count, right_count, count_sat, max_update_factor); 238 return left_count + right_count; 239} 240 241DecoderErrorOr<void> Decoder::adapt_coef_probs(bool is_inter_predicted_frame) 242{ 243 u8 update_factor; 244 if (!is_inter_predicted_frame || m_parser->m_previous_frame_type != FrameType::KeyFrame) 245 update_factor = 112; 246 else 247 update_factor = 128; 248 249 for (size_t t = 0; t < 4; t++) { 250 for (size_t i = 0; i < 2; i++) { 251 for (size_t j = 0; j < 2; j++) { 252 for (size_t k = 0; k < 6; k++) { 253 size_t max_l = (k == 0) ? 3 : 6; 254 for (size_t l = 0; l < max_l; l++) { 255 auto& coef_probs = m_parser->m_probability_tables->coef_probs()[t][i][j][k][l]; 256 merge_probs(small_token_tree, 2, coef_probs, 257 m_parser->m_syntax_element_counter->m_counts_token[t][i][j][k][l], 258 24, update_factor); 259 merge_probs(binary_tree, 0, coef_probs, 260 m_parser->m_syntax_element_counter->m_counts_more_coefs[t][i][j][k][l], 261 24, update_factor); 262 } 263 } 264 } 265 } 266 } 267 268 return {}; 269} 270 271#define ADAPT_PROB_TABLE(name, size) \ 272 do { \ 273 for (size_t i = 0; i < (size); i++) { \ 274 auto table = probs.name##_prob(); \ 275 table[i] = adapt_prob(table[i], counter.m_counts_##name[i]); \ 276 } \ 277 } while (0) 278 279#define ADAPT_TREE(tree_name, prob_name, count_name, size) \ 280 do { \ 281 for (size_t i = 0; i < (size); i++) { \ 282 adapt_probs(tree_name##_tree, probs.prob_name##_probs()[i], counter.m_counts_##count_name[i]); \ 283 } \ 284 } while (0) 285 286DecoderErrorOr<void> Decoder::adapt_non_coef_probs(FrameContext const& frame_context) 287{ 288 auto& probs = *m_parser->m_probability_tables; 289 auto& counter = *m_parser->m_syntax_element_counter; 290 ADAPT_PROB_TABLE(is_inter, IS_INTER_CONTEXTS); 291 ADAPT_PROB_TABLE(comp_mode, COMP_MODE_CONTEXTS); 292 ADAPT_PROB_TABLE(comp_ref, REF_CONTEXTS); 293 for (size_t i = 0; i < REF_CONTEXTS; i++) { 294 for (size_t j = 0; j < 2; j++) 295 probs.single_ref_prob()[i][j] = adapt_prob(probs.single_ref_prob()[i][j], counter.m_counts_single_ref[i][j]); 296 } 297 ADAPT_TREE(inter_mode, inter_mode, inter_mode, INTER_MODE_CONTEXTS); 298 ADAPT_TREE(intra_mode, y_mode, intra_mode, BLOCK_SIZE_GROUPS); 299 ADAPT_TREE(intra_mode, uv_mode, uv_mode, INTRA_MODES); 300 ADAPT_TREE(partition, partition, partition, PARTITION_CONTEXTS); 301 ADAPT_PROB_TABLE(skip, SKIP_CONTEXTS); 302 if (frame_context.interpolation_filter == Switchable) { 303 ADAPT_TREE(interp_filter, interp_filter, interp_filter, INTERP_FILTER_CONTEXTS); 304 } 305 if (frame_context.transform_mode == TransformMode::Select) { 306 for (size_t i = 0; i < TX_SIZE_CONTEXTS; i++) { 307 auto& tx_probs = probs.tx_probs(); 308 auto& tx_counts = counter.m_counts_tx_size; 309 adapt_probs(tx_size_8_tree, tx_probs[Transform_8x8][i], tx_counts[Transform_8x8][i]); 310 adapt_probs(tx_size_16_tree, tx_probs[Transform_16x16][i], tx_counts[Transform_16x16][i]); 311 adapt_probs(tx_size_32_tree, tx_probs[Transform_32x32][i], tx_counts[Transform_32x32][i]); 312 } 313 } 314 adapt_probs(mv_joint_tree, probs.mv_joint_probs(), counter.m_counts_mv_joint); 315 for (size_t i = 0; i < 2; i++) { 316 probs.mv_sign_prob()[i] = adapt_prob(probs.mv_sign_prob()[i], counter.m_counts_mv_sign[i]); 317 adapt_probs(mv_class_tree, probs.mv_class_probs()[i], counter.m_counts_mv_class[i]); 318 probs.mv_class0_bit_prob()[i] = adapt_prob(probs.mv_class0_bit_prob()[i], counter.m_counts_mv_class0_bit[i]); 319 for (size_t j = 0; j < MV_OFFSET_BITS; j++) 320 probs.mv_bits_prob()[i][j] = adapt_prob(probs.mv_bits_prob()[i][j], counter.m_counts_mv_bits[i][j]); 321 for (size_t j = 0; j < CLASS0_SIZE; j++) 322 adapt_probs(mv_fr_tree, probs.mv_class0_fr_probs()[i][j], counter.m_counts_mv_class0_fr[i][j]); 323 adapt_probs(mv_fr_tree, probs.mv_fr_probs()[i], counter.m_counts_mv_fr[i]); 324 if (frame_context.high_precision_motion_vectors_allowed) { 325 probs.mv_class0_hp_prob()[i] = adapt_prob(probs.mv_class0_hp_prob()[i], counter.m_counts_mv_class0_hp[i]); 326 probs.mv_hp_prob()[i] = adapt_prob(probs.mv_hp_prob()[i], counter.m_counts_mv_hp[i]); 327 } 328 } 329 return {}; 330} 331 332void Decoder::adapt_probs(int const* tree, u8* probs, u32* counts) 333{ 334 merge_probs(tree, 0, probs, counts, COUNT_SAT, MAX_UPDATE_FACTOR); 335} 336 337u8 Decoder::adapt_prob(u8 prob, u32 counts[2]) 338{ 339 return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR); 340} 341 342DecoderErrorOr<void> Decoder::predict_intra(u8 plane, BlockContext const& block_context, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TransformSize tx_size, u32 block_index) 343{ 344 auto& frame_buffer = get_output_buffer(plane); 345 346 // 8.5.1 Intra prediction process 347 348 // The intra prediction process is invoked for intra coded blocks to predict a part of the block corresponding to a 349 // transform block. When the transform size is smaller than the block size, this process can be invoked multiple 350 // times within a single block for the same plane, and the invocations are in raster order within the block. 351 352 // The variable mode is specified by: 353 // 1. If plane is greater than 0, mode is set equal to uv_mode. 354 // 2. Otherwise, if MiSize is greater than or equal to BLOCK_8X8, mode is set equal to y_mode. 355 // 3. Otherwise, mode is set equal to sub_modes[ blockIdx ]. 356 PredictionMode mode; 357 if (plane > 0) 358 mode = block_context.uv_prediction_mode; 359 else if (block_context.size >= Block_8x8) 360 mode = block_context.y_prediction_mode(); 361 else 362 mode = block_context.sub_block_prediction_modes[block_index]; 363 364 // The variable log2Size specifying the base 2 logarithm of the width of the transform block is set equal to txSz + 2. 365 u8 log2_of_block_size = tx_size + 2; 366 // The variable size is set equal to 1 << log2Size. 367 u8 block_size = 1 << log2_of_block_size; 368 369 // The variable maxX is set equal to (MiCols * 8) - 1. 370 // The variable maxY is set equal to (MiRows * 8) - 1. 371 // If plane is greater than 0, then: 372 // − maxX is set equal to ((MiCols * 8) >> subsampling_x) - 1. 373 // − maxY is set equal to ((MiRows * 8) >> subsampling_y) - 1. 374 auto subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false; 375 auto subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false; 376 auto max_x = ((block_context.frame_context.columns() * 8u) >> subsampling_x) - 1u; 377 auto max_y = ((block_context.frame_context.rows() * 8u) >> subsampling_y) - 1u; 378 379 auto const frame_buffer_at = [&](u32 row, u32 column) -> u16& { 380 const auto frame_stride = max_x + 1u; 381 return frame_buffer[index_from_row_and_column(row, column, frame_stride)]; 382 }; 383 384 // The array aboveRow[ i ] for i = 0..size-1 is specified by: 385 // .. 386 // The array aboveRow[ i ] for i = size..2*size-1 is specified by: 387 // .. 388 // The array aboveRow[ i ] for i = -1 is specified by: 389 // .. 390 391 // NOTE: above_row is an array ranging from 0 to (2*block_size). 392 // There are three sections to the array: 393 // - [0] 394 // - [1 .. block_size] 395 // - [block_size + 1 .. block_size * 2] 396 // The array indices must be offset by 1 to accommodate index -1. 397 Array<Intermediate, maximum_block_dimensions * 2 + 1> above_row; 398 auto above_row_at = [&](i32 index) -> Intermediate& { 399 return above_row[index + 1]; 400 }; 401 402 // NOTE: This value is pre-calculated since it is reused in spec below. 403 // Use this to replace spec text "(1<<(BitDepth-1))". 404 Intermediate half_sample_value = (1 << (block_context.frame_context.color_config.bit_depth - 1)); 405 406 // The array aboveRow[ i ] for i = 0..size-1 is specified by: 407 if (!have_above) { 408 // 1. If haveAbove is equal to 0, aboveRow[ i ] is set equal to (1<<(BitDepth-1)) - 1. 409 // FIXME: Use memset? 410 for (auto i = 0u; i < block_size; i++) 411 above_row_at(i) = half_sample_value - 1; 412 } else { 413 // 2. Otherwise, aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ]. 414 for (auto i = 0u; i < block_size; i++) 415 above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i)); 416 } 417 418 // The array aboveRow[ i ] for i = size..2*size-1 is specified by: 419 if (have_above && not_on_right && tx_size == Transform_4x4) { 420 // 1. If haveAbove is equal to 1 and notOnRight is equal to 1 and txSz is equal to 0, 421 // aboveRow[ i ] is set equal to CurrFrame[ plane ][ y-1 ][ Min(maxX, x+i) ]. 422 for (auto i = block_size; i < block_size * 2; i++) 423 above_row_at(i) = frame_buffer_at(y - 1, min(max_x, x + i)); 424 } else { 425 // 2. Otherwise, aboveRow[ i ] is set equal to aboveRow[ size-1 ]. 426 for (auto i = block_size; i < block_size * 2; i++) 427 above_row_at(i) = above_row_at(block_size - 1); 428 } 429 430 // The array aboveRow[ i ] for i = -1 is specified by: 431 if (have_above && have_left) { 432 // 1. If haveAbove is equal to 1 and haveLeft is equal to 1, aboveRow[ -1 ] is set equal to 433 // CurrFrame[ plane ][ y-1 ][ Min(maxX, x-1) ]. 434 above_row_at(-1) = frame_buffer_at(y - 1, min(max_x, x - 1)); 435 } else if (have_above) { 436 // 2. Otherwise if haveAbove is equal to 1, aboveRow[ -1] is set equal to (1<<(BitDepth-1)) + 1. 437 above_row_at(-1) = half_sample_value + 1; 438 } else { 439 // 3. Otherwise, aboveRow[ -1 ] is set equal to (1<<(BitDepth-1)) - 1 440 above_row_at(-1) = half_sample_value - 1; 441 } 442 443 // The array leftCol[ i ] for i = 0..size-1 is specified by: 444 Array<Intermediate, maximum_block_dimensions> left_column; 445 if (have_left) { 446 // − If haveLeft is equal to 1, leftCol[ i ] is set equal to CurrFrame[ plane ][ Min(maxY, y+i) ][ x-1 ]. 447 for (auto i = 0u; i < block_size; i++) 448 left_column[i] = frame_buffer_at(min(max_y, y + i), x - 1); 449 } else { 450 // − Otherwise, leftCol[ i ] is set equal to (1<<(BitDepth-1)) + 1. 451 for (auto i = 0u; i < block_size; i++) 452 left_column[i] = half_sample_value + 1; 453 } 454 455 // A 2D array named pred containing the intra predicted samples is constructed as follows: 456 Array<Intermediate, maximum_block_size> predicted_samples; 457 auto const predicted_sample_at = [&](u32 row, u32 column) -> Intermediate& { 458 return predicted_samples[index_from_row_and_column(row, column, block_size)]; 459 }; 460 461 // FIXME: One of the two below should be a simple memcpy of 1D arrays. 462 switch (mode) { 463 case PredictionMode::VPred: 464 // − If mode is equal to V_PRED, pred[ i ][ j ] is set equal to aboveRow[ j ] with j = 0..size-1 and i = 0..size-1 465 // (each row of the block is filled with a copy of aboveRow). 466 for (auto j = 0u; j < block_size; j++) { 467 for (auto i = 0u; i < block_size; i++) 468 predicted_sample_at(i, j) = above_row_at(j); 469 } 470 break; 471 case PredictionMode::HPred: 472 // − Otherwise if mode is equal to H_PRED, pred[ i ][ j ] is set equal to leftCol[ i ] with j = 0..size-1 and i = 473 // 0..size-1 (each column of the block is filled with a copy of leftCol). 474 for (auto j = 0u; j < block_size; j++) { 475 for (auto i = 0u; i < block_size; i++) 476 predicted_sample_at(i, j) = left_column[i]; 477 } 478 break; 479 case PredictionMode::D207Pred: 480 // − Otherwise if mode is equal to D207_PRED, the following applies: 481 // 1. pred[ size - 1 ][ j ] = leftCol[ size - 1] for j = 0..size-1 482 for (auto j = 0u; j < block_size; j++) 483 predicted_sample_at(block_size - 1, j) = left_column[block_size - 1]; 484 // 2. pred[ i ][ 0 ] = Round2( leftCol[ i ] + leftCol[ i + 1 ], 1 ) for i = 0..size-2 485 for (auto i = 0u; i < block_size - 1u; i++) 486 predicted_sample_at(i, 0) = rounded_right_shift(left_column[i] + left_column[i + 1], 1); 487 // 3. pred[ i ][ 1 ] = Round2( leftCol[ i ] + 2 * leftCol[ i + 1 ] + leftCol[ i + 2 ], 2 ) for i = 0..size-3 488 for (auto i = 0u; i < block_size - 2u; i++) 489 predicted_sample_at(i, 1) = rounded_right_shift(left_column[i] + (2 * left_column[i + 1]) + left_column[i + 2], 2); 490 // 4. pred[ size - 2 ][ 1 ] = Round2( leftCol[ size - 2 ] + 3 * leftCol[ size - 1 ], 2 ) 491 predicted_sample_at(block_size - 2, 1) = rounded_right_shift(left_column[block_size - 2] + (3 * left_column[block_size - 1]), 2); 492 // 5. pred[ i ][ j ] = pred[ i + 1 ][ j - 2 ] for i = (size-2)..0, for j = 2..size-1 493 // NOTE – In the last step i iterates in reverse order. 494 for (auto i = block_size - 2u;;) { 495 for (auto j = 2u; j < block_size; j++) 496 predicted_sample_at(i, j) = predicted_sample_at(i + 1, j - 2); 497 if (i == 0) 498 break; 499 i--; 500 } 501 break; 502 case PredictionMode::D45Pred: 503 // Otherwise if mode is equal to D45_PRED, 504 // for i = 0..size-1, for j = 0..size-1. 505 for (auto i = 0u; i < block_size; i++) { 506 for (auto j = 0; j < block_size; j++) { 507 // pred[ i ][ j ] is set equal to (i + j + 2 < size * 2) ? 508 if (i + j + 2 < block_size * 2) 509 // Round2( aboveRow[ i + j ] + aboveRow[ i + j + 1 ] * 2 + aboveRow[ i + j + 2 ], 2 ) : 510 predicted_sample_at(i, j) = rounded_right_shift(above_row_at(i + j) + above_row_at(i + j + 1) * 2 + above_row_at(i + j + 2), 2); 511 else 512 // aboveRow[ 2 * size - 1 ] 513 predicted_sample_at(i, j) = above_row_at(2 * block_size - 1); 514 } 515 } 516 break; 517 case PredictionMode::D63Pred: 518 // Otherwise if mode is equal to D63_PRED, 519 for (auto i = 0u; i < block_size; i++) { 520 for (auto j = 0u; j < block_size; j++) { 521 // i/2 + j 522 auto row_index = (i / 2) + j; 523 // pred[ i ][ j ] is set equal to (i & 1) ? 524 if (i & 1) 525 // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ] * 2 + aboveRow[ i/2 + j + 2 ], 2 ) : 526 predicted_sample_at(i, j) = rounded_right_shift(above_row_at(row_index) + above_row_at(row_index + 1) * 2 + above_row_at(row_index + 2), 2); 527 else 528 // Round2( aboveRow[ i/2 + j ] + aboveRow[ i/2 + j + 1 ], 1 ) for i = 0..size-1, for j = 0..size-1. 529 predicted_sample_at(i, j) = rounded_right_shift(above_row_at(row_index) + above_row_at(row_index + 1), 1); 530 } 531 } 532 break; 533 case PredictionMode::D117Pred: 534 // Otherwise if mode is equal to D117_PRED, the following applies: 535 // 1. pred[ 0 ][ j ] = Round2( aboveRow[ j - 1 ] + aboveRow[ j ], 1 ) for j = 0..size-1 536 for (auto j = 0; j < block_size; j++) 537 predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 1) + above_row_at(j), 1); 538 // 2. pred[ 1 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) 539 predicted_sample_at(1, 0) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); 540 // 3. pred[ 1 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1 541 for (auto j = 1; j < block_size; j++) 542 predicted_sample_at(1, j) = rounded_right_shift(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2); 543 // 4. pred[ 2 ][ 0 ] = Round2( aboveRow[ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) 544 predicted_sample_at(2, 0) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); 545 // 5. pred[ i ][ 0 ] = Round2( leftCol[ i - 3 ] + 2 * leftCol[ i - 2 ] + leftCol[ i - 1 ], 2 ) for i = 3..size-1 546 for (auto i = 3u; i < block_size; i++) 547 predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 3] + 2 * left_column[i - 2] + left_column[i - 1], 2); 548 // 6. pred[ i ][ j ] = pred[ i - 2 ][ j - 1 ] for i = 2..size-1, for j = 1..size-1 549 for (auto i = 2u; i < block_size; i++) { 550 for (auto j = 1u; j < block_size; j++) 551 predicted_sample_at(i, j) = predicted_sample_at(i - 2, j - 1); 552 } 553 break; 554 case PredictionMode::D135Pred: 555 // Otherwise if mode is equal to D135_PRED, the following applies: 556 // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) 557 predicted_sample_at(0, 0) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); 558 // 2. pred[ 0 ][ j ] = Round2( aboveRow[ j - 2 ] + 2 * aboveRow[ j - 1 ] + aboveRow[ j ], 2 ) for j = 1..size-1 559 for (auto j = 1; j < block_size; j++) 560 predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 2) + 2 * above_row_at(j - 1) + above_row_at(j), 2); 561 // 3. pred[ 1 ][ 0 ] = Round2( aboveRow [ -1 ] + 2 * leftCol[ 0 ] + leftCol[ 1 ], 2 ) for i = 1..size-1 562 predicted_sample_at(1, 0) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); 563 // 4. pred[ i ][ 0 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1 564 for (auto i = 2u; i < block_size; i++) 565 predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2); 566 // 5. pred[ i ][ j ] = pred[ i - 1 ][ j - 1 ] for i = 1..size-1, for j = 1..size-1 567 for (auto i = 1u; i < block_size; i++) { 568 for (auto j = 1; j < block_size; j++) 569 predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 1); 570 } 571 break; 572 case PredictionMode::D153Pred: 573 // Otherwise if mode is equal to D153_PRED, the following applies: 574 // 1. pred[ 0 ][ 0 ] = Round2( leftCol[ 0 ] + aboveRow[ -1 ], 1 ) 575 predicted_sample_at(0, 0) = rounded_right_shift(left_column[0] + above_row_at(-1), 1); 576 // 2. pred[ i ][ 0 ] = Round2( leftCol[ i - 1] + leftCol[ i ], 1 ) for i = 1..size-1 577 for (auto i = 1u; i < block_size; i++) 578 predicted_sample_at(i, 0) = rounded_right_shift(left_column[i - 1] + left_column[i], 1); 579 // 3. pred[ 0 ][ 1 ] = Round2( leftCol[ 0 ] + 2 * aboveRow[ -1 ] + aboveRow[ 0 ], 2 ) 580 predicted_sample_at(0, 1) = rounded_right_shift(left_column[0] + 2 * above_row_at(-1) + above_row_at(0), 2); 581 // 4. pred[ 1 ][ 1 ] = Round2( aboveRow[ -1 ] + 2 * leftCol [ 0 ] + leftCol [ 1 ], 2 ) 582 predicted_sample_at(1, 1) = rounded_right_shift(above_row_at(-1) + 2 * left_column[0] + left_column[1], 2); 583 // 5. pred[ i ][ 1 ] = Round2( leftCol[ i - 2 ] + 2 * leftCol[ i - 1 ] + leftCol[ i ], 2 ) for i = 2..size-1 584 for (auto i = 2u; i < block_size; i++) 585 predicted_sample_at(i, 1) = rounded_right_shift(left_column[i - 2] + 2 * left_column[i - 1] + left_column[i], 2); 586 // 6. pred[ 0 ][ j ] = Round2( aboveRow[ j - 3 ] + 2 * aboveRow[ j - 2 ] + aboveRow[ j - 1 ], 2 ) for j = 2..size-1 587 for (auto j = 2; j < block_size; j++) 588 predicted_sample_at(0, j) = rounded_right_shift(above_row_at(j - 3) + 2 * above_row_at(j - 2) + above_row_at(j - 1), 2); 589 // 7. pred[ i ][ j ] = pred[ i - 1 ][ j - 2 ] for i = 1..size-1, for j = 2..size-1 590 for (auto i = 1u; i < block_size; i++) { 591 for (auto j = 2u; j < block_size; j++) 592 predicted_sample_at(i, j) = predicted_sample_at(i - 1, j - 2); 593 } 594 break; 595 case PredictionMode::TmPred: 596 // Otherwise if mode is equal to TM_PRED, 597 // pred[ i ][ j ] is set equal to Clip1( aboveRow[ j ] + leftCol[ i ] - aboveRow[ -1 ] ) 598 // for i = 0..size-1, for j = 0..size-1. 599 for (auto i = 0u; i < block_size; i++) { 600 for (auto j = 0u; j < block_size; j++) 601 predicted_sample_at(i, j) = clip_1(block_context.frame_context.color_config.bit_depth, above_row_at(j) + left_column[i] - above_row_at(-1)); 602 } 603 break; 604 case PredictionMode::DcPred: { 605 Intermediate average = 0; 606 607 if (have_left && have_above) { 608 // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 1, 609 // The variable avg (the average of the samples in union of aboveRow and leftCol) 610 // is specified as follows: 611 // sum = 0 612 // for ( k = 0; k < size; k++ ) { 613 // sum += leftCol[ k ] 614 // sum += aboveRow[ k ] 615 // } 616 // avg = (sum + size) >> (log2Size + 1) 617 Intermediate sum = 0; 618 for (auto k = 0u; k < block_size; k++) { 619 sum += left_column[k]; 620 sum += above_row_at(k); 621 } 622 average = (sum + block_size) >> (log2_of_block_size + 1); 623 } else if (have_left && !have_above) { 624 // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 1 and haveAbove is equal to 0, 625 // The variable leftAvg is specified as follows: 626 // sum = 0 627 // for ( k = 0; k < size; k++ ) { 628 // sum += leftCol[ k ] 629 // } 630 // leftAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size 631 Intermediate sum = 0; 632 for (auto k = 0u; k < block_size; k++) 633 sum += left_column[k]; 634 average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size; 635 } else if (!have_left && have_above) { 636 // Otherwise if mode is equal to DC_PRED and haveLeft is equal to 0 and haveAbove is equal to 1, 637 // The variable aboveAvg is specified as follows: 638 // sum = 0 639 // for ( k = 0; k < size; k++ ) { 640 // sum += aboveRow[ k ] 641 // } 642 // aboveAvg = (sum + (1 << (log2Size - 1) ) ) >> log2Size 643 Intermediate sum = 0; 644 for (auto k = 0u; k < block_size; k++) 645 sum += above_row_at(k); 646 average = (sum + (1 << (log2_of_block_size - 1))) >> log2_of_block_size; 647 } else { 648 // Otherwise (mode is DC_PRED), 649 // pred[ i ][ j ] is set equal to 1<<(BitDepth - 1) with i = 0..size-1 and j = 0..size-1. 650 average = 1 << (block_context.frame_context.color_config.bit_depth - 1); 651 } 652 653 // pred[ i ][ j ] is set equal to avg with i = 0..size-1 and j = 0..size-1. 654 for (auto i = 0u; i < block_size; i++) { 655 for (auto j = 0u; j < block_size; j++) 656 predicted_sample_at(i, j) = average; 657 } 658 break; 659 } 660 default: 661 dbgln("Unknown prediction mode {}", static_cast<u8>(mode)); 662 VERIFY_NOT_REACHED(); 663 } 664 665 // The current frame is updated as follows: 666 // − CurrFrame[ plane ][ y + i ][ x + j ] is set equal to pred[ i ][ j ] for i = 0..size-1 and j = 0..size-1. 667 auto width_in_frame_buffer = min(static_cast<u32>(block_size), max_x - x + 1); 668 auto height_in_frame_buffer = min(static_cast<u32>(block_size), max_y - y + 1); 669 670 for (auto i = 0u; i < height_in_frame_buffer; i++) { 671 for (auto j = 0u; j < width_in_frame_buffer; j++) 672 frame_buffer_at(y + i, x + j) = predicted_sample_at(i, j); 673 } 674 675 return {}; 676} 677 678MotionVector Decoder::select_motion_vector(u8 plane, BlockContext const& block_context, ReferenceIndex reference_index, u32 block_index) 679{ 680 // The inputs to this process are: 681 // − a variable plane specifying which plane is being predicted, 682 // − a variable refList specifying that we should select the motion vector from BlockMvs[ refList ], 683 // − a variable blockIdx, specifying how much of the block has already been predicted in units of 4x4 samples. 684 // The output of this process is a 2 element array called mv containing the motion vector for this block. 685 686 // The purpose of this process is to find the motion vector for this block. Motion vectors are specified for each 687 // luma block, but a chroma block may cover more than one luma block due to subsampling. In this case, an 688 // average motion vector is constructed for the chroma block. 689 690 // The functions round_mv_comp_q2 and round_mv_comp_q4 perform division with rounding to the nearest 691 // integer and are specified as: 692 auto round_mv_comp_q2 = [&](MotionVector in) { 693 // return (value < 0 ? value - 1 : value + 1) / 2 694 return MotionVector { 695 (in.row() < 0 ? in.row() - 1 : in.row() + 1) >> 1, 696 (in.column() < 0 ? in.column() - 1 : in.column() + 1) >> 1 697 }; 698 }; 699 auto round_mv_comp_q4 = [&](MotionVector in) { 700 // return (value < 0 ? value - 2 : value + 2) / 4 701 return MotionVector { 702 (in.row() < 0 ? in.row() - 2 : in.row() + 2) >> 2, 703 (in.column() < 0 ? in.column() - 2 : in.column() + 2) >> 2 704 }; 705 }; 706 707 auto vectors = block_context.sub_block_motion_vectors; 708 709 // The motion vector array mv is derived as follows: 710 // − If plane is equal to 0, or MiSize is greater than or equal to BLOCK_8X8, mv is set equal to 711 // BlockMvs[ refList ][ blockIdx ]. 712 if (plane == 0 || block_context.size >= Block_8x8) 713 return vectors[block_index][reference_index]; 714 // − Otherwise, if subsampling_x is equal to 0 and subsampling_y is equal to 0, mv is set equal to 715 // BlockMvs[ refList ][ blockIdx ]. 716 if (!block_context.frame_context.color_config.subsampling_x && !block_context.frame_context.color_config.subsampling_y) 717 return vectors[block_index][reference_index]; 718 // − Otherwise, if subsampling_x is equal to 0 and subsampling_y is equal to 1, mv[ comp ] is set equal to 719 // round_mv_comp_q2( BlockMvs[ refList ][ blockIdx ][ comp ] + BlockMvs[ refList ][ blockIdx + 2 ][ comp ] ) 720 // for comp = 0..1. 721 if (!block_context.frame_context.color_config.subsampling_x && block_context.frame_context.color_config.subsampling_y) 722 return round_mv_comp_q2(vectors[block_index][reference_index] + vectors[block_index + 2][reference_index]); 723 // − Otherwise, if subsampling_x is equal to 1 and subsampling_y is equal to 0, mv[ comp ] is set equal to 724 // round_mv_comp_q2( BlockMvs[ refList ][ blockIdx ][ comp ] + BlockMvs[ refList ][ blockIdx + 1 ][ comp ] ) 725 // for comp = 0..1. 726 if (block_context.frame_context.color_config.subsampling_x && !block_context.frame_context.color_config.subsampling_y) 727 return round_mv_comp_q2(vectors[block_index][reference_index] + vectors[block_index + 1][reference_index]); 728 // − Otherwise, (subsampling_x is equal to 1 and subsampling_y is equal to 1), mv[ comp ] is set equal to 729 // round_mv_comp_q4( BlockMvs[ refList ][ 0 ][ comp ] + BlockMvs[ refList ][ 1 ][ comp ] + 730 // BlockMvs[ refList ][ 2 ][ comp ] + BlockMvs[ refList ][ 3 ][ comp ] ) for comp = 0..1. 731 VERIFY(block_context.frame_context.color_config.subsampling_x && block_context.frame_context.color_config.subsampling_y); 732 return round_mv_comp_q4(vectors[0][reference_index] + vectors[1][reference_index] 733 + vectors[2][reference_index] + vectors[3][reference_index]); 734} 735 736MotionVector Decoder::clamp_motion_vector(u8 plane, BlockContext const& block_context, u32 block_row, u32 block_column, MotionVector vector) 737{ 738 // FIXME: This function is named very similarly to Parser::clamp_mv. Rename one or the other? 739 740 // The purpose of this process is to change the motion vector into the appropriate precision for the current plane 741 // and to clamp motion vectors that go too far off the edge of the frame. 742 // The variables sx and sy are set equal to the subsampling for the current plane as follows: 743 // − If plane is equal to 0, sx is set equal to 0 and sy is set equal to 0. 744 // − Otherwise, sx is set equal to subsampling_x and sy is set equal to subsampling_y. 745 bool subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false; 746 bool subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false; 747 748 // The output array clampedMv is specified by the following steps: 749 i32 blocks_high = num_8x8_blocks_high_lookup[block_context.size]; 750 // Casts must be done here to prevent subtraction underflow from wrapping the values. 751 i32 mb_to_top_edge = -(static_cast<i32>(block_row * MI_SIZE) * 16) >> subsampling_y; 752 i32 mb_to_bottom_edge = (((static_cast<i32>(block_context.frame_context.rows()) - blocks_high - static_cast<i32>(block_row)) * MI_SIZE) * 16) >> subsampling_y; 753 754 i32 blocks_wide = num_8x8_blocks_wide_lookup[block_context.size]; 755 i32 mb_to_left_edge = -(static_cast<i32>(block_column * MI_SIZE) * 16) >> subsampling_x; 756 i32 mb_to_right_edge = (((static_cast<i32>(block_context.frame_context.columns()) - blocks_wide - static_cast<i32>(block_column)) * MI_SIZE) * 16) >> subsampling_x; 757 758 i32 subpel_left = (INTERP_EXTEND + ((blocks_wide * MI_SIZE) >> subsampling_x)) << SUBPEL_BITS; 759 i32 subpel_right = subpel_left - SUBPEL_SHIFTS; 760 i32 subpel_top = (INTERP_EXTEND + ((blocks_high * MI_SIZE) >> subsampling_y)) << SUBPEL_BITS; 761 i32 subpel_bottom = subpel_top - SUBPEL_SHIFTS; 762 return { 763 clip_3(mb_to_top_edge - subpel_top, mb_to_bottom_edge + subpel_bottom, (2 * vector.row()) >> subsampling_y), 764 clip_3(mb_to_left_edge - subpel_left, mb_to_right_edge + subpel_right, (2 * vector.column()) >> subsampling_x) 765 }; 766} 767 768DecoderErrorOr<void> Decoder::predict_inter_block(u8 plane, BlockContext const& block_context, ReferenceIndex reference_index, u32 block_row, u32 block_column, u32 x, u32 y, u32 width, u32 height, u32 block_index, Span<u16> block_buffer) 769{ 770 VERIFY(width <= maximum_block_dimensions && height <= maximum_block_dimensions); 771 // 2. The motion vector selection process in section 8.5.2.1 is invoked with plane, refList, blockIdx as inputs 772 // and the output being the motion vector mv. 773 auto motion_vector = select_motion_vector(plane, block_context, reference_index, block_index); 774 775 // 3. The motion vector clamping process in section 8.5.2.2 is invoked with plane, mv as inputs and the output 776 // being the clamped motion vector clampedMv 777 auto clamped_vector = clamp_motion_vector(plane, block_context, block_row, block_column, motion_vector); 778 779 // 4. The motion vector scaling process in section 8.5.2.3 is invoked with plane, refList, x, y, clampedMv as 780 // inputs and the output being the initial location startX, startY, and the step sizes stepX, stepY. 781 // 8.5.2.3 Motion vector scaling process 782 // The inputs to this process are: 783 // − a variable plane specifying which plane is being predicted, 784 // − a variable refList specifying that we should scale to match reference frame ref_frame[ refList ], 785 // − variables x and y specifying the location of the top left sample in the CurrFrame[ plane ] array of the region 786 // to be predicted, 787 // − a variable clampedMv specifying the clamped motion vector. 788 // The outputs of this process are the variables startX and startY giving the reference block location in units of 789 // 1/16 th of a sample, and variables xStep and yStep giving the step size in units of 1/16 th of a sample. 790 // This process is responsible for computing the sampling locations in the reference frame based on the motion 791 // vector. The sampling locations are also adjusted to compensate for any difference in the size of the reference 792 // frame compared to the current frame. 793 794 // A variable refIdx specifying which reference frame is being used is set equal to 795 // ref_frame_idx[ ref_frame[ refList ] - LAST_FRAME ]. 796 auto reference_frame_index = block_context.frame_context.reference_frame_indices[block_context.reference_frame_types[reference_index] - ReferenceFrameType::LastFrame]; 797 798 // It is a requirement of bitstream conformance that all the following conditions are satisfied: 799 // − 2 * FrameWidth >= RefFrameWidth[ refIdx ] 800 // − 2 * FrameHeight >= RefFrameHeight[ refIdx ] 801 // − FrameWidth <= 16 * RefFrameWidth[ refIdx ] 802 // − FrameHeight <= 16 * RefFrameHeight[ refIdx ] 803 auto& reference_frame = m_parser->m_reference_frames[reference_frame_index]; 804 if (!reference_frame.is_valid()) 805 return DecoderError::format(DecoderErrorCategory::Corrupted, "Attempted to use reference frame {} that has not been saved", reference_frame_index); 806 auto double_frame_size = block_context.frame_context.size().scaled_by(2); 807 if (double_frame_size.width() < reference_frame.size.width() || double_frame_size.height() < reference_frame.size.height()) 808 return DecoderError::format(DecoderErrorCategory::Corrupted, "Inter frame size is too small relative to reference frame {}", reference_frame_index); 809 if (!reference_frame.size.scaled_by(16).contains(block_context.frame_context.size())) 810 return DecoderError::format(DecoderErrorCategory::Corrupted, "Inter frame size is too large relative to reference frame {}", reference_frame_index); 811 812 // FIXME: Convert all the operations in this function to vector operations supported by 813 // MotionVector. 814 815 // A variable xScale is set equal to (RefFrameWidth[ refIdx ] << REF_SCALE_SHIFT) / FrameWidth. 816 // A variable yScale is set equal to (RefFrameHeight[ refIdx ] << REF_SCALE_SHIFT) / FrameHeight. 817 // (xScale and yScale specify the size of the reference frame relative to the current frame in units where 16 is 818 // equivalent to the reference frame having the same size.) 819 i32 x_scale = (reference_frame.size.width() << REF_SCALE_SHIFT) / block_context.frame_context.size().width(); 820 i32 y_scale = (reference_frame.size.height() << REF_SCALE_SHIFT) / block_context.frame_context.size().height(); 821 822 // The variable baseX is set equal to (x * xScale) >> REF_SCALE_SHIFT. 823 // The variable baseY is set equal to (y * yScale) >> REF_SCALE_SHIFT. 824 // (baseX and baseY specify the location of the block in the reference frame if a zero motion vector is used). 825 i32 base_x = (x * x_scale) >> REF_SCALE_SHIFT; 826 i32 base_y = (y * y_scale) >> REF_SCALE_SHIFT; 827 828 // The variable lumaX is set equal to (plane > 0) ? x << subsampling_x : x. 829 // The variable lumaY is set equal to (plane > 0) ? y << subsampling_y : y. 830 // (lumaX and lumaY specify the location of the block to be predicted in the current frame in units of luma 831 // samples.) 832 bool subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false; 833 bool subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false; 834 i32 luma_x = x << subsampling_x; 835 i32 luma_y = y << subsampling_y; 836 837 // The variable fracX is set equal to ( (16 * lumaX * xScale) >> REF_SCALE_SHIFT) & SUBPEL_MASK. 838 // The variable fracY is set equal to ( (16 * lumaY * yScale) >> REF_SCALE_SHIFT) & SUBPEL_MASK. 839 i32 frac_x = ((16 * luma_x * x_scale) >> REF_SCALE_SHIFT) & SUBPEL_MASK; 840 i32 frac_y = ((16 * luma_y * y_scale) >> REF_SCALE_SHIFT) & SUBPEL_MASK; 841 842 // The variable dX is set equal to ( (clampedMv[ 1 ] * xScale) >> REF_SCALE_SHIFT) + fracX. 843 // The variable dY is set equal to ( (clampedMv[ 0 ] * yScale) >> REF_SCALE_SHIFT) + fracY. 844 // (dX and dY specify a scaled motion vector.) 845 i32 scaled_vector_x = ((clamped_vector.column() * x_scale) >> REF_SCALE_SHIFT) + frac_x; 846 i32 scaled_vector_y = ((clamped_vector.row() * y_scale) >> REF_SCALE_SHIFT) + frac_y; 847 848 // The output variable stepX is set equal to (16 * xScale) >> REF_SCALE_SHIFT. 849 // The output variable stepY is set equal to (16 * yScale) >> REF_SCALE_SHIFT. 850 i32 scaled_step_x = (16 * x_scale) >> REF_SCALE_SHIFT; 851 i32 scaled_step_y = (16 * y_scale) >> REF_SCALE_SHIFT; 852 853 // The output variable startX is set equal to (baseX << SUBPEL_BITS) + dX. 854 // The output variable startY is set equal to (baseY << SUBPEL_BITS) + dY. 855 i32 offset_scaled_block_x = (base_x << SUBPEL_BITS) + scaled_vector_x; 856 i32 offset_scaled_block_y = (base_y << SUBPEL_BITS) + scaled_vector_y; 857 858 // 5. The block inter prediction process in section 8.5.2.4 is invoked with plane, refList, startX, startY, stepX, 859 // stepY, w, h as inputs and the output is assigned to the 2D array preds[ refList ]. 860 861 // 8.5.2.4 Block inter prediction process 862 // The inputs to this process are: 863 // − a variable plane, 864 // − a variable refList specifying that we should predict from ref_frame[ refList ], 865 // − variables x and y giving the block location in units of 1/16 th of a sample, 866 // − variables xStep and yStep giving the step size in units of 1/16 th of a sample. (These will be at most equal 867 // to 80 due to the restrictions on scaling between reference frames.) 868 static constexpr i32 MAX_SCALED_STEP = 80; 869 VERIFY(scaled_step_x <= MAX_SCALED_STEP && scaled_step_y <= MAX_SCALED_STEP); 870 // − variables w and h giving the width and height of the block in units of samples 871 // The output from this process is the 2D array named pred containing inter predicted samples. 872 873 // A variable ref specifying the reference frame contents is set equal to FrameStore[ refIdx ]. 874 auto& reference_frame_buffer = reference_frame.frame_planes[plane]; 875 auto reference_frame_width = reference_frame.size.width() >> subsampling_x; 876 auto reference_frame_buffer_at = [&](u32 row, u32 column) -> u16& { 877 return reference_frame_buffer[row * reference_frame_width + column]; 878 }; 879 880 auto block_buffer_at = [&](u32 row, u32 column) -> u16& { 881 return block_buffer[row * width + column]; 882 }; 883 884 // The variable lastX is set equal to ( (RefFrameWidth[ refIdx ] + subX) >> subX) - 1. 885 // The variable lastY is set equal to ( (RefFrameHeight[ refIdx ] + subY) >> subY) - 1. 886 // (lastX and lastY specify the coordinates of the bottom right sample of the reference plane.) 887 i32 scaled_right = ((reference_frame.size.width() + subsampling_x) >> subsampling_x) - 1; 888 i32 scaled_bottom = ((reference_frame.size.height() + subsampling_y) >> subsampling_y) - 1; 889 890 // The variable intermediateHeight specifying the height required for the intermediate array is set equal to (((h - 891 // 1) * yStep + 15) >> 4) + 8. 892 static constexpr auto maximum_intermediate_height = (((maximum_block_dimensions - 1) * MAX_SCALED_STEP + 15) >> 4) + 8; 893 auto intermediate_height = (((height - 1) * scaled_step_y + 15) >> 4) + 8; 894 VERIFY(intermediate_height <= maximum_intermediate_height); 895 // The sub-sample interpolation is effected via two one-dimensional convolutions. First a horizontal filter is used 896 // to build up a temporary array, and then this array is vertically filtered to obtain the final prediction. The 897 // fractional parts of the motion vectors determine the filtering process. If the fractional part is zero, then the 898 // filtering is equivalent to a straight sample copy. 899 // The filtering is applied as follows: 900 // The array intermediate is specified as follows: 901 // Note: Height is specified by `intermediate_height`, width is specified by `width` 902 Array<u16, maximum_intermediate_height * maximum_block_dimensions> intermediate_buffer; 903 auto intermediate_buffer_at = [&](u32 row, u32 column) -> u16& { 904 return intermediate_buffer[row * width + column]; 905 }; 906 907 for (auto row = 0u; row < intermediate_height; row++) { 908 for (auto column = 0u; column < width; column++) { 909 auto samples_start = offset_scaled_block_x + static_cast<i32>(scaled_step_x * column); 910 911 i32 accumulated_samples = 0; 912 for (auto t = 0u; t < 8u; t++) { 913 auto sample = reference_frame_buffer_at( 914 clip_3(0, scaled_bottom, (offset_scaled_block_y >> 4) + static_cast<i32>(row) - 3), 915 clip_3(0, scaled_right, (samples_start >> 4) + static_cast<i32>(t) - 3)); 916 accumulated_samples += subpel_filters[block_context.interpolation_filter][samples_start & 15][t] * sample; 917 } 918 intermediate_buffer_at(row, column) = clip_1(block_context.frame_context.color_config.bit_depth, rounded_right_shift(accumulated_samples, 7)); 919 } 920 } 921 922 for (auto row = 0u; row < height; row++) { 923 for (auto column = 0u; column < width; column++) { 924 auto samples_start = (offset_scaled_block_y & 15) + static_cast<i32>(scaled_step_y * row); 925 926 i32 accumulated_samples = 0; 927 for (auto t = 0u; t < 8u; t++) { 928 auto sample = intermediate_buffer_at((samples_start >> 4) + t, column); 929 accumulated_samples += subpel_filters[block_context.interpolation_filter][samples_start & 15][t] * sample; 930 } 931 block_buffer_at(row, column) = clip_1(block_context.frame_context.color_config.bit_depth, rounded_right_shift(accumulated_samples, 7)); 932 } 933 } 934 935 return {}; 936} 937 938DecoderErrorOr<void> Decoder::predict_inter(u8 plane, BlockContext const& block_context, u32 x, u32 y, u32 width, u32 height, u32 block_index) 939{ 940 // The inter prediction process is invoked for inter coded blocks. When MiSize is smaller than BLOCK_8X8, the 941 // prediction is done with a granularity of 4x4 samples, otherwise the whole plane is predicted at the same time. 942 // The inputs to this process are: 943 // − a variable plane specifying which plane is being predicted, 944 // − variables x and y specifying the location of the top left sample in the CurrFrame[ plane ] array of the region 945 // to be predicted, 946 // − variables w and h specifying the width and height of the region to be predicted, 947 // − a variable blockIdx, specifying how much of the block has already been predicted in units of 4x4 samples. 948 // The outputs of this process are inter predicted samples in the current frame CurrFrame. 949 950 // The prediction arrays are formed by the following ordered steps: 951 // 1. The variable refList is set equal to 0. 952 // 2. through 5. 953 Array<u16, maximum_block_size> predicted_buffer; 954 auto predicted_span = predicted_buffer.span().trim(width * height); 955 TRY(predict_inter_block(plane, block_context, ReferenceIndex::Primary, block_context.row, block_context.column, x, y, width, height, block_index, predicted_span)); 956 auto predicted_buffer_at = [&](Span<u16> buffer, u32 row, u32 column) -> u16& { 957 return buffer[row * width + column]; 958 }; 959 960 // 6. If isCompound is equal to 1, then the variable refList is set equal to 1 and steps 2, 3, 4 and 5 are repeated 961 // to form the prediction for the second reference. 962 // The inter predicted samples are then derived as follows: 963 auto& frame_buffer = get_output_buffer(plane); 964 VERIFY(!frame_buffer.is_empty()); 965 auto frame_width = (block_context.frame_context.columns() * 8u) >> (plane > 0 ? block_context.frame_context.color_config.subsampling_x : false); 966 auto frame_height = (block_context.frame_context.rows() * 8u) >> (plane > 0 ? block_context.frame_context.color_config.subsampling_y : false); 967 auto frame_buffer_at = [&](u32 row, u32 column) -> u16& { 968 return frame_buffer[row * frame_width + column]; 969 }; 970 971 auto width_in_frame_buffer = min(width, frame_width - x); 972 auto height_in_frame_buffer = min(height, frame_height - y); 973 974 // The variable isCompound is set equal to ref_frame[ 1 ] > NONE. 975 // − If isCompound is equal to 0, CurrFrame[ plane ][ y + i ][ x + j ] is set equal to preds[ 0 ][ i ][ j ] for i = 0..h-1 976 // and j = 0..w-1. 977 if (!block_context.is_compound()) { 978 for (auto i = 0u; i < height_in_frame_buffer; i++) { 979 for (auto j = 0u; j < width_in_frame_buffer; j++) 980 frame_buffer_at(y + i, x + j) = predicted_buffer_at(predicted_span, i, j); 981 } 982 983 return {}; 984 } 985 986 // − Otherwise, CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Round2( preds[ 0 ][ i ][ j ] + preds[ 1 ][ i ][ j ], 1 ) 987 // for i = 0..h-1 and j = 0..w-1. 988 Array<u16, maximum_block_size> second_predicted_buffer; 989 auto second_predicted_span = second_predicted_buffer.span().trim(width * height); 990 TRY(predict_inter_block(plane, block_context, ReferenceIndex::Secondary, block_context.row, block_context.column, x, y, width, height, block_index, second_predicted_span)); 991 992 for (auto i = 0u; i < height_in_frame_buffer; i++) { 993 for (auto j = 0u; j < width_in_frame_buffer; j++) 994 frame_buffer_at(y + i, x + j) = rounded_right_shift(predicted_buffer_at(predicted_span, i, j) + predicted_buffer_at(second_predicted_span, i, j), 1); 995 } 996 997 return {}; 998} 999 1000inline u16 dc_q(u8 bit_depth, u8 b) 1001{ 1002 // The function dc_q( b ) is specified as dc_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where dc_lookup is 1003 // defined as follows: 1004 constexpr u16 dc_qlookup[3][256] = { 1005 { 4, 8, 8, 9, 10, 11, 12, 12, 13, 14, 15, 16, 17, 18, 19, 19, 20, 21, 22, 23, 24, 25, 26, 26, 27, 28, 29, 30, 31, 32, 32, 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 43, 43, 44, 45, 46, 47, 48, 48, 49, 50, 51, 52, 53, 53, 54, 55, 56, 57, 57, 58, 59, 60, 61, 62, 62, 63, 64, 65, 66, 66, 67, 68, 69, 70, 70, 71, 72, 73, 74, 74, 75, 76, 77, 78, 78, 79, 80, 81, 81, 82, 83, 84, 85, 85, 87, 88, 90, 92, 93, 95, 96, 98, 99, 101, 102, 104, 105, 107, 108, 110, 111, 113, 114, 116, 117, 118, 120, 121, 123, 125, 127, 129, 131, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 154, 156, 158, 161, 164, 166, 169, 172, 174, 177, 180, 182, 185, 187, 190, 192, 195, 199, 202, 205, 208, 211, 214, 217, 220, 223, 226, 230, 233, 237, 240, 243, 247, 250, 253, 257, 261, 265, 269, 272, 276, 280, 284, 288, 292, 296, 300, 304, 309, 313, 317, 322, 326, 330, 335, 340, 344, 349, 354, 359, 364, 369, 374, 379, 384, 389, 395, 400, 406, 411, 417, 423, 429, 435, 441, 447, 454, 461, 467, 475, 482, 489, 497, 505, 513, 522, 530, 539, 549, 559, 569, 579, 590, 602, 614, 626, 640, 654, 668, 684, 700, 717, 736, 755, 775, 796, 819, 843, 869, 896, 925, 955, 988, 1022, 1058, 1098, 1139, 1184, 1232, 1282, 1336 }, 1006 { 4, 9, 10, 13, 15, 17, 20, 22, 25, 28, 31, 34, 37, 40, 43, 47, 50, 53, 57, 60, 64, 68, 71, 75, 78, 82, 86, 90, 93, 97, 101, 105, 109, 113, 116, 120, 124, 128, 132, 136, 140, 143, 147, 151, 155, 159, 163, 166, 170, 174, 178, 182, 185, 189, 193, 197, 200, 204, 208, 212, 215, 219, 223, 226, 230, 233, 237, 241, 244, 248, 251, 255, 259, 262, 266, 269, 273, 276, 280, 283, 287, 290, 293, 297, 300, 304, 307, 310, 314, 317, 321, 324, 327, 331, 334, 337, 343, 350, 356, 362, 369, 375, 381, 387, 394, 400, 406, 412, 418, 424, 430, 436, 442, 448, 454, 460, 466, 472, 478, 484, 490, 499, 507, 516, 525, 533, 542, 550, 559, 567, 576, 584, 592, 601, 609, 617, 625, 634, 644, 655, 666, 676, 687, 698, 708, 718, 729, 739, 749, 759, 770, 782, 795, 807, 819, 831, 844, 856, 868, 880, 891, 906, 920, 933, 947, 961, 975, 988, 1001, 1015, 1030, 1045, 1061, 1076, 1090, 1105, 1120, 1137, 1153, 1170, 1186, 1202, 1218, 1236, 1253, 1271, 1288, 1306, 1323, 1342, 1361, 1379, 1398, 1416, 1436, 1456, 1476, 1496, 1516, 1537, 1559, 1580, 1601, 1624, 1647, 1670, 1692, 1717, 1741, 1766, 1791, 1817, 1844, 1871, 1900, 1929, 1958, 1990, 2021, 2054, 2088, 2123, 2159, 2197, 2236, 2276, 2319, 2363, 2410, 2458, 2508, 2561, 2616, 2675, 2737, 2802, 2871, 2944, 3020, 3102, 3188, 3280, 3375, 3478, 3586, 3702, 3823, 3953, 4089, 4236, 4394, 4559, 4737, 4929, 5130, 5347 }, 1007 { 4, 12, 18, 25, 33, 41, 50, 60, 70, 80, 91, 103, 115, 127, 140, 153, 166, 180, 194, 208, 222, 237, 251, 266, 281, 296, 312, 327, 343, 358, 374, 390, 405, 421, 437, 453, 469, 484, 500, 516, 532, 548, 564, 580, 596, 611, 627, 643, 659, 674, 690, 706, 721, 737, 752, 768, 783, 798, 814, 829, 844, 859, 874, 889, 904, 919, 934, 949, 964, 978, 993, 1008, 1022, 1037, 1051, 1065, 1080, 1094, 1108, 1122, 1136, 1151, 1165, 1179, 1192, 1206, 1220, 1234, 1248, 1261, 1275, 1288, 1302, 1315, 1329, 1342, 1368, 1393, 1419, 1444, 1469, 1494, 1519, 1544, 1569, 1594, 1618, 1643, 1668, 1692, 1717, 1741, 1765, 1789, 1814, 1838, 1862, 1885, 1909, 1933, 1957, 1992, 2027, 2061, 2096, 2130, 2165, 2199, 2233, 2267, 2300, 2334, 2367, 2400, 2434, 2467, 2499, 2532, 2575, 2618, 2661, 2704, 2746, 2788, 2830, 2872, 2913, 2954, 2995, 3036, 3076, 3127, 3177, 3226, 3275, 3324, 3373, 3421, 3469, 3517, 3565, 3621, 3677, 3733, 3788, 3843, 3897, 3951, 4005, 4058, 4119, 4181, 4241, 4301, 4361, 4420, 4479, 4546, 4612, 4677, 4742, 4807, 4871, 4942, 5013, 5083, 5153, 5222, 5291, 5367, 5442, 5517, 5591, 5665, 5745, 5825, 5905, 5984, 6063, 6149, 6234, 6319, 6404, 6495, 6587, 6678, 6769, 6867, 6966, 7064, 7163, 7269, 7376, 7483, 7599, 7715, 7832, 7958, 8085, 8214, 8352, 8492, 8635, 8788, 8945, 9104, 9275, 9450, 9639, 9832, 10031, 10245, 10465, 10702, 10946, 11210, 11482, 11776, 12081, 12409, 12750, 13118, 13501, 13913, 14343, 14807, 15290, 15812, 16356, 16943, 17575, 18237, 18949, 19718, 20521, 21387 } 1008 }; 1009 1010 return dc_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)]; 1011} 1012 1013inline u16 ac_q(u8 bit_depth, u8 b) 1014{ 1015 // The function ac_q( b ) is specified as ac_qlookup[ (BitDepth-8) >> 1 ][ Clip3( 0, 255, b ) ] where ac_lookup is 1016 // defined as follows: 1017 constexpr u16 ac_qlookup[3][256] = { 1018 { 4, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 104, 106, 108, 110, 112, 114, 116, 118, 120, 122, 124, 126, 128, 130, 132, 134, 136, 138, 140, 142, 144, 146, 148, 150, 152, 155, 158, 161, 164, 167, 170, 173, 176, 179, 182, 185, 188, 191, 194, 197, 200, 203, 207, 211, 215, 219, 223, 227, 231, 235, 239, 243, 247, 251, 255, 260, 265, 270, 275, 280, 285, 290, 295, 300, 305, 311, 317, 323, 329, 335, 341, 347, 353, 359, 366, 373, 380, 387, 394, 401, 408, 416, 424, 432, 440, 448, 456, 465, 474, 483, 492, 501, 510, 520, 530, 540, 550, 560, 571, 582, 593, 604, 615, 627, 639, 651, 663, 676, 689, 702, 715, 729, 743, 757, 771, 786, 801, 816, 832, 848, 864, 881, 898, 915, 933, 951, 969, 988, 1007, 1026, 1046, 1066, 1087, 1108, 1129, 1151, 1173, 1196, 1219, 1243, 1267, 1292, 1317, 1343, 1369, 1396, 1423, 1451, 1479, 1508, 1537, 1567, 1597, 1628, 1660, 1692, 1725, 1759, 1793, 1828 }, 1019 { 4, 9, 11, 13, 16, 18, 21, 24, 27, 30, 33, 37, 40, 44, 48, 51, 55, 59, 63, 67, 71, 75, 79, 83, 88, 92, 96, 100, 105, 109, 114, 118, 122, 127, 131, 136, 140, 145, 149, 154, 158, 163, 168, 172, 177, 181, 186, 190, 195, 199, 204, 208, 213, 217, 222, 226, 231, 235, 240, 244, 249, 253, 258, 262, 267, 271, 275, 280, 284, 289, 293, 297, 302, 306, 311, 315, 319, 324, 328, 332, 337, 341, 345, 349, 354, 358, 362, 367, 371, 375, 379, 384, 388, 392, 396, 401, 409, 417, 425, 433, 441, 449, 458, 466, 474, 482, 490, 498, 506, 514, 523, 531, 539, 547, 555, 563, 571, 579, 588, 596, 604, 616, 628, 640, 652, 664, 676, 688, 700, 713, 725, 737, 749, 761, 773, 785, 797, 809, 825, 841, 857, 873, 889, 905, 922, 938, 954, 970, 986, 1002, 1018, 1038, 1058, 1078, 1098, 1118, 1138, 1158, 1178, 1198, 1218, 1242, 1266, 1290, 1314, 1338, 1362, 1386, 1411, 1435, 1463, 1491, 1519, 1547, 1575, 1603, 1631, 1663, 1695, 1727, 1759, 1791, 1823, 1859, 1895, 1931, 1967, 2003, 2039, 2079, 2119, 2159, 2199, 2239, 2283, 2327, 2371, 2415, 2459, 2507, 2555, 2603, 2651, 2703, 2755, 2807, 2859, 2915, 2971, 3027, 3083, 3143, 3203, 3263, 3327, 3391, 3455, 3523, 3591, 3659, 3731, 3803, 3876, 3952, 4028, 4104, 4184, 4264, 4348, 4432, 4516, 4604, 4692, 4784, 4876, 4972, 5068, 5168, 5268, 5372, 5476, 5584, 5692, 5804, 5916, 6032, 6148, 6268, 6388, 6512, 6640, 6768, 6900, 7036, 7172, 7312 }, 1020 { 4, 13, 19, 27, 35, 44, 54, 64, 75, 87, 99, 112, 126, 139, 154, 168, 183, 199, 214, 230, 247, 263, 280, 297, 314, 331, 349, 366, 384, 402, 420, 438, 456, 475, 493, 511, 530, 548, 567, 586, 604, 623, 642, 660, 679, 698, 716, 735, 753, 772, 791, 809, 828, 846, 865, 884, 902, 920, 939, 957, 976, 994, 1012, 1030, 1049, 1067, 1085, 1103, 1121, 1139, 1157, 1175, 1193, 1211, 1229, 1246, 1264, 1282, 1299, 1317, 1335, 1352, 1370, 1387, 1405, 1422, 1440, 1457, 1474, 1491, 1509, 1526, 1543, 1560, 1577, 1595, 1627, 1660, 1693, 1725, 1758, 1791, 1824, 1856, 1889, 1922, 1954, 1987, 2020, 2052, 2085, 2118, 2150, 2183, 2216, 2248, 2281, 2313, 2346, 2378, 2411, 2459, 2508, 2556, 2605, 2653, 2701, 2750, 2798, 2847, 2895, 2943, 2992, 3040, 3088, 3137, 3185, 3234, 3298, 3362, 3426, 3491, 3555, 3619, 3684, 3748, 3812, 3876, 3941, 4005, 4069, 4149, 4230, 4310, 4390, 4470, 4550, 4631, 4711, 4791, 4871, 4967, 5064, 5160, 5256, 5352, 5448, 5544, 5641, 5737, 5849, 5961, 6073, 6185, 6297, 6410, 6522, 6650, 6778, 6906, 7034, 7162, 7290, 7435, 7579, 7723, 7867, 8011, 8155, 8315, 8475, 8635, 8795, 8956, 9132, 9308, 9484, 9660, 9836, 10028, 10220, 10412, 10604, 10812, 11020, 11228, 11437, 11661, 11885, 12109, 12333, 12573, 12813, 13053, 13309, 13565, 13821, 14093, 14365, 14637, 14925, 15213, 15502, 15806, 16110, 16414, 16734, 17054, 17390, 17726, 18062, 18414, 18766, 19134, 19502, 19886, 20270, 20670, 21070, 21486, 21902, 22334, 22766, 23214, 23662, 24126, 24590, 25070, 25551, 26047, 26559, 27071, 27599, 28143, 28687, 29247 } 1021 }; 1022 1023 return ac_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)]; 1024} 1025 1026u8 Decoder::get_base_quantizer_index(BlockContext const& block_context) 1027{ 1028 // The function get_qindex( ) returns the quantizer index for the current block and is specified by the following: 1029 // − If seg_feature_active( SEG_LVL_ALT_Q ) is equal to 1 the following ordered steps apply: 1030 if (Parser::seg_feature_active(block_context, SEG_LVL_ALT_Q)) { 1031 // 1. Set the variable data equal to FeatureData[ segment_id ][ SEG_LVL_ALT_Q ]. 1032 auto data = block_context.frame_context.segmentation_features[block_context.segment_id][SEG_LVL_ALT_Q].value; 1033 1034 // 2. If segmentation_abs_or_delta_update is equal to 0, set data equal to base_q_idx + data 1035 if (!block_context.frame_context.should_use_absolute_segment_base_quantizer) { 1036 data += block_context.frame_context.base_quantizer_index; 1037 } 1038 1039 // 3. Return Clip3( 0, 255, data ). 1040 return clip_3<u8>(0, 255, data); 1041 } 1042 1043 // − Otherwise, return base_q_idx. 1044 return block_context.frame_context.base_quantizer_index; 1045} 1046 1047u16 Decoder::get_dc_quantizer(BlockContext const& block_context, u8 plane) 1048{ 1049 // FIXME: The result of this function can be cached. This does not change per frame. 1050 1051 // The function get_dc_quant( plane ) returns the quantizer value for the dc coefficient for a particular plane and 1052 // is derived as follows: 1053 // − If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ). 1054 // − Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ). 1055 // Instead of if { return }, select the value to add and return. 1056 i8 offset = plane == 0 ? block_context.frame_context.y_dc_quantizer_index_delta : block_context.frame_context.uv_dc_quantizer_index_delta; 1057 return dc_q(block_context.frame_context.color_config.bit_depth, static_cast<u8>(get_base_quantizer_index(block_context) + offset)); 1058} 1059 1060u16 Decoder::get_ac_quantizer(BlockContext const& block_context, u8 plane) 1061{ 1062 // FIXME: The result of this function can be cached. This does not change per frame. 1063 1064 // The function get_ac_quant( plane ) returns the quantizer value for the ac coefficient for a particular plane and 1065 // is derived as follows: 1066 // − If plane is equal to 0, return ac_q( get_qindex( ) ). 1067 // − Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ). 1068 // Instead of if { return }, select the value to add and return. 1069 i8 offset = plane == 0 ? 0 : block_context.frame_context.uv_ac_quantizer_index_delta; 1070 return ac_q(block_context.frame_context.color_config.bit_depth, static_cast<u8>(get_base_quantizer_index(block_context) + offset)); 1071} 1072 1073DecoderErrorOr<void> Decoder::reconstruct(u8 plane, BlockContext const& block_context, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet transform_set) 1074{ 1075 // 8.6.2 Reconstruct process 1076 1077 // The variable dqDenom is set equal to 2 if txSz is equal to Transform_32X32, otherwise dqDenom is set equal to 1. 1078 Intermediate dq_denominator = transform_block_size == Transform_32x32 ? 2 : 1; 1079 // The variable n (specifying the base 2 logarithm of the width of the transform block) is set equal to 2 + txSz. 1080 u8 log2_of_block_size = 2u + transform_block_size; 1081 // The variable n0 (specifying the width of the transform block) is set equal to 1 << n. 1082 auto block_size = 1u << log2_of_block_size; 1083 1084 // 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom 1085 // for i = 0..(n0-1), for j = 0..(n0-1) 1086 Array<Intermediate, maximum_transform_size> dequantized; 1087 Intermediate ac_quant = get_ac_quantizer(block_context, plane); 1088 for (auto i = 0u; i < block_size; i++) { 1089 for (auto j = 0u; j < block_size; j++) { 1090 auto index = index_from_row_and_column(i, j, block_size); 1091 if (index == 0) 1092 continue; 1093 dequantized[index] = (block_context.residual_tokens[index] * ac_quant) / dq_denominator; 1094 } 1095 } 1096 1097 // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom 1098 dequantized[0] = (block_context.residual_tokens[0] * get_dc_quantizer(block_context, plane)) / dq_denominator; 1099 1100 // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2 1101 // are representable by a signed integer with 8 + BitDepth bits. 1102 // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal 1103 // to allow these bounds to be violated. Therefore, we can avoid the performance cost here. 1104 1105 // 3. Invoke the 2D inverse transform block process defined in section 8.7.2 with the variable n as input. 1106 // The inverse transform outputs are stored back to the Dequant buffer. 1107 TRY(inverse_transform_2d(block_context, dequantized, log2_of_block_size, transform_set)); 1108 1109 // 4. CurrFrame[ plane ][ y + i ][ x + j ] is set equal to Clip1( CurrFrame[ plane ][ y + i ][ x + j ] + Dequant[ i ][ j ] ) 1110 // for i = 0..(n0-1) and j = 0..(n0-1). 1111 auto& current_buffer = get_output_buffer(plane); 1112 auto subsampling_x = (plane > 0 ? block_context.frame_context.color_config.subsampling_x : 0); 1113 auto subsampling_y = (plane > 0 ? block_context.frame_context.color_config.subsampling_y : 0); 1114 auto frame_width = (block_context.frame_context.columns() * 8) >> subsampling_x; 1115 auto frame_height = (block_context.frame_context.rows() * 8) >> subsampling_y; 1116 auto width_in_frame_buffer = min(block_size, frame_width - transform_block_x); 1117 auto height_in_frame_buffer = min(block_size, frame_height - transform_block_y); 1118 1119 for (auto i = 0u; i < height_in_frame_buffer; i++) { 1120 for (auto j = 0u; j < width_in_frame_buffer; j++) { 1121 auto index = index_from_row_and_column(transform_block_y + i, transform_block_x + j, frame_width); 1122 auto dequantized_value = dequantized[index_from_row_and_column(i, j, block_size)]; 1123 current_buffer[index] = clip_1(block_context.frame_context.color_config.bit_depth, current_buffer[index] + dequantized_value); 1124 } 1125 } 1126 1127 return {}; 1128} 1129 1130inline DecoderErrorOr<void> Decoder::inverse_walsh_hadamard_transform(Span<Intermediate> data, u8 log2_of_block_size, u8 shift) 1131{ 1132 (void)data; 1133 (void)shift; 1134 // The input to this process is a variable shift that specifies the amount of pre-scaling. 1135 // This process does an in-place transform of the array T (of length 4) by the following ordered steps: 1136 if (1 << log2_of_block_size != 4) 1137 return DecoderError::corrupted("Block size was not 4"sv); 1138 1139 return DecoderError::not_implemented(); 1140} 1141 1142inline i32 Decoder::cos64(u8 angle) 1143{ 1144 const i32 cos64_lookup[33] = { 16384, 16364, 16305, 16207, 16069, 15893, 15679, 15426, 15137, 14811, 14449, 14053, 13623, 13160, 12665, 12140, 11585, 11003, 10394, 9760, 9102, 8423, 7723, 7005, 6270, 5520, 4756, 3981, 3196, 2404, 1606, 804, 0 }; 1145 1146 // 1. Set a variable angle2 equal to angle & 127. 1147 angle &= 127; 1148 // 2. If angle2 is greater than or equal to 0 and less than or equal to 32, return cos64_lookup[ angle2 ]. 1149 if (angle <= 32) 1150 return cos64_lookup[angle]; 1151 // 3. If angle2 is greater than 32 and less than or equal to 64, return cos64_lookup[ 64 - angle2 ] * -1. 1152 if (angle <= 64) 1153 return -cos64_lookup[64 - angle]; 1154 // 4. If angle2 is greater than 64 and less than or equal to 96, return cos64_lookup[ angle2 - 64 ] * -1. 1155 if (angle <= 96) 1156 return -cos64_lookup[angle - 64]; 1157 // 5. Otherwise (if angle2 is greater than 96 and less than 128), return cos64_lookup[ 128 - angle2 ]. 1158 return cos64_lookup[128 - angle]; 1159} 1160 1161inline i32 Decoder::sin64(u8 angle) 1162{ 1163 if (angle < 32) 1164 angle += 128; 1165 return cos64(angle - 32u); 1166} 1167 1168template<typename T> 1169inline i32 Decoder::rounded_right_shift(T value, u8 bits) 1170{ 1171 value = (value + static_cast<T>(1u << (bits - 1u))) >> bits; 1172 return static_cast<i32>(value); 1173} 1174 1175// (8.7.1.1) The function B( a, b, angle, 0 ) performs a butterfly rotation. 1176inline void Decoder::butterfly_rotation_in_place(Span<Intermediate> data, size_t index_a, size_t index_b, u8 angle, bool flip) 1177{ 1178 auto cos = cos64(angle); 1179 auto sin = sin64(angle); 1180 // 1. The variable x is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ). 1181 i64 rotated_a = data[index_a] * cos - data[index_b] * sin; 1182 // 2. The variable y is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ). 1183 i64 rotated_b = data[index_a] * sin + data[index_b] * cos; 1184 // 3. T[ a ] is set equal to Round2( x, 14 ). 1185 data[index_a] = rounded_right_shift(rotated_a, 14); 1186 // 4. T[ b ] is set equal to Round2( y, 14 ). 1187 data[index_b] = rounded_right_shift(rotated_b, 14); 1188 1189 // The function B( a ,b, angle, 1 ) performs a butterfly rotation and flip specified by the following ordered steps: 1190 // 1. The function B( a, b, angle, 0 ) is invoked. 1191 // 2. The contents of T[ a ] and T[ b ] are exchanged. 1192 if (flip) 1193 swap(data[index_a], data[index_b]); 1194 1195 // It is a requirement of bitstream conformance that the values saved into the array T by this function are 1196 // representable by a signed integer using 8 + BitDepth bits of precision. 1197 // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal 1198 // to allow these bounds to be violated. Therefore, we can avoid the performance cost here. 1199} 1200 1201// (8.7.1.1) The function H( a, b, 0 ) performs a Hadamard rotation. 1202inline void Decoder::hadamard_rotation_in_place(Span<Intermediate> data, size_t index_a, size_t index_b, bool flip) 1203{ 1204 // The function H( a, b, 1 ) performs a Hadamard rotation with flipped indices and is specified as follows: 1205 // 1. The function H( b, a, 0 ) is invoked. 1206 if (flip) 1207 swap(index_a, index_b); 1208 1209 // The function H( a, b, 0 ) performs a Hadamard rotation specified by the following ordered steps: 1210 1211 // 1. The variable x is set equal to T[ a ]. 1212 auto a_value = data[index_a]; 1213 // 2. The variable y is set equal to T[ b ]. 1214 auto b_value = data[index_b]; 1215 // 3. T[ a ] is set equal to x + y. 1216 data[index_a] = a_value + b_value; 1217 // 4. T[ b ] is set equal to x - y. 1218 data[index_b] = a_value - b_value; 1219 1220 // It is a requirement of bitstream conformance that the values saved into the array T by this function are 1221 // representable by a signed integer using 8 + BitDepth bits of precision. 1222 // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal 1223 // to allow these bounds to be violated. Therefore, we can avoid the performance cost here. 1224} 1225 1226inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform_array_permutation(Span<Intermediate> data, u8 log2_of_block_size) 1227{ 1228 u8 block_size = 1 << log2_of_block_size; 1229 1230 // This process performs an in-place permutation of the array T of length 2^n for 2 ≤ n ≤ 5 which is required before 1231 // execution of the inverse DCT process. 1232 if (log2_of_block_size < 2 || log2_of_block_size > 5) 1233 return DecoderError::corrupted("Block size was out of range"sv); 1234 1235 // 1.1. A temporary array named copyT is set equal to T. 1236 Array<Intermediate, maximum_transform_size> data_copy; 1237 AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size); 1238 1239 // 1.2. T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1<<n) - 1). 1240 for (auto i = 0u; i < block_size; i++) 1241 data[i] = data_copy[brev(log2_of_block_size, i)]; 1242 1243 return {}; 1244} 1245 1246inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Span<Intermediate> data, u8 log2_of_block_size) 1247{ 1248 // 2.1. The variable n0 is set equal to 1<<n. 1249 u8 block_size = 1 << log2_of_block_size; 1250 1251 // 8.7.1.3 Inverse DCT process 1252 1253 // 2.2. The variable n1 is set equal to 1<<(n-1). 1254 u8 half_block_size = block_size >> 1; 1255 // 2.3 The variable n2 is set equal to 1<<(n-2). 1256 u8 quarter_block_size = half_block_size >> 1; 1257 // 2.4 The variable n3 is set equal to 1<<(n-3). 1258 u8 eighth_block_size = quarter_block_size >> 1; 1259 1260 // 2.5 If n is equal to 2, invoke B( 0, 1, 16, 1 ), otherwise recursively invoke the inverse DCT defined in this 1261 // section with the variable n set equal to n - 1. 1262 if (log2_of_block_size == 2) 1263 butterfly_rotation_in_place(data, 0, 1, 16, true); 1264 else 1265 TRY(inverse_discrete_cosine_transform(data, log2_of_block_size - 1)); 1266 1267 // 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1). 1268 for (auto i = 0u; i < quarter_block_size; i++) { 1269 auto index = half_block_size + i; 1270 butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev(5, index), false); 1271 } 1272 1273 // 2.7 If n is greater than or equal to 3: 1274 if (log2_of_block_size >= 3) { 1275 // a. Invoke H( n1+4*i+2*j, n1+1+4*i+2*j, j ) for i = 0..(n3-1), j = 0..1. 1276 for (auto i = 0u; i < eighth_block_size; i++) { 1277 for (auto j = 0u; j < 2; j++) { 1278 auto index = half_block_size + (4 * i) + (2 * j); 1279 hadamard_rotation_in_place(data, index, index + 1, j); 1280 } 1281 } 1282 } 1283 1284 // 4. If n is equal to 5: 1285 if (log2_of_block_size == 5) { 1286 // a. Invoke B( n0-n+3-n2*j-4*i, n1+n-4+n2*j+4*i, 28-16*i+56*j, 1 ) for i = 0..1, j = 0..1. 1287 for (auto i = 0u; i < 2; i++) { 1288 for (auto j = 0u; j < 2; j++) { 1289 auto index_a = block_size - log2_of_block_size + 3 - (quarter_block_size * j) - (4 * i); 1290 auto index_b = half_block_size + log2_of_block_size - 4 + (quarter_block_size * j) + (4 * i); 1291 auto angle = 28 - (16 * i) + (56 * j); 1292 butterfly_rotation_in_place(data, index_a, index_b, angle, true); 1293 } 1294 } 1295 1296 // b. Invoke H( n1+n3*j+i, n1+n2-5+n3*j-i, j&1 ) for i = 0..1, j = 0..3. 1297 for (auto i = 0u; i < 2; i++) { 1298 for (auto j = 0u; j < 4; j++) { 1299 auto index_a = half_block_size + (eighth_block_size * j) + i; 1300 auto index_b = half_block_size + quarter_block_size - 5 + (eighth_block_size * j) - i; 1301 hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0); 1302 } 1303 } 1304 } 1305 1306 // 5. If n is greater than or equal to 4: 1307 if (log2_of_block_size >= 4) { 1308 // a. Invoke B( n0-n+2-i-n2*j, n1+n-3+i+n2*j, 24+48*j, 1 ) for i = 0..(n==5), j = 0..1. 1309 for (auto i = 0u; i <= (log2_of_block_size == 5); i++) { 1310 for (auto j = 0u; j < 2; j++) { 1311 auto index_a = block_size - log2_of_block_size + 2 - i - (quarter_block_size * j); 1312 auto index_b = half_block_size + log2_of_block_size - 3 + i + (quarter_block_size * j); 1313 butterfly_rotation_in_place(data, index_a, index_b, 24 + (48 * j), true); 1314 } 1315 } 1316 1317 // b. Invoke H( n1+n2*j+i, n1+n2-1+n2*j-i, j&1 ) for i = 0..(2n-7), j = 0..1. 1318 for (auto i = 0u; i < (2 * log2_of_block_size) - 6u; i++) { 1319 for (auto j = 0u; j < 2; j++) { 1320 auto index_a = half_block_size + (quarter_block_size * j) + i; 1321 auto index_b = half_block_size + quarter_block_size - 1 + (quarter_block_size * j) - i; 1322 hadamard_rotation_in_place(data, index_a, index_b, (j & 1) != 0); 1323 } 1324 } 1325 } 1326 1327 // 6. If n is greater than or equal to 3: 1328 if (log2_of_block_size >= 3) { 1329 // a. Invoke B( n0-n3-1-i, n1+n3+i, 16, 1 ) for i = 0..(n3-1). 1330 for (auto i = 0u; i < eighth_block_size; i++) { 1331 auto index_a = block_size - eighth_block_size - 1 - i; 1332 auto index_b = half_block_size + eighth_block_size + i; 1333 butterfly_rotation_in_place(data, index_a, index_b, 16, true); 1334 } 1335 } 1336 1337 // 7. Invoke H( i, n0-1-i, 0 ) for i = 0..(n1-1). 1338 for (auto i = 0u; i < half_block_size; i++) 1339 hadamard_rotation_in_place(data, i, block_size - 1 - i, false); 1340 1341 return {}; 1342} 1343 1344inline void Decoder::inverse_asymmetric_discrete_sine_transform_input_array_permutation(Span<Intermediate> data, u8 log2_of_block_size) 1345{ 1346 // The variable n0 is set equal to 1<<n. 1347 auto block_size = 1u << log2_of_block_size; 1348 // The variable n1 is set equal to 1<<(n-1). 1349 // We can iterate by 2 at a time instead of taking half block size. 1350 1351 // A temporary array named copyT is set equal to T. 1352 Array<Intermediate, maximum_transform_size> data_copy; 1353 AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size); 1354 1355 // The values at even locations T[ 2 * i ] are set equal to copyT[ n0 - 1 - 2 * i ] for i = 0..(n1-1). 1356 // The values at odd locations T[ 2 * i + 1 ] are set equal to copyT[ 2 * i ] for i = 0..(n1-1). 1357 for (auto i = 0u; i < block_size; i += 2) { 1358 data[i] = data_copy[block_size - 1 - i]; 1359 data[i + 1] = data_copy[i]; 1360 } 1361} 1362 1363inline void Decoder::inverse_asymmetric_discrete_sine_transform_output_array_permutation(Span<Intermediate> data, u8 log2_of_block_size) 1364{ 1365 auto block_size = 1u << log2_of_block_size; 1366 1367 // A temporary array named copyT is set equal to T. 1368 Array<Intermediate, maximum_transform_size> data_copy; 1369 AK::TypedTransfer<Intermediate>::copy(data_copy.data(), data.data(), block_size); 1370 1371 // The permutation depends on n as follows: 1372 if (log2_of_block_size == 4) { 1373 // − If n is equal to 4, 1374 // T[ 8*a + 4*b + 2*c + d ] is set equal to copyT[ 8*(d^c) + 4*(c^b) + 2*(b^a) + a ] for a = 0..1 1375 // and b = 0..1 and c = 0..1 and d = 0..1. 1376 for (auto a = 0u; a < 2; a++) 1377 for (auto b = 0u; b < 2; b++) 1378 for (auto c = 0u; c < 2; c++) 1379 for (auto d = 0u; d < 2; d++) 1380 data[(8 * a) + (4 * b) + (2 * c) + d] = data_copy[8 * (d ^ c) + 4 * (c ^ b) + 2 * (b ^ a) + a]; 1381 } else { 1382 VERIFY(log2_of_block_size == 3); 1383 // − Otherwise (n is equal to 3), 1384 // T[ 4*a + 2*b + c ] is set equal to copyT[ 4*(c^b) + 2*(b^a) + a ] for a = 0..1 and 1385 // b = 0..1 and c = 0..1. 1386 for (auto a = 0u; a < 2; a++) 1387 for (auto b = 0u; b < 2; b++) 1388 for (auto c = 0u; c < 2; c++) 1389 data[4 * a + 2 * b + c] = data_copy[4 * (c ^ b) + 2 * (b ^ a) + a]; 1390 } 1391} 1392 1393inline void Decoder::inverse_asymmetric_discrete_sine_transform_4(Span<Intermediate> data) 1394{ 1395 VERIFY(data.size() == 4); 1396 const i64 sinpi_1_9 = 5283; 1397 const i64 sinpi_2_9 = 9929; 1398 const i64 sinpi_3_9 = 13377; 1399 const i64 sinpi_4_9 = 15212; 1400 1401 // Steps are derived from pseudocode in (8.7.1.6): 1402 // s0 = SINPI_1_9 * T[ 0 ] 1403 i64 s0 = sinpi_1_9 * data[0]; 1404 // s1 = SINPI_2_9 * T[ 0 ] 1405 i64 s1 = sinpi_2_9 * data[0]; 1406 // s2 = SINPI_3_9 * T[ 1 ] 1407 i64 s2 = sinpi_3_9 * data[1]; 1408 // s3 = SINPI_4_9 * T[ 2 ] 1409 i64 s3 = sinpi_4_9 * data[2]; 1410 // s4 = SINPI_1_9 * T[ 2 ] 1411 i64 s4 = sinpi_1_9 * data[2]; 1412 // s5 = SINPI_2_9 * T[ 3 ] 1413 i64 s5 = sinpi_2_9 * data[3]; 1414 // s6 = SINPI_4_9 * T[ 3 ] 1415 i64 s6 = sinpi_4_9 * data[3]; 1416 // v = T[ 0 ] - T[ 2 ] + T[ 3 ] 1417 // s7 = SINPI_3_9 * v 1418 i64 s7 = sinpi_3_9 * (data[0] - data[2] + data[3]); 1419 1420 // x0 = s0 + s3 + s5 1421 auto x0 = s0 + s3 + s5; 1422 // x1 = s1 - s4 - s6 1423 auto x1 = s1 - s4 - s6; 1424 // x2 = s7 1425 auto x2 = s7; 1426 // x3 = s2 1427 auto x3 = s2; 1428 1429 // s0 = x0 + x3 1430 s0 = x0 + x3; 1431 // s1 = x1 + x3 1432 s1 = x1 + x3; 1433 // s2 = x2 1434 s2 = x2; 1435 // s3 = x0 + x1 - x3 1436 s3 = x0 + x1 - x3; 1437 1438 // T[ 0 ] = Round2( s0, 14 ) 1439 data[0] = rounded_right_shift(s0, 14); 1440 // T[ 1 ] = Round2( s1, 14 ) 1441 data[1] = rounded_right_shift(s1, 14); 1442 // T[ 2 ] = Round2( s2, 14 ) 1443 data[2] = rounded_right_shift(s2, 14); 1444 // T[ 3 ] = Round2( s3, 14 ) 1445 data[3] = rounded_right_shift(s3, 14); 1446 1447 // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S. 1448 // The values in this array require higher precision to avoid overflow. Using signed integers with 24 + 1449 // BitDepth bits of precision is enough to avoid overflow. 1450 // Note: Since bounds checks just ensure that we will not have resulting values that will overflow, it's non-fatal 1451 // to allow these bounds to be violated. Therefore, we can avoid the performance cost here. 1452} 1453 1454// The function SB( a, b, angle, 0 ) performs a butterfly rotation. 1455// Spec defines the source as array T, and the destination array as S. 1456template<typename S, typename D> 1457inline void Decoder::butterfly_rotation(Span<S> source, Span<D> destination, size_t index_a, size_t index_b, u8 angle, bool flip) 1458{ 1459 // The function SB( a, b, angle, 0 ) performs a butterfly rotation according to the following ordered steps: 1460 auto cos = cos64(angle); 1461 auto sin = sin64(angle); 1462 // Expand to the destination buffer's precision. 1463 D a = source[index_a]; 1464 D b = source[index_b]; 1465 // 1. S[ a ] is set equal to T[ a ] * cos64( angle ) - T[ b ] * sin64( angle ). 1466 destination[index_a] = a * cos - b * sin; 1467 // 2. S[ b ] is set equal to T[ a ] * sin64( angle ) + T[ b ] * cos64( angle ). 1468 destination[index_b] = a * sin + b * cos; 1469 1470 // The function SB( a, b, angle, 1 ) performs a butterfly rotation and flip according to the following ordered steps: 1471 // 1. The function SB( a, b, angle, 0 ) is invoked. 1472 // 2. The contents of S[ a ] and S[ b ] are exchanged. 1473 if (flip) 1474 swap(destination[index_a], destination[index_b]); 1475} 1476 1477// The function SH( a, b ) performs a Hadamard rotation and rounding. 1478// Spec defines the source array as S, and the destination array as T. 1479template<typename S, typename D> 1480inline void Decoder::hadamard_rotation(Span<S> source, Span<D> destination, size_t index_a, size_t index_b) 1481{ 1482 // Keep the source buffer's precision until rounding. 1483 S a = source[index_a]; 1484 S b = source[index_b]; 1485 // 1. T[ a ] is set equal to Round2( S[ a ] + S[ b ], 14 ). 1486 destination[index_a] = rounded_right_shift(a + b, 14); 1487 // 2. T[ b ] is set equal to Round2( S[ a ] - S[ b ], 14 ). 1488 destination[index_b] = rounded_right_shift(a - b, 14); 1489} 1490 1491inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_8(Span<Intermediate> data) 1492{ 1493 VERIFY(data.size() == 8); 1494 // This process does an in-place transform of the array T using: 1495 1496 // A higher precision array S for intermediate results. 1497 // (8.7.1.1) NOTE - The values in array S require higher precision to avoid overflow. Using signed integers with 1498 // 24 + BitDepth bits of precision is enough to avoid overflow. 1499 Array<i64, 8> high_precision_temp; 1500 1501 // The following ordered steps apply: 1502 1503 // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set 1504 // equal to 3. 1505 inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, 3); 1506 1507 // 2. Invoke SB( 2*i, 1+2*i, 30-8*i, 1 ) for i = 0..3. 1508 for (auto i = 0u; i < 4; i++) 1509 butterfly_rotation(data, high_precision_temp.span(), 2 * i, 1 + (2 * i), 30 - (8 * i), true); 1510 1511 // 3. Invoke SH( i, 4+i ) for i = 0..3. 1512 for (auto i = 0u; i < 4; i++) 1513 hadamard_rotation(high_precision_temp.span(), data, i, 4 + i); 1514 1515 // 4. Invoke SB( 4+3*i, 5+i, 24-16*i, 1 ) for i = 0..1. 1516 for (auto i = 0u; i < 2; i++) 1517 butterfly_rotation(data, high_precision_temp.span(), 4 + (3 * i), 5 + i, 24 - (16 * i), true); 1518 // 5. Invoke SH( 4+i, 6+i ) for i = 0..1. 1519 for (auto i = 0u; i < 2; i++) 1520 hadamard_rotation(high_precision_temp.span(), data, 4 + i, 6 + i); 1521 1522 // 6. Invoke H( i, 2+i, 0 ) for i = 0..1. 1523 for (auto i = 0u; i < 2; i++) 1524 hadamard_rotation_in_place(data, i, 2 + i, false); 1525 1526 // 7. Invoke B( 2+4*i, 3+4*i, 16, 1 ) for i = 0..1. 1527 for (auto i = 0u; i < 2; i++) 1528 butterfly_rotation_in_place(data, 2 + (4 * i), 3 + (4 * i), 16, true); 1529 1530 // 8. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n 1531 // set equal to 3. 1532 inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, 3); 1533 1534 // 9. Set T[ 1+2*i ] equal to -T[ 1+2*i ] for i = 0..3. 1535 for (auto i = 0u; i < 4; i++) { 1536 auto index = 1 + (2 * i); 1537 data[index] = -data[index]; 1538 } 1539 return {}; 1540} 1541 1542inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform_16(Span<Intermediate> data) 1543{ 1544 VERIFY(data.size() == 16); 1545 // This process does an in-place transform of the array T using: 1546 1547 // A higher precision array S for intermediate results. 1548 // (8.7.1.1) The inverse asymmetric discrete sine transforms also make use of an intermediate array named S. 1549 // The values in this array require higher precision to avoid overflow. Using signed integers with 24 + 1550 // BitDepth bits of precision is enough to avoid overflow. 1551 Array<i64, 16> high_precision_temp; 1552 1553 // The following ordered steps apply: 1554 1555 // 1. Invoke the ADST input array permutation process specified in section 8.7.1.4 with the input variable n set 1556 // equal to 4. 1557 inverse_asymmetric_discrete_sine_transform_input_array_permutation(data, 4); 1558 1559 // 2. Invoke SB( 2*i, 1+2*i, 31-4*i, 1 ) for i = 0..7. 1560 for (auto i = 0u; i < 8; i++) 1561 butterfly_rotation(data, high_precision_temp.span(), 2 * i, 1 + (2 * i), 31 - (4 * i), true); 1562 // 3. Invoke SH( i, 8+i ) for i = 0..7. 1563 for (auto i = 0u; i < 8; i++) 1564 hadamard_rotation(high_precision_temp.span(), data, i, 8 + i); 1565 1566 // 4. Invoke SB( 8+2*i, 9+2*i, 28-16*i, 1 ) for i = 0..3. 1567 for (auto i = 0u; i < 4; i++) 1568 butterfly_rotation(data, high_precision_temp.span(), 8 + (2 * i), 9 + (2 * i), 128 + 28 - (16 * i), true); 1569 // 5. Invoke SH( 8+i, 12+i ) for i = 0..3. 1570 for (auto i = 0u; i < 4; i++) 1571 hadamard_rotation(high_precision_temp.span(), data, 8 + i, 12 + i); 1572 1573 // 6. Invoke H( i, 4+i, 0 ) for i = 0..3. 1574 for (auto i = 0u; i < 4; i++) 1575 hadamard_rotation_in_place(data, i, 4 + i, false); 1576 1577 // 7. Invoke SB( 4+8*i+3*j, 5+8*i+j, 24-16*j, 1 ) for i = 0..1, for j = 0..1. 1578 for (auto i = 0u; i < 2; i++) 1579 for (auto j = 0u; j < 2; j++) 1580 butterfly_rotation(data, high_precision_temp.span(), 4 + (8 * i) + (3 * j), 5 + (8 * i) + j, 24 - (16 * j), true); 1581 // 8. Invoke SH( 4+8*j+i, 6+8*j+i ) for i = 0..1, j = 0..1. 1582 for (auto i = 0u; i < 2; i++) 1583 for (auto j = 0u; j < 2; j++) 1584 hadamard_rotation(high_precision_temp.span(), data, 4 + (8 * j) + i, 6 + (8 * j) + i); 1585 1586 // 9. Invoke H( 8*j+i, 2+8*j+i, 0 ) for i = 0..1, for j = 0..1. 1587 for (auto i = 0u; i < 2; i++) 1588 for (auto j = 0u; j < 2; j++) 1589 hadamard_rotation_in_place(data, (8 * j) + i, 2 + (8 * j) + i, false); 1590 // 10. Invoke B( 2+4*j+8*i, 3+4*j+8*i, 48+64*(i^j), 0 ) for i = 0..1, for j = 0..1. 1591 for (auto i = 0u; i < 2; i++) 1592 for (auto j = 0u; j < 2; j++) 1593 butterfly_rotation_in_place(data, 2 + (4 * j) + (8 * i), 3 + (4 * j) + (8 * i), 48 + (64 * (i ^ j)), false); 1594 1595 // 11. Invoke the ADST output array permutation process specified in section 8.7.1.5 with the input variable n 1596 // set equal to 4. 1597 inverse_asymmetric_discrete_sine_transform_output_array_permutation(data, 4); 1598 1599 // 12. Set T[ 1+12*j+2*i ] equal to -T[ 1+12*j+2*i ] for i = 0..1, for j = 0..1. 1600 for (auto i = 0u; i < 2; i++) { 1601 for (auto j = 0u; j < 2; j++) { 1602 auto index = 1 + (12 * j) + (2 * i); 1603 data[index] = -data[index]; 1604 } 1605 } 1606 return {}; 1607} 1608 1609inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform(Span<Intermediate> data, u8 log2_of_block_size) 1610{ 1611 // 8.7.1.9 Inverse ADST Process 1612 1613 // This process performs an in-place inverse ADST process on the array T of size 2^n for 2 ≤ n ≤ 4. 1614 if (log2_of_block_size < 2 || log2_of_block_size > 4) 1615 return DecoderError::corrupted("Block size was out of range"sv); 1616 1617 // The process to invoke depends on n as follows: 1618 if (log2_of_block_size == 2) { 1619 // − If n is equal to 2, invoke the Inverse ADST4 process specified in section 8.7.1.6. 1620 inverse_asymmetric_discrete_sine_transform_4(data); 1621 return {}; 1622 } 1623 if (log2_of_block_size == 3) { 1624 // − Otherwise if n is equal to 3, invoke the Inverse ADST8 process specified in section 8.7.1.7. 1625 return inverse_asymmetric_discrete_sine_transform_8(data); 1626 } 1627 // − Otherwise (n is equal to 4), invoke the Inverse ADST16 process specified in section 8.7.1.8. 1628 return inverse_asymmetric_discrete_sine_transform_16(data); 1629} 1630 1631DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_context, Span<Intermediate> dequantized, u8 log2_of_block_size, TransformSet transform_set) 1632{ 1633 // This process performs a 2D inverse transform for an array of size 2^n by 2^n stored in the 2D array Dequant. 1634 // The input to this process is a variable n (log2_of_block_size) that specifies the base 2 logarithm of the width of the transform. 1635 1636 // 1. Set the variable n0 (block_size) equal to 1 << n. 1637 auto block_size = 1u << log2_of_block_size; 1638 1639 Array<Intermediate, maximum_transform_size> row_array; 1640 Span<Intermediate> row = row_array.span().trim(block_size); 1641 1642 // 2. The row transforms with i = 0..(n0-1) are applied as follows: 1643 for (auto i = 0u; i < block_size; i++) { 1644 // 1. Set T[ j ] equal to Dequant[ i ][ j ] for j = 0..(n0-1). 1645 for (auto j = 0u; j < block_size; j++) 1646 row[j] = dequantized[index_from_row_and_column(i, j, block_size)]; 1647 1648 // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal 1649 // to 2. 1650 if (block_context.frame_context.is_lossless()) { 1651 TRY(inverse_walsh_hadamard_transform(row, log2_of_block_size, 2)); 1652 continue; 1653 } 1654 switch (transform_set.second_transform) { 1655 case TransformType::DCT: 1656 // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to ADST_DCT, apply an inverse DCT as 1657 // follows: 1658 // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n. 1659 TRY(inverse_discrete_cosine_transform_array_permutation(row, log2_of_block_size)); 1660 // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n. 1661 TRY(inverse_discrete_cosine_transform(row, log2_of_block_size)); 1662 break; 1663 case TransformType::ADST: 1664 // 4. Otherwise (TxType is equal to DCT_ADST or TxType is equal to ADST_ADST), invoke the inverse ADST 1665 // process as specified in section 8.7.1.9 with input variable n. 1666 TRY(inverse_asymmetric_discrete_sine_transform(row, log2_of_block_size)); 1667 break; 1668 default: 1669 return DecoderError::corrupted("Unknown tx_type"sv); 1670 } 1671 1672 // 5. Set Dequant[ i ][ j ] equal to T[ j ] for j = 0..(n0-1). 1673 for (auto j = 0u; j < block_size; j++) 1674 dequantized[index_from_row_and_column(i, j, block_size)] = row[j]; 1675 } 1676 1677 Array<Intermediate, maximum_transform_size> column_array; 1678 auto column = column_array.span().trim(block_size); 1679 1680 // 3. The column transforms with j = 0..(n0-1) are applied as follows: 1681 for (auto j = 0u; j < block_size; j++) { 1682 // 1. Set T[ i ] equal to Dequant[ i ][ j ] for i = 0..(n0-1). 1683 for (auto i = 0u; i < block_size; i++) 1684 column[i] = dequantized[index_from_row_and_column(i, j, block_size)]; 1685 1686 // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal 1687 // to 0. 1688 if (block_context.frame_context.is_lossless()) { 1689 TRY(inverse_walsh_hadamard_transform(column, log2_of_block_size, 2)); 1690 continue; 1691 } 1692 switch (transform_set.first_transform) { 1693 case TransformType::DCT: 1694 // Otherwise, if TxType is equal to DCT_DCT or TxType is equal to DCT_ADST, apply an inverse DCT as 1695 // follows: 1696 // 1. Invoke the inverse DCT permutation process as specified in section 8.7.1.2 with the input variable n. 1697 TRY(inverse_discrete_cosine_transform_array_permutation(column, log2_of_block_size)); 1698 // 2. Invoke the inverse DCT process as specified in section 8.7.1.3 with the input variable n. 1699 TRY(inverse_discrete_cosine_transform(column, log2_of_block_size)); 1700 break; 1701 case TransformType::ADST: 1702 // 4. Otherwise (TxType is equal to ADST_DCT or TxType is equal to ADST_ADST), invoke the inverse ADST 1703 // process as specified in section 8.7.1.9 with input variable n. 1704 TRY(inverse_asymmetric_discrete_sine_transform(column, log2_of_block_size)); 1705 break; 1706 default: 1707 VERIFY_NOT_REACHED(); 1708 } 1709 1710 // 5. If Lossless is equal to 1, set Dequant[ i ][ j ] equal to T[ i ] for i = 0..(n0-1). 1711 for (auto i = 0u; i < block_size; i++) 1712 dequantized[index_from_row_and_column(i, j, block_size)] = column[i]; 1713 1714 // 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) ) 1715 // for i = 0..(n0-1). 1716 if (!block_context.frame_context.is_lossless()) { 1717 for (auto i = 0u; i < block_size; i++) { 1718 auto index = index_from_row_and_column(i, j, block_size); 1719 dequantized[index] = rounded_right_shift(dequantized[index], min(6, log2_of_block_size + 2)); 1720 } 1721 } 1722 } 1723 1724 return {}; 1725} 1726 1727DecoderErrorOr<void> Decoder::update_reference_frames(FrameContext const& frame_context) 1728{ 1729 // This process is invoked as the final step in decoding a frame. 1730 // The inputs to this process are the samples in the current frame CurrFrame[ plane ][ x ][ y ]. 1731 // The output from this process is an updated set of reference frames and previous motion vectors. 1732 // The following ordered steps apply: 1733 1734 // 1. For each value of i from 0 to NUM_REF_FRAMES - 1, the following applies if bit i of refresh_frame_flags 1735 // is equal to 1 (i.e. if (refresh_frame_flags>>i)&1 is equal to 1): 1736 for (u8 i = 0; i < NUM_REF_FRAMES; i++) { 1737 if (frame_context.should_update_reference_frame_at_index(i)) { 1738 auto& reference_frame = m_parser->m_reference_frames[i]; 1739 1740 // − RefFrameWidth[ i ] is set equal to FrameWidth. 1741 // − RefFrameHeight[ i ] is set equal to FrameHeight. 1742 reference_frame.size = frame_context.size(); 1743 // − RefSubsamplingX[ i ] is set equal to subsampling_x. 1744 reference_frame.subsampling_x = frame_context.color_config.subsampling_x; 1745 // − RefSubsamplingY[ i ] is set equal to subsampling_y. 1746 reference_frame.subsampling_y = frame_context.color_config.subsampling_y; 1747 // − RefBitDepth[ i ] is set equal to BitDepth. 1748 reference_frame.bit_depth = frame_context.color_config.bit_depth; 1749 1750 // − FrameStore[ i ][ 0 ][ y ][ x ] is set equal to CurrFrame[ 0 ][ y ][ x ] for x = 0..FrameWidth-1, for y = 1751 // 0..FrameHeight-1. 1752 // − FrameStore[ i ][ plane ][ y ][ x ] is set equal to CurrFrame[ plane ][ y ][ x ] for plane = 1..2, for x = 1753 // 0..((FrameWidth+subsampling_x) >> subsampling_x)-1, for y = 0..((FrameHeight+subsampling_y) >> 1754 // subsampling_y)-1. 1755 1756 // FIXME: Frame width is not equal to the buffer's stride. If we store the stride of the buffer with the reference 1757 // frame, we can just copy the framebuffer data instead. Alternatively, we should crop the output framebuffer. 1758 for (auto plane = 0u; plane < 3; plane++) { 1759 auto width = frame_context.size().width(); 1760 auto height = frame_context.size().height(); 1761 auto stride = frame_context.columns() * 8; 1762 1763 if (plane > 0) { 1764 width = (width + frame_context.color_config.subsampling_x) >> frame_context.color_config.subsampling_x; 1765 height = (height + frame_context.color_config.subsampling_y) >> frame_context.color_config.subsampling_y; 1766 stride >>= frame_context.color_config.subsampling_x; 1767 } 1768 1769 auto original_buffer = get_output_buffer(plane); 1770 auto& frame_store_buffer = reference_frame.frame_planes[plane]; 1771 frame_store_buffer.resize_and_keep_capacity(width * height); 1772 1773 for (auto x = 0u; x < width; x++) { 1774 for (auto y = 0u; y < height; y++) { 1775 auto sample = original_buffer[index_from_row_and_column(y, x, stride)]; 1776 frame_store_buffer[index_from_row_and_column(y, x, width)] = sample; 1777 } 1778 } 1779 } 1780 } 1781 } 1782 1783 // 2. If show_existing_frame is equal to 0, the following applies: 1784 if (!frame_context.shows_existing_frame()) { 1785 DECODER_TRY_ALLOC(m_parser->m_previous_block_contexts.try_resize_to_match_other_vector2d(frame_context.block_contexts())); 1786 // − PrevRefFrames[ row ][ col ][ list ] is set equal to RefFrames[ row ][ col ][ list ] for row = 0..MiRows-1, 1787 // for col = 0..MiCols-1, for list = 0..1. 1788 // − PrevMvs[ row ][ col ][ list ][ comp ] is set equal to Mvs[ row ][ col ][ list ][ comp ] for row = 0..MiRows-1, 1789 // for col = 0..MiCols-1, for list = 0..1, for comp = 0..1. 1790 // And from decode_frame(): 1791 // - If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to 1792 // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1: 1793 // − show_existing_frame is equal to 0, 1794 // − segmentation_enabled is equal to 1, 1795 // − segmentation_update_map is equal to 1. 1796 bool keep_segment_ids = !frame_context.shows_existing_frame() && frame_context.segmentation_enabled && frame_context.use_full_segment_id_tree; 1797 frame_context.block_contexts().copy_to(m_parser->m_previous_block_contexts, [keep_segment_ids](FrameBlockContext context) { 1798 auto persistent_context = PersistentBlockContext(context); 1799 if (!keep_segment_ids) 1800 persistent_context.segment_id = 0; 1801 return persistent_context; 1802 }); 1803 } 1804 1805 return {}; 1806} 1807 1808}