From 06082d310f9664dbe816bda1e031013a3943d055 Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Mon, 28 Nov 2022 00:15:53 -0600 Subject: [PATCH] LibVideo/VP9: Split/clean up the token tree-parsing context function Since the context information for parsing residual tokens changes based on whether we're parsing the first coefficient or subsequent ones, the TreeParser::get_tokens_context function was split into two new ones to allow them to read more cleanly. All variables now have meaningful names to aid in readability as well. The math used in the function for the first token was changed to be more friendly to tile- or block-specific coordinates to facilitate range-restricted Spans of the above and left context arrays. --- Userland/Libraries/LibVideo/VP9/Parser.cpp | 20 ++-- .../Libraries/LibVideo/VP9/TreeParser.cpp | 96 ++++++++++--------- Userland/Libraries/LibVideo/VP9/TreeParser.h | 4 +- 3 files changed, 68 insertions(+), 52 deletions(-) diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp index 6753f5b2c8..c9dd89c6a4 100644 --- a/Userland/Libraries/LibVideo/VP9/Parser.cpp +++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp @@ -1391,7 +1391,7 @@ DecoderErrorOr Parser::residual(BlockContext& block_context, bool has_bloc TRY(m_decoder.predict_intra(plane, block_context, transform_x_in_px, transform_y_in_px, has_block_left || x > 0, has_block_above || y > 0, (x + transform_size_in_sub_blocks) < block_size_in_sub_blocks.width(), transform_size, sub_block_index)); if (!block_context.should_skip_residuals) { auto transform_set = select_transform_type(block_context, plane, transform_size, sub_block_index); - sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, transform_x_in_px, transform_y_in_px, transform_size, transform_set, token_cache)); + sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, x, y, transform_size, transform_set, token_cache)); block_had_non_zero_tokens = block_had_non_zero_tokens || sub_block_had_non_zero_tokens; TRY(m_decoder.reconstruct(plane, block_context, transform_x_in_px, transform_y_in_px, transform_size, transform_set)); } @@ -1445,7 +1445,7 @@ static u16 const* get_scan(TransformSize transform_size, TransformSet transform_ return default_scan_32x32; } -DecoderErrorOr Parser::tokens(BlockContext& block_context, size_t plane, u32 start_x, u32 start_y, TransformSize transform_size, TransformSet transform_set, Array token_cache) +DecoderErrorOr Parser::tokens(BlockContext& block_context, size_t plane, u32 sub_block_column, u32 sub_block_row, TransformSize transform_size, TransformSet transform_set, Array token_cache) { block_context.residual_tokens.fill(0); @@ -1453,17 +1453,21 @@ DecoderErrorOr Parser::tokens(BlockContext& block_context, size_t plane, u auto check_for_more_coefficients = true; u16 coef_index = 0; - u16 segment_eob = 16 << (transform_size << 1); - for (; coef_index < segment_eob; coef_index++) { - auto pos = scan[coef_index]; + u16 transform_pixel_count = 16 << (transform_size << 1); + for (; coef_index < transform_pixel_count; coef_index++) { auto band = (transform_size == Transform_4x4) ? coefband_4x4[coef_index] : coefband_8x8plus[coef_index]; - auto tokens_context = TreeParser::get_tokens_context(block_context.frame_context.color_config.subsampling_x, block_context.frame_context.color_config.subsampling_y, block_context.frame_context.rows(), block_context.frame_context.columns(), m_above_nonzero_context, m_left_nonzero_context, token_cache, transform_size, transform_set, plane, start_x, start_y, pos, block_context.is_inter_predicted(), band, coef_index); + auto token_position = scan[coef_index]; + TokensContext tokens_context; + if (coef_index == 0) + tokens_context = TreeParser::get_context_for_first_token(block_context, m_above_nonzero_context, m_left_nonzero_context, transform_size, plane, sub_block_column, sub_block_row, block_context.is_inter_predicted(), band); + else + tokens_context = TreeParser::get_context_for_other_tokens(token_cache, transform_size, transform_set, plane, token_position, block_context.is_inter_predicted(), band); if (check_for_more_coefficients && !TRY_READ(TreeParser::parse_more_coefficients(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context))) break; auto token = TRY_READ(TreeParser::parse_token(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context)); - token_cache[pos] = energy_class[token]; + token_cache[token_position] = energy_class[token]; i32 coef; if (token == ZeroToken) { @@ -1473,7 +1477,7 @@ DecoderErrorOr Parser::tokens(BlockContext& block_context, size_t plane, u coef = TRY(read_coef(block_context.frame_context.color_config.bit_depth, token)); check_for_more_coefficients = true; } - block_context.residual_tokens[pos] = coef; + block_context.residual_tokens[token_position] = coef; } return coef_index > 0; diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp index 5c8c571cdb..a1dcb1dfaf 100644 --- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp @@ -7,10 +7,12 @@ #include +#include "Context.h" #include "Enums.h" #include "LookupTables.h" #include "Parser.h" #include "TreeParser.h" +#include "Utilities.h" namespace Video::VP9 { @@ -624,54 +626,62 @@ ErrorOr TreeParser::parse_motion_vector_hp(BitStream& bit_stream, Probabil return value; } -TokensContext TreeParser::get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array, 3> const& above_nonzero_context, Array, 3> const& left_nonzero_context, Array token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index) +TokensContext TreeParser::get_context_for_first_token(BlockContext const& block_context, Array, 3> const& above_non_zero_tokens, Array, 3> const& left_non_zero_tokens, TransformSize transform_size, u8 plane, u32 sub_block_column, u32 sub_block_row, bool is_inter, u8 band) { - u8 context; - if (coef_index == 0) { - auto sx = plane > 0 ? subsampling_x : false; - auto sy = plane > 0 ? subsampling_y : false; - auto max_x = (2 * columns) >> sx; - auto max_y = (2 * rows) >> sy; - u8 numpts = 1 << transform_size; - auto x4 = start_x >> 2; - auto y4 = start_y >> 2; - u32 above = 0; - u32 left = 0; - for (size_t i = 0; i < numpts; i++) { - if (x4 + i < max_x) - above |= above_nonzero_context[plane][x4 + i]; - if (y4 + i < max_y) - left |= left_nonzero_context[plane][y4 + i]; + auto subsampling_x = plane > 0 ? block_context.frame_context.color_config.subsampling_x : false; + auto subsampling_y = plane > 0 ? block_context.frame_context.color_config.subsampling_y : false; + auto transform_top_in_sub_blocks = (blocks_to_sub_blocks(block_context.row) >> subsampling_y) + sub_block_row; + auto transform_left_in_sub_blocks = (blocks_to_sub_blocks(block_context.column) >> subsampling_x) + sub_block_column; + u8 transform_size_in_sub_blocks = transform_size_to_sub_blocks(transform_size); + bool above_has_non_zero_tokens = false; + for (u8 x = 0; x < transform_size_in_sub_blocks && x < above_non_zero_tokens[plane].size() - transform_left_in_sub_blocks; x++) { + if (above_non_zero_tokens[plane][transform_left_in_sub_blocks + x]) { + above_has_non_zero_tokens = true; + break; } - context = above + left; - } else { - u32 neighbor_0, neighbor_1; - auto n = 4 << transform_size; - auto i = position / n; - auto j = position % n; - auto a = i > 0 ? (i - 1) * n + j : 0; - auto a2 = i * n + j - 1; - if (i > 0 && j > 0) { - if (transform_set == TransformSet { TransformType::DCT, TransformType::ADST }) { - neighbor_0 = a; - neighbor_1 = a; - } else if (transform_set == TransformSet { TransformType::ADST, TransformType::DCT }) { - neighbor_0 = a2; - neighbor_1 = a2; - } else { - neighbor_0 = a; - neighbor_1 = a2; - } - } else if (i > 0) { - neighbor_0 = a; - neighbor_1 = a; - } else { - neighbor_0 = a2; - neighbor_1 = a2; + } + bool left_has_non_zero_tokens = false; + for (u8 y = 0; y < transform_size_in_sub_blocks && y < left_non_zero_tokens[plane].size() - transform_top_in_sub_blocks; y++) { + if (left_non_zero_tokens[plane][transform_top_in_sub_blocks + y]) { + left_has_non_zero_tokens = true; + break; } - context = (1 + token_cache[neighbor_0] + token_cache[neighbor_1]) >> 1; } + u8 context = above_has_non_zero_tokens + left_has_non_zero_tokens; + return TokensContext { transform_size, plane > 0, is_inter, band, context }; +} + +TokensContext TreeParser::get_context_for_other_tokens(Array token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u16 token_position, bool is_inter, u8 band) +{ + auto transform_size_in_pixels = sub_blocks_to_pixels(transform_size_to_sub_blocks(transform_size)); + auto log2_of_transform_size = transform_size + 2; + auto pixel_y = token_position >> log2_of_transform_size; + auto pixel_x = token_position - (pixel_y << log2_of_transform_size); + auto above_token_energy = pixel_y > 0 ? (pixel_y - 1) * transform_size_in_pixels + pixel_x : 0; + auto left_token_energy = pixel_y * transform_size_in_pixels + pixel_x - 1; + + u32 neighbor_a, neighbor_b; + if (pixel_y > 0 && pixel_x > 0) { + if (transform_set == TransformSet { TransformType::DCT, TransformType::ADST }) { + neighbor_a = above_token_energy; + neighbor_b = above_token_energy; + } else if (transform_set == TransformSet { TransformType::ADST, TransformType::DCT }) { + neighbor_a = left_token_energy; + neighbor_b = left_token_energy; + } else { + neighbor_a = above_token_energy; + neighbor_b = left_token_energy; + } + } else if (pixel_y > 0) { + neighbor_a = above_token_energy; + neighbor_b = above_token_energy; + } else { + neighbor_a = left_token_energy; + neighbor_b = left_token_energy; + } + + u8 context = (1 + token_cache[neighbor_a] + token_cache[neighbor_b]) >> 1; return TokensContext { transform_size, plane > 0, is_inter, band, context }; } diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h index 0fd29241aa..349a4d4f16 100644 --- a/Userland/Libraries/LibVideo/VP9/TreeParser.h +++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h @@ -17,6 +17,7 @@ namespace Video::VP9 { class Parser; +struct BlockContext; struct FrameBlockContext; struct TokensContext { @@ -86,7 +87,8 @@ public: static ErrorOr parse_motion_vector_fr(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component); static ErrorOr parse_motion_vector_hp(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component, bool use_hp); - static TokensContext get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array, 3> const& above_nonzero_context, Array, 3> const& left_nonzero_context, Array token_cache, TransformSize, TransformSet, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index); + static TokensContext get_context_for_first_token(BlockContext const& block_context, Array, 3> const& above_non_zero_tokens, Array, 3> const& left_non_zero_tokens, TransformSize transform_size, u8 plane, u32 sub_block_column, u32 sub_block_row, bool is_inter, u8 band); + static TokensContext get_context_for_other_tokens(Array token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u16 token_position, bool is_inter, u8 band); static ErrorOr parse_more_coefficients(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context); static ErrorOr parse_token(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context); };