From 316dad7bf79a32ab94f6f8e236c03e2693e2ee4e Mon Sep 17 00:00:00 2001
From: Zaggy1024 <zaggy1024@gmail.com>
Date: Sun, 27 Nov 2022 00:41:09 -0600
Subject: [PATCH] LibVideo/VP9: Remove m_tokens and m_token_cache from Parser

Only the residual tokens array needs to be kept so that the transforms
can use it after all the tokens have been parsed. The token cache can
live on the stack, since it is only needed for the duration of the
token parsing loop.
---
 Userland/Libraries/LibVideo/VP9/Context.h     |  2 +
 Userland/Libraries/LibVideo/VP9/Decoder.cpp   |  4 +-
 Userland/Libraries/LibVideo/VP9/Parser.cpp    | 45 ++++++++++---------
 Userland/Libraries/LibVideo/VP9/Parser.h      |  4 +-
 .../Libraries/LibVideo/VP9/TreeParser.cpp     |  2 +-
 Userland/Libraries/LibVideo/VP9/TreeParser.h  |  2 +-
 6 files changed, 32 insertions(+), 27 deletions(-)
diff --git a/Userland/Libraries/LibVideo/VP9/Context.h b/Userland/Libraries/LibVideo/VP9/Context.h
index 4a8186d9f8..12619a1b85 100644
--- a/Userland/Libraries/LibVideo/VP9/Context.h
+++ b/Userland/Libraries/LibVideo/VP9/Context.h
@@ -202,6 +202,8 @@ struct BlockContext {
 
     InterpolationFilter interpolation_filter { EightTap };
     Array<MotionVectorPair, 4> sub_block_motion_vectors;
+
+    Array<i32, 1024> residual_tokens;
 };
 
 struct BlockMotionVectorCandidateSet {
diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
index 745b391d35..c281a8a05d 100644
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@@ -1087,12 +1087,12 @@ DecoderErrorOr<void> Decoder::reconstruct(u8 plane, BlockContext const& block_co
             auto index = index_from_row_and_column(i, j, block_size);
             if (index == 0)
                 continue;
-            dequantized[index] = (m_parser->m_tokens[index] * ac_quant) / dq_denominator;
+            dequantized[index] = (block_context.residual_tokens[index] * ac_quant) / dq_denominator;
         }
     }
 
     // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom
-    dequantized[0] = (m_parser->m_tokens[0] * get_dc_quantizer(block_context, plane)) / dq_denominator;
+    dequantized[0] = (block_context.residual_tokens[0] * get_dc_quantizer(block_context, plane)) / dq_denominator;
 
     // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2
     // are representable by a signed integer with 8 + BitDepth bits.
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp
index 801f5b475b..b14231934f 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@@ -1353,6 +1353,7 @@ static TransformSet select_transform_type(BlockContext const& block_context, u8
 DecoderErrorOr<bool> Parser::residual(BlockContext& block_context, bool has_block_above, bool has_block_left)
 {
     bool block_had_non_zero_tokens = false;
+    Array<u8, 1024> token_cache;
     for (u8 plane = 0; plane < 3; plane++) {
         auto plane_subsampling_x = (plane > 0) ? block_context.frame_context.color_config.subsampling_x : 0;
         auto plane_subsampling_y = (plane > 0) ? block_context.frame_context.color_config.subsampling_y : 0;
@@ -1390,7 +1391,7 @@ DecoderErrorOr<bool> Parser::residual(BlockContext& block_context, bool has_bloc
                         TRY(m_decoder.predict_intra(plane, block_context, transform_x_in_px, transform_y_in_px, has_block_left || x > 0, has_block_above || y > 0, (x + transform_size_in_sub_blocks) < block_size_in_sub_blocks.width(), transform_size, sub_block_index));
                     if (!block_context.should_skip_residuals) {
                         auto transform_set = select_transform_type(block_context, plane, transform_size, sub_block_index);
-                        sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, transform_x_in_px, transform_y_in_px, transform_size, transform_set));
+                        sub_block_had_non_zero_tokens = TRY(tokens(block_context, plane, transform_x_in_px, transform_y_in_px, transform_size, transform_set, token_cache));
                         block_had_non_zero_tokens = block_had_non_zero_tokens || sub_block_had_non_zero_tokens;
                         TRY(m_decoder.reconstruct(plane, block_context, transform_x_in_px, transform_y_in_px, transform_size, transform_set));
                     }
@@ -1444,35 +1445,37 @@ static u16 const* get_scan(TransformSize transform_size, TransformSet transform_
     return default_scan_32x32;
 }
 
-DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u32 start_x, u32 start_y, TransformSize transform_size, TransformSet transform_set)
+DecoderErrorOr<bool> Parser::tokens(BlockContext& block_context, size_t plane, u32 start_x, u32 start_y, TransformSize transform_size, TransformSet transform_set, Array<u8, 1024> token_cache)
 {
-    u16 segment_eob = 16 << (transform_size << 1);
+    block_context.residual_tokens.fill(0);
+
     auto const* scan = get_scan(transform_size, transform_set);
-    auto check_eob = true;
+
+    auto check_for_more_coefficients = true;
     u16 coef_index = 0;
+    u16 segment_eob = 16 << (transform_size << 1);
     for (; coef_index < segment_eob; coef_index++) {
         auto pos = scan[coef_index];
         auto band = (transform_size == Transform_4x4) ? coefband_4x4[coef_index] : coefband_8x8plus[coef_index];
-        auto tokens_context = TreeParser::get_tokens_context(block_context.frame_context.color_config.subsampling_x, block_context.frame_context.color_config.subsampling_y, block_context.frame_context.rows(), block_context.frame_context.columns(), m_above_nonzero_context, m_left_nonzero_context, m_token_cache, transform_size, transform_set, plane, start_x, start_y, pos, block_context.is_inter_predicted(), band, coef_index);
-        if (check_eob) {
-            auto more_coefs = TRY_READ(TreeParser::parse_more_coefficients(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context));
-            if (!more_coefs)
-                break;
-        }
+        auto tokens_context = TreeParser::get_tokens_context(block_context.frame_context.color_config.subsampling_x, block_context.frame_context.color_config.subsampling_y, block_context.frame_context.rows(), block_context.frame_context.columns(), m_above_nonzero_context, m_left_nonzero_context, token_cache, transform_size, transform_set, plane, start_x, start_y, pos, block_context.is_inter_predicted(), band, coef_index);
+
+        if (check_for_more_coefficients && !TRY_READ(TreeParser::parse_more_coefficients(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context)))
+            break;
+
         auto token = TRY_READ(TreeParser::parse_token(*m_bit_stream, *m_probability_tables, *m_syntax_element_counter, tokens_context));
-        m_token_cache[pos] = energy_class[token];
+        token_cache[pos] = energy_class[token];
+
+        i32 coef;
         if (token == ZeroToken) {
-            m_tokens[pos] = 0;
-            check_eob = false;
+            coef = 0;
+            check_for_more_coefficients = false;
         } else {
-            i32 coef = TRY(read_coef(block_context.frame_context.color_config.bit_depth, token));
-            bool sign_bit = TRY_READ(m_bit_stream->read_literal(1));
-            m_tokens[pos] = sign_bit ? -coef : coef;
-            check_eob = true;
+            coef = TRY(read_coef(block_context.frame_context.color_config.bit_depth, token));
+            check_for_more_coefficients = true;
         }
+        block_context.residual_tokens[pos] = coef;
     }
-    for (u16 i = coef_index; i < segment_eob; i++)
-        m_tokens[scan[i]] = 0;
+
     return coef_index > 0;
 }
 
@@ -1480,7 +1483,7 @@ DecoderErrorOr<i32> Parser::read_coef(u8 bit_depth, Token token)
 {
     auto cat = extra_bits[token][0];
     auto num_extra = extra_bits[token][1];
-    u32 coef = extra_bits[token][2];
+    i32 coef = extra_bits[token][2];
     if (token == DctValCat6) {
         for (size_t e = 0; e < (u8)(bit_depth - 8); e++) {
             auto high_bit = TRY_READ(m_bit_stream->read_bool(255));
@@ -1491,6 +1494,8 @@ DecoderErrorOr<i32> Parser::read_coef(u8 bit_depth, Token token)
         auto coef_bit = TRY_READ(m_bit_stream->read_bool(cat_probs[cat][e]));
         coef += coef_bit << (num_extra - 1 - e);
     }
+    bool sign_bit = TRY_READ(m_bit_stream->read_literal(1));
+    coef = sign_bit ? -coef : coef;
     return coef;
 }
 
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.h b/Userland/Libraries/LibVideo/VP9/Parser.h
index 8893f084e2..e48407b34c 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@@ -122,7 +122,7 @@ private:
     DecoderErrorOr<MotionVector> read_motion_vector(BlockContext const&, BlockMotionVectorCandidates const&, ReferenceIndex);
     DecoderErrorOr<i32> read_single_motion_vector_component(u8 component);
     DecoderErrorOr<bool> residual(BlockContext&, bool has_block_above, bool has_block_left);
-    DecoderErrorOr<bool> tokens(BlockContext&, size_t plane, u32 x, u32 y, TransformSize, TransformSet);
+    DecoderErrorOr<bool> tokens(BlockContext&, size_t plane, u32 x, u32 y, TransformSize, TransformSet, Array<u8, 1024> token_cache);
     DecoderErrorOr<i32> read_coef(u8 bit_depth, Token token);
 
     /* (6.5) Motion Vector Prediction */
@@ -162,8 +162,6 @@ private:
 
     Vector<u16> m_frame_store[NUM_REF_FRAMES][3];
 
-    u8 m_token_cache[1024];
-    i32 m_tokens[1024];
     bool m_use_hp { false };
 
     bool m_use_prev_frame_mvs;
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
index a236c61a21..5c8c571cdb 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.cpp
@@ -624,7 +624,7 @@ ErrorOr<bool> TreeParser::parse_motion_vector_hp(BitStream& bit_stream, Probabil
     return value;
 }
 
-TokensContext TreeParser::get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, u8 token_cache[1024], TransformSize transform_size, TransformSet transform_set, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index)
+TokensContext TreeParser::get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, Array<u8, 1024> token_cache, TransformSize transform_size, TransformSet transform_set, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index)
 {
     u8 context;
     if (coef_index == 0) {
diff --git a/Userland/Libraries/LibVideo/VP9/TreeParser.h b/Userland/Libraries/LibVideo/VP9/TreeParser.h
index 52c97d1adf..0fd29241aa 100644
--- a/Userland/Libraries/LibVideo/VP9/TreeParser.h
+++ b/Userland/Libraries/LibVideo/VP9/TreeParser.h
@@ -86,7 +86,7 @@ public:
     static ErrorOr<u8> parse_motion_vector_fr(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component);
     static ErrorOr<bool> parse_motion_vector_hp(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, u8 component, bool use_hp);
 
-    static TokensContext get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, u8 token_cache[1024], TransformSize, TransformSet, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index);
+    static TokensContext get_tokens_context(bool subsampling_x, bool subsampling_y, u32 rows, u32 columns, Array<Vector<bool>, 3> const& above_nonzero_context, Array<Vector<bool>, 3> const& left_nonzero_context, Array<u8, 1024> token_cache, TransformSize, TransformSet, u8 plane, u32 start_x, u32 start_y, u16 position, bool is_inter, u8 band, u16 coef_index);
     static ErrorOr<bool> parse_more_coefficients(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context);
     static ErrorOr<Token> parse_token(BitStream&, ProbabilityTables const&, SyntaxElementCounter&, TokensContext const& context);
 };