1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 11:07:45 +00:00

LibVideo/VP9: Pre-calculate the quantizers at the start of each frame

Quantizers are a constant for the whole frame, except when segment
features override them, in which case they are a constant per segment
ID. We take advantage of this by pre-calculating those after reading
the quantization parameters and segmentation features for a frame.
This results in a small 1.5% improvement (~12.9s -> ~12.7s).
This commit is contained in:
Zaggy1024 2023-04-17 00:27:13 -05:00 committed by Tim Flynn
parent 094b0d8a78
commit 90c0e1ad8f
6 changed files with 104 additions and 70 deletions

View file

@ -32,6 +32,14 @@ enum class FrameShowMode {
DoNotShowFrame, DoNotShowFrame,
}; };
// Holds the four dequantization factors used for a block: one AC and one DC
// value for the Y plane, and one AC and one DC value for the UV planes.
// Every field defaults to 0 until it is filled in.
struct Quantizers {
// AC coefficient quantizer for the Y plane.
u16 y_ac_quantizer { 0 };
// AC coefficient quantizer for the UV planes.
u16 uv_ac_quantizer { 0 };
// DC coefficient quantizer for the Y plane.
u16 y_dc_quantizer { 0 };
// DC coefficient quantizer for the UV planes.
u16 uv_dc_quantizer { 0 };
};
struct FrameContext { struct FrameContext {
public: public:
static ErrorOr<FrameContext> create(ReadonlyBytes data, static ErrorOr<FrameContext> create(ReadonlyBytes data,
@ -126,15 +134,9 @@ public:
Array<i8, MAX_REF_FRAMES> loop_filter_reference_deltas; Array<i8, MAX_REF_FRAMES> loop_filter_reference_deltas;
Array<i8, 2> loop_filter_mode_deltas; Array<i8, 2> loop_filter_mode_deltas;
u8 base_quantizer_index { 0 }; // Set based on quantization_params( ) in the spec.
i8 y_dc_quantizer_index_delta { 0 }; bool lossless { false };
i8 uv_dc_quantizer_index_delta { 0 }; Array<Quantizers, MAX_SEGMENTS> segment_quantizers;
i8 uv_ac_quantizer_index_delta { 0 };
bool is_lossless() const
{
// From quantization_params( ) in the spec.
return base_quantizer_index == 0 && y_dc_quantizer_index_delta == 0 && uv_dc_quantizer_index_delta == 0 && uv_ac_quantizer_index_delta == 0;
}
bool segmentation_enabled { false }; bool segmentation_enabled { false };
// Note: We can use Optional<Array<...>> for these tree probabilities, but unfortunately it seems to have measurable performance overhead. // Note: We can use Optional<Array<...>> for these tree probabilities, but unfortunately it seems to have measurable performance overhead.

View file

@ -1169,18 +1169,17 @@ inline u16 ac_q(u8 bit_depth, u8 b)
return ac_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)]; return ac_qlookup[(bit_depth - 8) >> 1][clip_3<u8>(0, 255, b)];
} }
u8 Decoder::get_base_quantizer_index(BlockContext const& block_context) u8 Decoder::get_base_quantizer_index(SegmentFeatureStatus alternative_quantizer_feature, bool should_use_absolute_segment_base_quantizer, u8 base_quantizer_index)
{ {
// The function get_qindex( ) returns the quantizer index for the current block and is specified by the following: // The function get_qindex( ) returns the quantizer index for the current block and is specified by the following:
// If seg_feature_active( SEG_LVL_ALT_Q ) is equal to 1 the following ordered steps apply: // If seg_feature_active( SEG_LVL_ALT_Q ) is equal to 1 the following ordered steps apply:
auto alternative_quantizer_feature = block_context.get_segment_feature(SegmentFeature::UseAlternativeQuantizerBase);
if (alternative_quantizer_feature.enabled) { if (alternative_quantizer_feature.enabled) {
// 1. Set the variable data equal to FeatureData[ segment_id ][ SEG_LVL_ALT_Q ]. // 1. Set the variable data equal to FeatureData[ segment_id ][ SEG_LVL_ALT_Q ].
auto data = alternative_quantizer_feature.value; auto data = alternative_quantizer_feature.value;
// 2. If segmentation_abs_or_delta_update is equal to 0, set data equal to base_q_idx + data // 2. If segmentation_abs_or_delta_update is equal to 0, set data equal to base_q_idx + data
if (!block_context.frame_context.should_use_absolute_segment_base_quantizer) { if (!should_use_absolute_segment_base_quantizer) {
data += block_context.frame_context.base_quantizer_index; data += base_quantizer_index;
} }
// 3. Return Clip3( 0, 255, data ). // 3. Return Clip3( 0, 255, data ).
@ -1188,33 +1187,29 @@ u8 Decoder::get_base_quantizer_index(BlockContext const& block_context)
} }
// Otherwise, return base_q_idx. // Otherwise, return base_q_idx.
return block_context.frame_context.base_quantizer_index; return base_quantizer_index;
} }
u16 Decoder::get_dc_quantizer(u8 bit_depth, u8 base, i8 delta)
{
    // Returns the DC quantizer value for the quantizer index (base + delta) at the given bit depth.
    // NOTE: Delta is selected by the caller based on whether it is for the Y or UV planes.

    // The function get_dc_quant( plane ) returns the quantizer value for the dc coefficient for a particular plane and
    // is derived as follows:
    // - If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ).
    // - Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ).

    // The VP9 specification defines dc_q( b ) in terms of Clip3( 0, 255, b ), so the index has to be clamped while
    // it is still a signed integer. Narrowing the raw sum to u8 would wrap around instead (e.g. 0 + -5 -> 251,
    // 250 + 15 -> 9) and select an unrelated table entry.
    auto const quantizer_index = clip_3<int>(0, 255, static_cast<int>(base) + delta);
    return dc_q(bit_depth, static_cast<u8>(quantizer_index));
}
u16 Decoder::get_ac_quantizer(u8 bit_depth, u8 base, i8 delta)
{
    // Returns the AC quantizer value for the quantizer index (base + delta) at the given bit depth.
    // NOTE: Delta is selected by the caller based on whether it is for the Y or UV planes.

    // The function get_ac_quant( plane ) returns the quantizer value for the ac coefficient for a particular plane and
    // is derived as follows:
    // - If plane is equal to 0, return ac_q( get_qindex( ) ).
    // - Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ).

    // ac_q( b ) looks up Clip3( 0, 255, b ), so the index has to be clamped while it is still a signed integer.
    // Narrowing the raw sum to u8 would wrap around instead (e.g. 0 + -5 -> 251, 250 + 15 -> 9) and select an
    // unrelated table entry.
    auto const quantizer_index = clip_3<int>(0, 255, static_cast<int>(base) + delta);
    return ac_q(bit_depth, static_cast<u8>(quantizer_index));
}
DecoderErrorOr<void> Decoder::reconstruct(u8 plane, BlockContext const& block_context, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet transform_set) DecoderErrorOr<void> Decoder::reconstruct(u8 plane, BlockContext const& block_context, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet transform_set)
@ -1254,18 +1249,15 @@ DecoderErrorOr<void> Decoder::reconstruct_templated(u8 plane, BlockContext const
// 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom // 1. Dequant[ i ][ j ] is set equal to ( Tokens[ i * n0 + j ] * get_ac_quant( plane ) ) / dqDenom
// for i = 0..(n0-1), for j = 0..(n0-1) // for i = 0..(n0-1), for j = 0..(n0-1)
Array<Intermediate, block_size * block_size> dequantized; Array<Intermediate, block_size * block_size> dequantized;
Intermediate ac_quant = get_ac_quantizer(block_context, plane); auto quantizers = block_context.frame_context.segment_quantizers[block_context.segment_id];
for (auto i = 0u; i < block_size; i++) { Intermediate ac_quant = plane == 0 ? quantizers.y_ac_quantizer : quantizers.uv_ac_quantizer;
for (auto j = 0u; j < block_size; j++) { auto const* tokens_raw = block_context.residual_tokens.data();
auto index = i * block_size + j; for (u32 i = 0; i < dequantized.size(); i++) {
if (index == 0) dequantized[i] = (tokens_raw[i] * ac_quant) / dq_denominator;
continue;
dequantized[index] = (block_context.residual_tokens[index] * ac_quant) / dq_denominator;
}
} }
// 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom // 2. Dequant[ 0 ][ 0 ] is set equal to ( Tokens[ 0 ] * get_dc_quant( plane ) ) / dqDenom
dequantized[0] = (block_context.residual_tokens[0] * get_dc_quantizer(block_context, plane)) / dq_denominator; dequantized[0] = (block_context.residual_tokens[0] * (plane == 0 ? quantizers.y_dc_quantizer : quantizers.uv_dc_quantizer)) / dq_denominator;
// It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2 // It is a requirement of bitstream conformance that the values written into the Dequant array in steps 1 and 2
// are representable by a signed integer with 8 + BitDepth bits. // are representable by a signed integer with 8 + BitDepth bits.
@ -1819,7 +1811,7 @@ DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_con
// 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
// to 2. // to 2.
if (block_context.frame_context.is_lossless()) { if (block_context.frame_context.lossless) {
TRY(inverse_walsh_hadamard_transform(row, log2_of_block_size, 2)); TRY(inverse_walsh_hadamard_transform(row, log2_of_block_size, 2));
continue; continue;
} }
@ -1857,7 +1849,7 @@ DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_con
// 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal // 2. If Lossless is equal to 1, invoke the Inverse WHT process as specified in section 8.7.1.10 with shift equal
// to 0. // to 0.
if (block_context.frame_context.is_lossless()) { if (block_context.frame_context.lossless) {
TRY(inverse_walsh_hadamard_transform(column, log2_of_block_size, 2)); TRY(inverse_walsh_hadamard_transform(column, log2_of_block_size, 2));
continue; continue;
} }
@ -1885,7 +1877,7 @@ DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_con
// 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) ) // 6. Otherwise (Lossless is equal to 0), set Dequant[ i ][ j ] equal to Round2( T[ i ], Min( 6, n + 2 ) )
// for i = 0..(n0-1). // for i = 0..(n0-1).
if (!block_context.frame_context.is_lossless()) { if (!block_context.frame_context.lossless) {
for (auto i = 0u; i < block_size; i++) { for (auto i = 0u; i < block_size; i++) {
auto index = i * block_size + j; auto index = i * block_size + j;
dequantized[index] = rounded_right_shift(dequantized[index], min(6, log2_of_block_size + 2)); dequantized[index] = rounded_right_shift(dequantized[index], min(6, log2_of_block_size + 2));

View file

@ -73,11 +73,11 @@ private:
/* (8.6) Reconstruction and Dequantization */ /* (8.6) Reconstruction and Dequantization */
// Returns the quantizer index for the current block // Returns the quantizer index for the current block
static u8 get_base_quantizer_index(BlockContext const&); static u8 get_base_quantizer_index(SegmentFeatureStatus alternative_quantizer_feature, bool should_use_absolute_segment_base_quantizer, u8 base_quantizer_index);
// Returns the quantizer value for the dc coefficient for a particular plane // Returns the quantizer value for the dc coefficient for a particular plane
static u16 get_dc_quantizer(BlockContext const&, u8 plane); static u16 get_dc_quantizer(u8 bit_depth, u8 base, i8 delta);
// Returns the quantizer value for the ac coefficient for a particular plane // Returns the quantizer value for the ac coefficient for a particular plane
static u16 get_ac_quantizer(BlockContext const&, u8 plane); static u16 get_ac_quantizer(u8 bit_depth, u8 base, i8 delta);
// (8.6.2) Reconstruct process // (8.6.2) Reconstruct process
DecoderErrorOr<void> reconstruct(u8 plane, BlockContext const&, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet); DecoderErrorOr<void> reconstruct(u8 plane, BlockContext const&, u32 transform_block_x, u32 transform_block_y, TransformSize transform_block_size, TransformSet);

View file

@ -163,9 +163,9 @@ enum Token : u8 {
enum class SegmentFeature : u8 { enum class SegmentFeature : u8 {
// SEG_LVL_ALT_Q // SEG_LVL_ALT_Q
UseAlternativeQuantizerBase, AlternativeQuantizerBase,
// SEG_LVL_ALT_L // SEG_LVL_ALT_L
UseAlternativeLoopFilterBase, AlternativeLoopFilterBase,
// SEG_LVL_REF_FRAME // SEG_LVL_REF_FRAME
ReferenceFrameOverride, ReferenceFrameOverride,
// SEG_LVL_SKIP // SEG_LVL_SKIP

View file

@ -143,6 +143,39 @@ DecoderErrorOr<VideoFullRangeFlag> Parser::read_video_full_range_flag(BigEndianI
return VideoFullRangeFlag::Studio; return VideoFullRangeFlag::Studio;
} }
// Reads a sign-and-magnitude value from the bit stream: first `bits` bits of
// magnitude, then a single sign bit. A set sign bit negates the magnitude.
// Any read failure is propagated to the caller.
template<Signed T = i8>
static ErrorOr<T> read_signed(BigEndianInputBitStream& bit_stream, u8 bits)
{
    T magnitude = static_cast<T>(TRY(bit_stream.read_bits(bits)));
    bool const is_negative = TRY(bit_stream.read_bit());

    if (!is_negative)
        return magnitude;
    return -magnitude;
}
// Reads an optional quantizer index delta: a one-bit presence flag followed,
// when the flag is set, by a signed value with 4 magnitude bits.
// Returns 0 when the flag is clear.
static DecoderErrorOr<i8> read_delta_q(BigEndianInputBitStream& bit_stream)
{
    bool const delta_is_coded = TRY_READ(bit_stream.read_bit());
    if (!delta_is_coded)
        return 0;

    return TRY_READ(read_signed(bit_stream, 4));
}
// Raw quantization values as read from the bit stream: the base quantizer
// index plus the per-plane index deltas. Every field defaults to 0.
struct QuantizationParameters {
// Base quantizer index (read as 8 bits).
u8 base_quantizer_index { 0 };
// Index delta applied for the Y plane DC quantizer.
i8 y_dc_quantizer_index_delta { 0 };
// Index delta applied for the UV plane DC quantizer.
i8 uv_dc_quantizer_index_delta { 0 };
// Index delta applied for the UV plane AC quantizer.
i8 uv_ac_quantizer_index_delta { 0 };
};
// Reads the quantization parameters from the bit stream in order: an 8-bit
// base quantizer index, then the Y DC, UV DC and UV AC index deltas.
// Returns them as a QuantizationParameters value; read errors are propagated.
static DecoderErrorOr<QuantizationParameters> quantization_params(BigEndianInputBitStream& bit_stream)
{
    u8 const base_index = TRY_READ(bit_stream.read_bits(8));
    i8 const y_dc_delta = TRY(read_delta_q(bit_stream));
    i8 const uv_dc_delta = TRY(read_delta_q(bit_stream));
    i8 const uv_ac_delta = TRY(read_delta_q(bit_stream));

    return QuantizationParameters {
        .base_quantizer_index = base_index,
        .y_dc_quantizer_index_delta = y_dc_delta,
        .uv_dc_quantizer_index_delta = uv_dc_delta,
        .uv_ac_quantizer_index_delta = uv_ac_delta,
    };
}
/* (6.2) */ /* (6.2) */
DecoderErrorOr<void> Parser::uncompressed_header(FrameContext& frame_context) DecoderErrorOr<void> Parser::uncompressed_header(FrameContext& frame_context)
{ {
@ -266,8 +299,10 @@ DecoderErrorOr<void> Parser::uncompressed_header(FrameContext& frame_context)
frame_context.probability_context_index = probability_context_index; frame_context.probability_context_index = probability_context_index;
TRY(loop_filter_params(frame_context)); TRY(loop_filter_params(frame_context));
TRY(quantization_params(frame_context)); auto quantization_parameters = TRY(quantization_params(frame_context.bit_stream));
TRY(segmentation_params(frame_context)); TRY(segmentation_params(frame_context));
precalculate_quantizers(frame_context, quantization_parameters);
TRY(parse_tile_counts(frame_context)); TRY(parse_tile_counts(frame_context));
frame_context.header_size_in_bytes = TRY_READ(frame_context.bit_stream.read_bits(16)); frame_context.header_size_in_bytes = TRY_READ(frame_context.bit_stream.read_bits(16));
@ -399,15 +434,6 @@ DecoderErrorOr<InterpolationFilter> Parser::read_interpolation_filter(BigEndianI
return literal_to_type[TRY_READ(bit_stream.read_bits(2))]; return literal_to_type[TRY_READ(bit_stream.read_bits(2))];
} }
// Reads a sign-and-magnitude value from the bit stream: `bits` bits of
// magnitude followed by one sign bit. Read errors are propagated via TRY.
template<Signed T = i8>
static ErrorOr<T> read_signed(BigEndianInputBitStream& bit_stream, u8 bits)
{
// Despite the name, this holds the magnitude already converted to the signed type T.
auto value_unsigned = static_cast<T>(TRY(bit_stream.read_bits(bits)));
// A set sign bit means the value is negative.
if (TRY(bit_stream.read_bit()))
return -value_unsigned;
return value_unsigned;
}
DecoderErrorOr<void> Parser::loop_filter_params(FrameContext& frame_context) DecoderErrorOr<void> Parser::loop_filter_params(FrameContext& frame_context)
{ {
// FIXME: These should be moved to their own struct to return here. // FIXME: These should be moved to their own struct to return here.
@ -433,22 +459,6 @@ DecoderErrorOr<void> Parser::loop_filter_params(FrameContext& frame_context)
return {}; return {};
} }
// Reads the quantization parameters from the frame's bit stream and stores
// them directly on the frame context: an 8-bit base quantizer index followed
// by the Y DC, UV DC and UV AC index deltas. Read errors are propagated.
DecoderErrorOr<void> Parser::quantization_params(FrameContext& frame_context)
{
frame_context.base_quantizer_index = TRY_READ(frame_context.bit_stream.read_bits(8));
// Each delta is read by read_delta_q(), in this fixed order.
frame_context.y_dc_quantizer_index_delta = TRY(read_delta_q(frame_context.bit_stream));
frame_context.uv_dc_quantizer_index_delta = TRY(read_delta_q(frame_context.bit_stream));
frame_context.uv_ac_quantizer_index_delta = TRY(read_delta_q(frame_context.bit_stream));
return {};
}
// Reads an optional quantizer index delta: a one-bit presence flag followed,
// when the flag is set, by a signed value with 4 magnitude bits.
// Returns 0 when the flag is clear.
DecoderErrorOr<i8> Parser::read_delta_q(BigEndianInputBitStream& bit_stream)
{
// The first bit signals whether a delta is coded at all.
if (TRY_READ(bit_stream.read_bit()))
return TRY_READ(read_signed(bit_stream, 4));
return 0;
}
DecoderErrorOr<void> Parser::segmentation_params(FrameContext& frame_context) DecoderErrorOr<void> Parser::segmentation_params(FrameContext& frame_context)
{ {
frame_context.segmentation_enabled = TRY_READ(frame_context.bit_stream.read_bit()); frame_context.segmentation_enabled = TRY_READ(frame_context.bit_stream.read_bit());
@ -501,6 +511,35 @@ DecoderErrorOr<u8> Parser::read_prob(BigEndianInputBitStream& bit_stream)
return 255; return 255;
} }
// Derives the per-frame lossless flag and fills frame_context.segment_quantizers
// with the AC/DC quantizers for every segment ID, so that the decoder can look
// them up instead of recomputing them repeatedly.
void Parser::precalculate_quantizers(FrameContext& frame_context, QuantizationParameters quantization_parameters)
{
    auto const frame_base_index = quantization_parameters.base_quantizer_index;
    auto const y_dc_delta = quantization_parameters.y_dc_quantizer_index_delta;
    auto const uv_dc_delta = quantization_parameters.uv_dc_quantizer_index_delta;
    auto const uv_ac_delta = quantization_parameters.uv_ac_quantizer_index_delta;

    // The frame is lossless only when the base index and every delta are zero.
    bool const any_parameter_nonzero = frame_base_index != 0 || y_dc_delta != 0 || uv_dc_delta != 0 || uv_ac_delta != 0;
    frame_context.lossless = !any_parameter_nonzero;

    auto const bit_depth = frame_context.color_config.bit_depth;

    for (u8 segment_id = 0; segment_id < MAX_SEGMENTS; segment_id++) {
        // The segment's alternative-quantizer feature may override the frame's base index.
        auto const quantizer_feature = frame_context.get_segment_feature(segment_id, SegmentFeature::AlternativeQuantizerBase);
        auto const segment_base_index = Decoder::get_base_quantizer_index(quantizer_feature, frame_context.should_use_absolute_segment_base_quantizer, frame_base_index);

        frame_context.segment_quantizers[segment_id] = Quantizers {
            // get_ac_quant( plane ):
            // - If plane is equal to 0, return ac_q( get_qindex( ) ).
            // - Otherwise, return ac_q( get_qindex( ) + delta_q_uv_ac ).
            .y_ac_quantizer = Decoder::get_ac_quantizer(bit_depth, segment_base_index, 0),
            .uv_ac_quantizer = Decoder::get_ac_quantizer(bit_depth, segment_base_index, uv_ac_delta),
            // get_dc_quant( plane ):
            // - If plane is equal to 0, return dc_q( get_qindex( ) + delta_q_y_dc ).
            // - Otherwise, return dc_q( get_qindex( ) + delta_q_uv_dc ).
            .y_dc_quantizer = Decoder::get_dc_quantizer(bit_depth, segment_base_index, y_dc_delta),
            .uv_dc_quantizer = Decoder::get_dc_quantizer(bit_depth, segment_base_index, uv_dc_delta),
        };
    }
}
static u16 calc_min_log2_of_tile_columns(u32 superblock_columns) static u16 calc_min_log2_of_tile_columns(u32 superblock_columns)
{ {
auto min_log_2 = 0u; auto min_log_2 = 0u;
@ -577,7 +616,7 @@ DecoderErrorOr<void> Parser::compressed_header(FrameContext& frame_context)
DecoderErrorOr<TransformMode> Parser::read_tx_mode(BooleanDecoder& decoder, FrameContext const& frame_context) DecoderErrorOr<TransformMode> Parser::read_tx_mode(BooleanDecoder& decoder, FrameContext const& frame_context)
{ {
if (frame_context.is_lossless()) { if (frame_context.lossless) {
return TransformMode::Only_4x4; return TransformMode::Only_4x4;
} }
@ -1373,7 +1412,7 @@ static TransformSet select_transform_type(BlockContext const& block_context, u8
if (plane > 0 || transform_size == Transform_32x32) if (plane > 0 || transform_size == Transform_32x32)
return TransformSet { TransformType::DCT, TransformType::DCT }; return TransformSet { TransformType::DCT, TransformType::DCT };
if (transform_size == Transform_4x4) { if (transform_size == Transform_4x4) {
if (block_context.frame_context.is_lossless() || block_context.is_inter_predicted()) if (block_context.frame_context.lossless || block_context.is_inter_predicted())
return TransformSet { TransformType::DCT, TransformType::DCT }; return TransformSet { TransformType::DCT, TransformType::DCT };
return mode_to_txfm_map[to_underlying(block_context.size < Block_8x8 ? block_context.sub_block_prediction_modes[block_index] : block_context.y_prediction_mode())]; return mode_to_txfm_map[to_underlying(block_context.size < Block_8x8 ? block_context.sub_block_prediction_modes[block_index] : block_context.y_prediction_mode())];

View file

@ -30,6 +30,7 @@ struct FrameContext;
struct TileContext; struct TileContext;
struct BlockContext; struct BlockContext;
struct MotionVectorCandidate; struct MotionVectorCandidate;
struct QuantizationParameters;
class Parser { class Parser {
friend class TreeParser; friend class TreeParser;
@ -61,10 +62,10 @@ private:
DecoderErrorOr<void> compute_image_size(FrameContext&); DecoderErrorOr<void> compute_image_size(FrameContext&);
DecoderErrorOr<InterpolationFilter> read_interpolation_filter(BigEndianInputBitStream&); DecoderErrorOr<InterpolationFilter> read_interpolation_filter(BigEndianInputBitStream&);
DecoderErrorOr<void> loop_filter_params(FrameContext&); DecoderErrorOr<void> loop_filter_params(FrameContext&);
DecoderErrorOr<void> quantization_params(FrameContext&);
DecoderErrorOr<i8> read_delta_q(BigEndianInputBitStream&); DecoderErrorOr<i8> read_delta_q(BigEndianInputBitStream&);
DecoderErrorOr<void> segmentation_params(FrameContext&); DecoderErrorOr<void> segmentation_params(FrameContext&);
DecoderErrorOr<u8> read_prob(BigEndianInputBitStream&); DecoderErrorOr<u8> read_prob(BigEndianInputBitStream&);
static void precalculate_quantizers(FrameContext& frame_context, QuantizationParameters quantization_parameters);
DecoderErrorOr<void> parse_tile_counts(FrameContext&); DecoderErrorOr<void> parse_tile_counts(FrameContext&);
void setup_past_independence(); void setup_past_independence();