mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 12:07:45 +00:00
LibVideo/VP9: Make a lookup table for bit reversals
Bit reversals are used very often in intra-predicted frames. Turning these into a constexpr lookup table significantly reduces the branching needed for block transforms. This reduces the time spent decoding an intra-heavy 1080p video by about 9% (~14.3s -> ~12.9s).
This commit is contained in:
parent
f6764beead
commit
6e6cc1ddb2
2 changed files with 21 additions and 10 deletions
|
@ -1400,7 +1400,7 @@ inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform_array_per
|
||||||
|
|
||||||
// 1.2. T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1<<n) - 1).
|
// 1.2. T[ i ] is set equal to copyT[ brev( n, i ) ] for i = 0..((1<<n) - 1).
|
||||||
for (auto i = 0u; i < block_size; i++)
|
for (auto i = 0u; i < block_size; i++)
|
||||||
data[i] = data_copy[brev(log2_of_block_size, i)];
|
data[i] = data_copy[brev<log2_of_block_size>(i)];
|
||||||
|
|
||||||
return {};
|
return {};
|
||||||
}
|
}
|
||||||
|
@ -1432,7 +1432,7 @@ inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Span<Inte
|
||||||
// 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1).
|
// 2.6 Invoke B( n1+i, n0-1-i, 32-brev( 5, n1+i), 0 ) for i = 0..(n2-1).
|
||||||
for (auto i = 0u; i < quarter_block_size; i++) {
|
for (auto i = 0u; i < quarter_block_size; i++) {
|
||||||
auto index = half_block_size + i;
|
auto index = half_block_size + i;
|
||||||
butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev(5, index), false);
|
butterfly_rotation_in_place(data, index, block_size - 1 - i, 32 - brev<5>(index), false);
|
||||||
}
|
}
|
||||||
|
|
||||||
// 2.7 If n is greater than or equal to 3:
|
// 2.7 If n is greater than or equal to 3:
|
||||||
|
|
|
@ -34,15 +34,26 @@ u16 clip_1(u8 bit_depth, T x)
|
||||||
return x;
|
return x;
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename C>
|
template<u8 bits>
|
||||||
inline T brev(C bit_count, T value)
|
inline u8 brev(u8 value)
|
||||||
{
|
{
|
||||||
T result = 0;
|
static_assert(bits <= 8, "brev() expects an 8-bit value.");
|
||||||
for (C i = 0; i < bit_count; i++) {
|
|
||||||
auto bit = (value >> i) & 1;
|
static constexpr auto lookup_table = [] {
|
||||||
result |= bit << (bit_count - 1 - i);
|
constexpr size_t value_count = 1 << bits;
|
||||||
|
Array<u8, value_count> the_table;
|
||||||
|
for (u8 lookup_value = 0; lookup_value < value_count; lookup_value++) {
|
||||||
|
u8 reversed = 0;
|
||||||
|
for (u8 bit_index = 0; bit_index < bits; bit_index++) {
|
||||||
|
auto bit = (lookup_value >> bit_index) & 1;
|
||||||
|
reversed |= bit << (bits - 1 - bit_index);
|
||||||
}
|
}
|
||||||
return result;
|
the_table[lookup_value] = reversed;
|
||||||
|
}
|
||||||
|
return the_table;
|
||||||
|
}();
|
||||||
|
|
||||||
|
return lookup_table[value];
|
||||||
}
|
}
|
||||||
|
|
||||||
inline BlockSubsize get_subsampled_block_size(BlockSubsize size, bool subsampling_x, bool subsampling_y)
|
inline BlockSubsize get_subsampled_block_size(BlockSubsize size, bool subsampling_x, bool subsampling_y)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue