LibVideo/VP9: Force inlining of inverse_transform_2d() and the IDCT

Clang was reluctant to inline these for some reason. However, inlining them seems to be quite beneficial, reducing decoding time in an intra-heavy video by about 21% (~12.7s -> ~10.0s).
2025-09-16 11:16:17 +00:00 · 2023-04-18 22:54:56 -05:00 · 2023-04-18 22:54:56 -05:00 · d6b867ba89
commit d6b867ba89
parent 90c0e1ad8f
1 changed files with 2 additions and 2 deletions
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@@ -1399,7 +1399,7 @@ inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform_array_per
 }

 template<u8 log2_of_block_size>
-inline DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Span<Intermediate> data)
+ALWAYS_INLINE DecoderErrorOr<void> Decoder::inverse_discrete_cosine_transform(Span<Intermediate> data)
 {
    static_assert(log2_of_block_size >= 2 && log2_of_block_size <= 5, "Block size out of range.");

@@ -1790,7 +1790,7 @@ inline DecoderErrorOr<void> Decoder::inverse_asymmetric_discrete_sine_transform(
 }

 template<u8 log2_of_block_size>
-DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_context, Span<Intermediate> dequantized, TransformSet transform_set)
+ALWAYS_INLINE DecoderErrorOr<void> Decoder::inverse_transform_2d(BlockContext const& block_context, Span<Intermediate> dequantized, TransformSet transform_set)
 {
    static_assert(log2_of_block_size >= 2 && log2_of_block_size <= 5);