From 5b4c1056f1928b8ab2bbebbbd596d8e214cbce1a Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Sun, 16 Apr 2023 08:44:55 -0500 Subject: [PATCH] LibVideo/Color: Always inline `convert_yuv_to_full_range_rgb()` Inlining the color conversion reduces time spent for frame conversions in a 1080p video from ~12ms down to ~9ms. --- .../LibVideo/Color/ColorConverter.cpp | 86 ----------------- .../Libraries/LibVideo/Color/ColorConverter.h | 92 ++++++++++++++++++- 2 files changed, 89 insertions(+), 89 deletions(-) diff --git a/Userland/Libraries/LibVideo/Color/ColorConverter.cpp b/Userland/Libraries/LibVideo/Color/ColorConverter.cpp index ab95075194..cdece9e7ea 100644 --- a/Userland/Libraries/LibVideo/Color/ColorConverter.cpp +++ b/Userland/Libraries/LibVideo/Color/ColorConverter.cpp @@ -14,43 +14,6 @@ namespace Video { -// Tonemapping methods are outlined here: -// https://64.github.io/tonemapping/ - -template -ALWAYS_INLINE constexpr T scalar_to_color_vector(float value) -{ - if constexpr (IsSame>) { - return Gfx::VectorN<4, float>(value, value, value, 1.0f); - } else if constexpr (IsSame>) { - return Gfx::VectorN<3, float>(value, value, value); - } else { - static_assert(IsFloatingPoint); - return static_cast(value); - } -} - -template -ALWAYS_INLINE constexpr T hable_tonemapping_partial(T value) -{ - constexpr auto a = scalar_to_color_vector(0.15f); - constexpr auto b = scalar_to_color_vector(0.5f); - constexpr auto c = scalar_to_color_vector(0.1f); - constexpr auto d = scalar_to_color_vector(0.2f); - constexpr auto e = scalar_to_color_vector(0.02f); - constexpr auto f = scalar_to_color_vector(0.3f); - return ((value * (a * value + c * b) + d * e) / (value * (a * value + b) + d * f)) - e / f; -} - -template -ALWAYS_INLINE constexpr T hable_tonemapping(T value) -{ - constexpr auto exposure_bias = scalar_to_color_vector(2.0f); - value = hable_tonemapping_partial(value * exposure_bias); - constexpr auto scale = scalar_to_color_vector(1.0f) / scalar_to_color_vector(hable_tonemapping_partial(11.2f)); - return value * scale; -} - DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints cicp) { // We'll need to apply tonemapping for linear HDR values. @@ -195,53 +158,4 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe return ColorConverter(bit_depth, cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table); } -ALWAYS_INLINE FloatVector4 max_zero(FloatVector4 vector) -{ - return { max(0.0f, vector.x()), max(0.0f, vector.y()), max(0.0f, vector.z()), vector.w() }; -} - -// Referencing https://en.wikipedia.org/wiki/YCbCr -Gfx::Color ColorConverter::convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const -{ - FloatVector4 color_vector = { static_cast(y), static_cast(u), static_cast(v), 1.0f }; - color_vector = m_input_conversion_matrix * color_vector; - - if (m_should_skip_color_remapping) { - color_vector.clamp(0.0f, 1.0f); - } else { - color_vector = max_zero(color_vector); - color_vector = m_to_linear_lookup.do_lookup(color_vector); - - if (m_cicp.transfer_characteristics() == TransferCharacteristics::HLG) { - static auto hlg_ootf_lookup_table = InterpolatedLookupTable<32, 1000>::create( - [](float value) { - return AK::pow(value, 1.2f - 1.0f); - }); - // See: https://en.wikipedia.org/wiki/Hybrid_log-gamma under a bolded section "HLG reference OOTF" - float luminance = (0.2627f * color_vector.x() + 0.6780f * color_vector.y() + 0.0593f * color_vector.z()) * 1000.0f; - float coefficient = hlg_ootf_lookup_table.do_lookup(luminance); - color_vector = { color_vector.x() * coefficient, color_vector.y() * coefficient, color_vector.z() * coefficient, 1.0f }; - } - - // FIXME: We could implement gamut compression here: - // https://github.com/jedypod/gamut-compress/blob/master/docs/gamut-compress-algorithm.md - // This would allow the color values outside the output gamut to be - // preserved relative to values within the gamut instead of clipping. The - // downside is that this requires a pass over the image before conversion - // back into gamut is done to find the maximum color values to compress. - // The compression would have to be somewhat temporally consistent as well. - color_vector = m_color_space_conversion_matrix * color_vector; - color_vector = max_zero(color_vector); - if (m_should_tonemap) - color_vector = hable_tonemapping(color_vector); - color_vector = m_to_non_linear_lookup.do_lookup(color_vector); - color_vector = max_zero(color_vector); - } - - u8 r = static_cast(color_vector.x() * 255.0f); - u8 g = static_cast(color_vector.y() * 255.0f); - u8 b = static_cast(color_vector.z() * 255.0f); - return Gfx::Color(r, g, b); -} - } diff --git a/Userland/Libraries/LibVideo/Color/ColorConverter.h b/Userland/Libraries/LibVideo/Color/ColorConverter.h index d0c4ce8f6a..b464ba69b2 100644 --- a/Userland/Libraries/LibVideo/Color/ColorConverter.h +++ b/Userland/Libraries/LibVideo/Color/ColorConverter.h @@ -31,7 +31,7 @@ public: return lookup_table; } - float do_lookup(float value) const + ALWAYS_INLINE float do_lookup(float value) const { float float_index = value * (maximum_value / static_cast(Scale)); if (float_index > maximum_value) [[unlikely]] @@ -42,7 +42,7 @@ public: return value; } - FloatVector4 do_lookup(FloatVector4 vector) const + ALWAYS_INLINE FloatVector4 do_lookup(FloatVector4 vector) const { return { do_lookup(vector.x()), @@ -58,12 +58,97 @@ private: Array m_lookup_table; }; +static auto hlg_ootf_lookup_table = InterpolatedLookupTable<32, 1000>::create( + [](float value) { + return AK::pow(value, 1.2f - 1.0f); + }); + class ColorConverter final { +private: + // Tonemapping methods are outlined here: + // https://64.github.io/tonemapping/ + + template + static ALWAYS_INLINE constexpr T scalar_to_color_vector(float value) + { + if constexpr (IsSame>) { + return Gfx::VectorN<4, float>(value, value, value, 1.0f); + } else if constexpr (IsSame>) { + return Gfx::VectorN<3, float>(value, value, value); + } else { + static_assert(IsFloatingPoint); + return static_cast(value); + } + } + + template + static ALWAYS_INLINE constexpr T hable_tonemapping_partial(T value) + { + constexpr auto a = scalar_to_color_vector(0.15f); + constexpr auto b = scalar_to_color_vector(0.5f); + constexpr auto c = scalar_to_color_vector(0.1f); + constexpr auto d = scalar_to_color_vector(0.2f); + constexpr auto e = scalar_to_color_vector(0.02f); + constexpr auto f = scalar_to_color_vector(0.3f); + return ((value * (a * value + c * b) + d * e) / (value * (a * value + b) + d * f)) - e / f; + } + + template + static ALWAYS_INLINE constexpr T hable_tonemapping(T value) + { + constexpr auto exposure_bias = scalar_to_color_vector(2.0f); + value = hable_tonemapping_partial(value * exposure_bias); + constexpr auto scale = scalar_to_color_vector(1.0f) / scalar_to_color_vector(hable_tonemapping_partial(11.2f)); + return value * scale; + } + public: static DecoderErrorOr create(u8 bit_depth, CodingIndependentCodePoints cicp); - Gfx::Color convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const; + // Referencing https://en.wikipedia.org/wiki/YCbCr + ALWAYS_INLINE Gfx::Color convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const + { + auto max_zero = [](FloatVector4 vector) { + return FloatVector4(max(0.0f, vector.x()), max(0.0f, vector.y()), max(0.0f, vector.z()), vector.w()); + }; + + FloatVector4 color_vector = { static_cast(y), static_cast(u), static_cast(v), 1.0f }; + color_vector = m_input_conversion_matrix * color_vector; + + if (m_should_skip_color_remapping) { + color_vector.clamp(0.0f, 1.0f); + } else { + color_vector = max_zero(color_vector); + color_vector = m_to_linear_lookup.do_lookup(color_vector); + + if (m_cicp.transfer_characteristics() == TransferCharacteristics::HLG) { + // See: https://en.wikipedia.org/wiki/Hybrid_log-gamma under a bolded section "HLG reference OOTF" + float luminance = (0.2627f * color_vector.x() + 0.6780f * color_vector.y() + 0.0593f * color_vector.z()) * 1000.0f; + float coefficient = hlg_ootf_lookup_table.do_lookup(luminance); + color_vector = { color_vector.x() * coefficient, color_vector.y() * coefficient, color_vector.z() * coefficient, 1.0f }; + } + + // FIXME: We could implement gamut compression here: + // https://github.com/jedypod/gamut-compress/blob/master/docs/gamut-compress-algorithm.md + // This would allow the color values outside the output gamut to be + // preserved relative to values within the gamut instead of clipping. The + // downside is that this requires a pass over the image before conversion + // back into gamut is done to find the maximum color values to compress. + // The compression would have to be somewhat temporally consistent as well. + color_vector = m_color_space_conversion_matrix * color_vector; + color_vector = max_zero(color_vector); + if (m_should_tonemap) + color_vector = hable_tonemapping(color_vector); + color_vector = m_to_non_linear_lookup.do_lookup(color_vector); + color_vector = max_zero(color_vector); + } + + u8 r = static_cast(color_vector.x() * 255.0f); + u8 g = static_cast(color_vector.y() * 255.0f); + u8 b = static_cast(color_vector.z() * 255.0f); + return Gfx::Color(r, g, b); + } private: static constexpr size_t to_linear_size = 64; @@ -80,6 +165,7 @@ private: , m_to_non_linear_lookup(move(to_non_linear_lookup)) { } + u8 m_bit_depth; CodingIndependentCodePoints m_cicp; bool m_should_skip_color_remapping;