LibVideo: Fast-path converting colors by only matrix coefficients

We don't need to run through the whole floating-point color converter for videos that use sRGB transfer characteristics and BT.709 color primaries. This commit adds a new templated, always-inlined function to ColorConverter that does a very fast fixed-point YCbCr to RGB conversion. With the fast path, frame conversion times go from ~7.8ms down to ~3.7ms. The fast path can also benefit a lot more from extra SIMD vector width.
2025-07-27 15:27:35 +00:00 · 2023-04-20 05:11:57 -05:00 · 2023-04-20 05:11:57 -05:00 · b10da81c7c
commit b10da81c7c
parent d6b867ba89
3 changed files with 152 additions and 37 deletions
--- a/Userland/Libraries/LibVideo/Color/ColorConverter.cpp
+++ b/Userland/Libraries/LibVideo/Color/ColorConverter.cpp
@@ -14,14 +14,12 @@

 namespace Video {

-DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints cicp)
+DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp)
 {
    // We'll need to apply tonemapping for linear HDR values.
    bool should_tonemap = false;
-    switch (cicp.transfer_characteristics()) {
+    switch (input_cicp.transfer_characteristics()) {
    case TransferCharacteristics::SMPTE2084:
-        should_tonemap = true;
-        break;
    case TransferCharacteristics::HLG:
        should_tonemap = true;
        break;
@@ -34,7 +32,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
    //    float 0..1 range.
    //    This can be done with a 3x3 scaling matrix.
    size_t maximum_value = (1u << bit_depth) - 1;
-    float scale = 1.0 / maximum_value;
+    float scale = 1.0f / maximum_value;
    FloatMatrix4x4 integer_scaling_matrix = {
        scale, 0.0f, 0.0f, 0.0f, // y
        0.0f, scale, 0.0f, 0.0f, // u
@@ -50,7 +48,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
    float y_max;
    float uv_min;
    float uv_max;
-    if (cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
+    if (input_cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) {
        y_min = 16.0f / 255.0f;
        y_max = 235.0f / 255.0f;
        uv_min = y_min;
@@ -77,7 +75,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
    FloatMatrix4x4 color_conversion_matrix;

    // https://kdashg.github.io/misc/colors/from-coeffs.html
-    switch (cicp.matrix_coefficients()) {
+    switch (input_cicp.matrix_coefficients()) {
    case MatrixCoefficients::BT709:
        color_conversion_matrix = {
            1.0f, 0.0f, 0.78740f, 0.0f,       // y
@@ -104,7 +102,7 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
        };
        break;
    default:
-        return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(cicp.matrix_coefficients()));
+        return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(input_cicp.matrix_coefficients()));
    }

    // 4. Apply the inverse transfer function to convert RGB values to the
@@ -113,23 +111,21 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
    //    up the conversion.
    auto to_linear_lookup_table = InterpolatedLookupTable<to_linear_size>::create(
        [&](float value) {
-            return TransferCharacteristicsConversion::to_linear_luminance(value, cicp.transfer_characteristics());
+            return TransferCharacteristicsConversion::to_linear_luminance(value, input_cicp.transfer_characteristics());
        });

    // 5. Convert the RGB color to CIE XYZ coordinates using the input color
    //    primaries and then to the output color primaries.
    //    This is done with two 3x3 matrices that can be combined into one
    //    matrix multiplication.
-    ColorPrimaries output_cp = ColorPrimaries::BT709;
-    FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(cicp.color_primaries(), output_cp));
+    FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(input_cicp.color_primaries(), output_cicp.color_primaries()));

    // 6. Apply the output transfer function. For HDR color spaces, this
    //    should apply tonemapping as well.
    //    Use a lookup table as with step 3.
-    TransferCharacteristics output_tc = TransferCharacteristics::SRGB;
    auto to_non_linear_lookup_table = InterpolatedLookupTable<to_non_linear_size>::create(
        [&](float value) {
-            return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_tc);
+            return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_cicp.transfer_characteristics());
        });

    // Expand color primaries matrix with identity elements.
@@ -152,10 +148,10 @@ DecoderErrorOr<ColorConverter> ColorConverter::create(u8 bit_depth, CodingIndepe
        1.0f, // w
    };

-    bool should_skip_color_remapping = output_cp == cicp.color_primaries() && output_tc == cicp.transfer_characteristics();
+    bool should_skip_color_remapping = output_cicp.color_primaries() == input_cicp.color_primaries() && output_cicp.transfer_characteristics() == input_cicp.transfer_characteristics();
    FloatMatrix4x4 input_conversion_matrix = color_conversion_matrix * range_scaling_matrix * integer_scaling_matrix;

-    return ColorConverter(bit_depth, cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table);
+    return ColorConverter(bit_depth, input_cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table);
 }

 }
--- a/Userland/Libraries/LibVideo/Color/ColorConverter.h
+++ b/Userland/Libraries/LibVideo/Color/ColorConverter.h
@@ -104,10 +104,10 @@ private:
    }

 public:
-    static DecoderErrorOr<ColorConverter> create(u8 bit_depth, CodingIndependentCodePoints cicp);
+    static DecoderErrorOr<ColorConverter> create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp);

    // Referencing https://en.wikipedia.org/wiki/YCbCr
-    ALWAYS_INLINE Gfx::Color convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const
+    ALWAYS_INLINE Gfx::Color convert_yuv(u16 y, u16 u, u16 v) const
    {
        auto max_zero = [](FloatVector4 vector) {
            return FloatVector4(max(0.0f, vector.x()), max(0.0f, vector.y()), max(0.0f, vector.z()), vector.w());
@@ -150,6 +150,99 @@ public:
        return Gfx::Color(r, g, b);
    }

+    // Fast fixed-point conversion of 8-bit YUV to full-range 8-bit RGB.
+    // MC selects the YCbCr matrix and FR the range of the input samples. Values are
+    // scaled by `one` (14 fractional bits) internally, then rounded and clamped.
+    template<MatrixCoefficients MC, VideoFullRangeFlag FR, Unsigned T>
+    static ALWAYS_INLINE Gfx::Color convert_simple_yuv_to_rgb(T y_in, T u_in, T v_in)
+    {
+        // Any other matrix would leave the RGB values below without a conversion.
+        static_assert(MC == MatrixCoefficients::BT709 || MC == MatrixCoefficients::BT601 || MC == MatrixCoefficients::BT2020ConstantLuminance, "Unsupported matrix coefficients for the fast YUV to RGB path");
+
+        static constexpr i32 bit_depth = 8;
+        static constexpr i32 maximum_value = (1 << bit_depth) - 1;
+        static constexpr i32 one = 1 << 14;
+        static constexpr auto fraction = [](i32 numerator, i32 denominator) constexpr {
+            return static_cast<i32>(static_cast<i64>(numerator) * one / denominator);
+        };
+        static constexpr auto coef = [](i32 hundred_thousandths) constexpr {
+            return fraction(hundred_thousandths, 100'000);
+        };
+        // Fixed-point product; widened so the intermediate cannot overflow i32.
+        static constexpr auto multiply = [](i32 a, i32 b) constexpr {
+            return static_cast<i32>(static_cast<i64>(a) * b / one);
+        };
+
+        struct RangeFactors {
+            i32 y_offset, y_scale;
+            i32 uv_offset, uv_scale;
+        };
+
+        constexpr auto range_factors = [] {
+            i32 min = 0;
+            i32 y_max = 255;
+            i32 uv_max = 255;
+            if constexpr (FR == VideoFullRangeFlag::Studio) {
+                min = 16;
+                y_max = 235;
+                uv_max = 240;
+            }
+            RangeFactors factors {};
+            factors.y_offset = -min * maximum_value / 255;
+            factors.y_scale = multiply(fraction(255, y_max - min), fraction(255, maximum_value));
+            factors.uv_offset = -((min + uv_max) * maximum_value) / (255 * 2);
+            factors.uv_scale = multiply(fraction(255, uv_max - min) * 2, fraction(255, maximum_value));
+            return factors;
+        }();
+
+        i32 y = static_cast<i32>(y_in) + range_factors.y_offset;
+        i32 u = static_cast<i32>(u_in) + range_factors.uv_offset;
+        i32 v = static_cast<i32>(v_in) + range_factors.uv_offset;
+
+        i32 red = 0;
+        i32 green = 0;
+        i32 blue = 0;
+
+        constexpr i32 y_scale = range_factors.y_scale;
+        constexpr i32 uv_scale = range_factors.uv_scale;
+
+        // The equations below will have the following effects:
+        //  - Scale the Y, U and V values into the range 0...maximum_value*one for these fixed-point operations.
+        //  - Scale the values by the color range defined by VideoFullRangeFlag.
+        //  - Scale the U and V values by 2 to put them in the actual YCbCr coordinate space.
+        //  - Multiply by the YCbCr coefficients to convert to RGB.
+        if constexpr (MC == MatrixCoefficients::BT709) {
+            red = y * y_scale + v * multiply(coef(78740), uv_scale);
+            green = y * y_scale + u * multiply(coef(-9366), uv_scale) + v * multiply(coef(-23406), uv_scale);
+            blue = y * y_scale + u * multiply(coef(92780), uv_scale);
+        }
+
+        if constexpr (MC == MatrixCoefficients::BT601) {
+            red = y * y_scale + v * multiply(coef(70100), uv_scale);
+            green = y * y_scale + u * multiply(coef(-17207), uv_scale) + v * multiply(coef(-35707), uv_scale);
+            blue = y * y_scale + u * multiply(coef(88600), uv_scale);
+        }
+
+        if constexpr (MC == MatrixCoefficients::BT2020ConstantLuminance) {
+            red = y * y_scale + v * multiply(coef(73730), uv_scale);
+            green = y * y_scale + u * multiply(coef(-8228), uv_scale) + v * multiply(coef(-28568), uv_scale);
+            blue = y * y_scale + u * multiply(coef(94070), uv_scale);
+        }
+
+        // Add half of the divisor before clamping so the division below rounds to
+        // nearest; truncation would turn studio-range white (Y = 235) into 254.
+        constexpr i32 divisor = fraction(maximum_value, 255);
+        red = clamp(red + divisor / 2, 0, maximum_value * one);
+        green = clamp(green + divisor / 2, 0, maximum_value * one);
+        blue = clamp(blue + divisor / 2, 0, maximum_value * one);
+        // This compiles down to a bit shift if maximum_value == 255
+        red /= divisor;
+        green /= divisor;
+        blue /= divisor;
+
+        return Gfx::Color(u8(red), u8(green), u8(blue));
+    }
+
 private:
    static constexpr size_t to_linear_size = 64;
    static constexpr size_t to_non_linear_size = 64;