From b10da81c7c5f0f5c00b24174f1f6c63f26f8b697 Mon Sep 17 00:00:00 2001 From: Zaggy1024 Date: Thu, 20 Apr 2023 05:11:57 -0500 Subject: [PATCH] LibVideo: Fast-path converting colors by only matrix coefficients We don't need to run through the whole floating-point color converter for videos that use sRGB transfer characteristics and BT.709 color primaries. This commit adds a new templated inlining function to ColorConverter to do a very fast fixed-point YCbCr to RGB conversion. With the fast path, frame conversion times go from ~7.8ms down to ~3.7ms. The fast path can benefit a lot more from extra SIMD vector width, as well. --- .../LibVideo/Color/ColorConverter.cpp | 26 +++-- .../Libraries/LibVideo/Color/ColorConverter.h | 97 ++++++++++++++++++- Userland/Libraries/LibVideo/VideoFrame.cpp | 66 +++++++++---- 3 files changed, 152 insertions(+), 37 deletions(-) diff --git a/Userland/Libraries/LibVideo/Color/ColorConverter.cpp b/Userland/Libraries/LibVideo/Color/ColorConverter.cpp index cdece9e7ea..30a7e09e20 100644 --- a/Userland/Libraries/LibVideo/Color/ColorConverter.cpp +++ b/Userland/Libraries/LibVideo/Color/ColorConverter.cpp @@ -14,14 +14,12 @@ namespace Video { -DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints cicp) +DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp) { // We'll need to apply tonemapping for linear HDR values. bool should_tonemap = false; - switch (cicp.transfer_characteristics()) { + switch (input_cicp.transfer_characteristics()) { case TransferCharacteristics::SMPTE2084: - should_tonemap = true; - break; case TransferCharacteristics::HLG: should_tonemap = true; break; @@ -34,7 +32,7 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe // float 0..1 range. // This can be done with a 3x3 scaling matrix. size_t maximum_value = (1u << bit_depth) - 1; - float scale = 1.0 / maximum_value; + float scale = 1.0f / maximum_value; FloatMatrix4x4 integer_scaling_matrix = { scale, 0.0f, 0.0f, 0.0f, // y 0.0f, scale, 0.0f, 0.0f, // u @@ -50,7 +48,7 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe float y_max; float uv_min; float uv_max; - if (cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) { + if (input_cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) { y_min = 16.0f / 255.0f; y_max = 235.0f / 255.0f; uv_min = y_min; @@ -77,7 +75,7 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe FloatMatrix4x4 color_conversion_matrix; // https://kdashg.github.io/misc/colors/from-coeffs.html - switch (cicp.matrix_coefficients()) { + switch (input_cicp.matrix_coefficients()) { case MatrixCoefficients::BT709: color_conversion_matrix = { 1.0f, 0.0f, 0.78740f, 0.0f, // y @@ -104,7 +102,7 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe }; break; default: - return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(cicp.matrix_coefficients())); + return DecoderError::format(DecoderErrorCategory::Invalid, "Matrix coefficients {} not supported", matrix_coefficients_to_string(input_cicp.matrix_coefficients())); } // 4. Apply the inverse transfer function to convert RGB values to the @@ -113,23 +111,21 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe // up the conversion. auto to_linear_lookup_table = InterpolatedLookupTable::create( [&](float value) { - return TransferCharacteristicsConversion::to_linear_luminance(value, cicp.transfer_characteristics()); + return TransferCharacteristicsConversion::to_linear_luminance(value, input_cicp.transfer_characteristics()); }); // 5. Convert the RGB color to CIE XYZ coordinates using the input color // primaries and then to the output color primaries. // This is done with two 3x3 matrices that can be combined into one // matrix multiplication. - ColorPrimaries output_cp = ColorPrimaries::BT709; - FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(cicp.color_primaries(), output_cp)); + FloatMatrix3x3 color_primaries_matrix = TRY(get_conversion_matrix(input_cicp.color_primaries(), output_cicp.color_primaries())); // 6. Apply the output transfer function. For HDR color spaces, this // should apply tonemapping as well. // Use a lookup table as with step 3. - TransferCharacteristics output_tc = TransferCharacteristics::SRGB; auto to_non_linear_lookup_table = InterpolatedLookupTable::create( [&](float value) { - return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_tc); + return TransferCharacteristicsConversion::to_non_linear_luminance(value, output_cicp.transfer_characteristics()); }); // Expand color primaries matrix with identity elements. @@ -152,10 +148,10 @@ DecoderErrorOr ColorConverter::create(u8 bit_depth, CodingIndepe 1.0f, // w }; - bool should_skip_color_remapping = output_cp == cicp.color_primaries() && output_tc == cicp.transfer_characteristics(); + bool should_skip_color_remapping = output_cicp.color_primaries() == input_cicp.color_primaries() && output_cicp.transfer_characteristics() == input_cicp.transfer_characteristics(); FloatMatrix4x4 input_conversion_matrix = color_conversion_matrix * range_scaling_matrix * integer_scaling_matrix; - return ColorConverter(bit_depth, cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table); + return ColorConverter(bit_depth, input_cicp, should_skip_color_remapping, should_tonemap, input_conversion_matrix, to_linear_lookup_table, color_primaries_matrix_4x4, to_non_linear_lookup_table); } } diff --git a/Userland/Libraries/LibVideo/Color/ColorConverter.h b/Userland/Libraries/LibVideo/Color/ColorConverter.h index b464ba69b2..fbf5d8db1d 100644 --- a/Userland/Libraries/LibVideo/Color/ColorConverter.h +++ b/Userland/Libraries/LibVideo/Color/ColorConverter.h @@ -104,10 +104,10 @@ private: } public: - static DecoderErrorOr create(u8 bit_depth, CodingIndependentCodePoints cicp); + static DecoderErrorOr create(u8 bit_depth, CodingIndependentCodePoints input_cicp, CodingIndependentCodePoints output_cicp); // Referencing https://en.wikipedia.org/wiki/YCbCr - ALWAYS_INLINE Gfx::Color convert_yuv_to_full_range_rgb(u16 y, u16 u, u16 v) const + ALWAYS_INLINE Gfx::Color convert_yuv(u16 y, u16 u, u16 v) const { auto max_zero = [](FloatVector4 vector) { return FloatVector4(max(0.0f, vector.x()), max(0.0f, vector.y()), max(0.0f, vector.z()), vector.w()); @@ -150,6 +150,99 @@ public: return Gfx::Color(r, g, b); } + // Fast conversion of 8-bit YUV to full-range RGB. + template + static ALWAYS_INLINE Gfx::Color convert_simple_yuv_to_rgb(T y_in, T u_in, T v_in) + { + static constexpr i32 bit_depth = 8; + static constexpr i32 maximum_value = (1 << bit_depth) - 1; + static constexpr i32 one = 1 << 14; + static constexpr auto fraction = [](i32 numerator, i32 denominator) constexpr { + auto temp = static_cast(numerator) * one; + return static_cast(temp / denominator); + }; + static constexpr auto coef = [](i32 hundred_thousandths) constexpr { + return fraction(hundred_thousandths, 100'000); + }; + static constexpr auto multiply = [](i32 a, i32 b) constexpr { + return (a * b) / one; + }; + + struct RangeFactors { + i32 y_offset, y_scale; + i32 uv_offset, uv_scale; + }; + + constexpr auto range_factors = [] { + RangeFactors range_factors; + + i32 min = 0; + i32 y_max = 255; + i32 uv_max = 255; + + if constexpr (FR == VideoFullRangeFlag::Studio) { + min = 16; + y_max = 235; + uv_max = 240; + } + + range_factors.y_offset = -min * maximum_value / 255; + range_factors.y_scale = fraction(255, y_max - min); + range_factors.uv_offset = -((min + uv_max) * maximum_value) / (255 * 2); + range_factors.uv_scale = fraction(255, uv_max - min) * 2; + + range_factors.y_scale = multiply(range_factors.y_scale, fraction(255, maximum_value)); + range_factors.uv_scale = multiply(range_factors.uv_scale, fraction(255, maximum_value)); + + return range_factors; + }(); + + i32 y = y_in + range_factors.y_offset; + i32 u = u_in + range_factors.uv_offset; + i32 v = v_in + range_factors.uv_offset; + + i32 red; + i32 green; + i32 blue; + + constexpr i32 y_scale = range_factors.y_scale; + constexpr i32 uv_scale = range_factors.uv_scale; + + // The equations below will have the following effects: + // - Scale the Y, U and V values into the range 0...maximum_value*one for these fixed-point operations. + // - Scale the values by the color range defined by VideoFullRangeFlag. + // - Scale the U and V values by 2 to put them in the actual YCbCr coordinate space. + // - Multiply by the YCbCr coefficients to convert to RGB. + if constexpr (MC == MatrixCoefficients::BT709) { + red = y * y_scale + v * multiply(coef(78740), uv_scale); + green = y * y_scale + u * multiply(coef(-9366), uv_scale) + v * multiply(coef(-23406), uv_scale); + blue = y * y_scale + u * multiply(coef(92780), uv_scale); + } + + if constexpr (MC == MatrixCoefficients::BT601) { + red = y * y_scale + v * multiply(coef(70100), uv_scale); + green = y * y_scale + u * multiply(coef(-17207), uv_scale) + v * multiply(coef(-35707), uv_scale); + blue = y * y_scale + u * multiply(coef(88600), uv_scale); + } + + if constexpr (MC == MatrixCoefficients::BT2020ConstantLuminance) { + red = y * y_scale + v * multiply(coef(73730), uv_scale); + green = y * y_scale + u * multiply(coef(-8228), uv_scale) + v * multiply(coef(-28568), uv_scale); + blue = y * y_scale + u * multiply(coef(94070), uv_scale); + } + + red = clamp(red, 0, maximum_value * one); + green = clamp(green, 0, maximum_value * one); + blue = clamp(blue, 0, maximum_value * one); + + // This compiles down to a bit shift if maximum_value == 255 + red /= fraction(maximum_value, 255); + green /= fraction(maximum_value, 255); + blue /= fraction(maximum_value, 255); + + return Gfx::Color(u8(red), u8(green), u8(blue)); + } + private: static constexpr size_t to_linear_size = 64; static constexpr size_t to_non_linear_size = 64; diff --git a/Userland/Libraries/LibVideo/VideoFrame.cpp b/Userland/Libraries/LibVideo/VideoFrame.cpp index 861e0c8445..41bff631c5 100644 --- a/Userland/Libraries/LibVideo/VideoFrame.cpp +++ b/Userland/Libraries/LibVideo/VideoFrame.cpp @@ -57,8 +57,8 @@ ALWAYS_INLINE void interpolate_row(u32 const row, u32 const width, u16 const* pl } } -template -ALWAYS_INLINE DecoderErrorOr convert_to_bitmap(ColorConverter const& converter, u32 const width, u32 const height, FixedArray const& plane_y, FixedArray const& plane_u, FixedArray const& plane_v, Gfx::Bitmap& bitmap) +template +ALWAYS_INLINE DecoderErrorOr convert_to_bitmap_subsampled(Convert convert, u32 const width, u32 const height, FixedArray const& plane_y, FixedArray const& plane_u, FixedArray const& plane_v, Gfx::Bitmap& bitmap) { VERIFY(bitmap.width() >= 0 && static_cast(bitmap.width()) == width); VERIFY(bitmap.height() >= 0 && static_cast(bitmap.height()) == height); @@ -99,13 +99,13 @@ ALWAYS_INLINE DecoderErrorOr convert_to_bitmap(ColorConverter const& conve auto* scan_line_a = bitmap.scanline(static_cast(row)); for (size_t column = 0; column < width; column++) { - scan_line_a[column] = converter.convert_yuv_to_full_range_rgb(y_row_a[column], u_row_a[column], v_row_a[column]).value(); + scan_line_a[column] = convert(y_row_a[column], u_row_a[column], v_row_a[column]).value(); } if constexpr (subsampling_vertical != 0) { auto const* y_row_b = &plane_y[static_cast(row + 1) * width]; auto* scan_line_b = bitmap.scanline(static_cast(row + 1)); for (size_t column = 0; column < width; column++) { - scan_line_b[column] = converter.convert_yuv_to_full_range_rgb(y_row_b[column], u_row_b[column], v_row_b[column]).value(); + scan_line_b[column] = convert(y_row_b[column], u_row_b[column], v_row_b[column]).value(); } } @@ -119,7 +119,7 @@ ALWAYS_INLINE DecoderErrorOr convert_to_bitmap(ColorConverter const& conve auto const* y_row = &plane_y[static_cast(height - 1) * width]; auto* scan_line = bitmap.scanline(static_cast(height - 1)); for (size_t column = 0; column < width; column++) { - scan_line[column] = converter.convert_yuv_to_full_range_rgb(y_row[column], u_row_a[column], v_row_a[column]).value(); + scan_line[column] = convert(y_row[column], u_row_a[column], v_row_a[column]).value(); } } } @@ -127,23 +127,49 @@ ALWAYS_INLINE DecoderErrorOr convert_to_bitmap(ColorConverter const& conve return {}; } +template +static ALWAYS_INLINE DecoderErrorOr convert_to_bitmap_selecting_converter(CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray const& plane_y, FixedArray const& plane_u, FixedArray const& plane_v, Gfx::Bitmap& bitmap) +{ + constexpr auto output_cicp = CodingIndependentCodePoints(ColorPrimaries::BT709, TransferCharacteristics::SRGB, MatrixCoefficients::BT709, VideoFullRangeFlag::Full); + + if (bit_depth == 8 && cicp.transfer_characteristics() == output_cicp.transfer_characteristics() && cicp.color_primaries() == output_cicp.color_primaries() && cicp.video_full_range_flag() == VideoFullRangeFlag::Studio) { + switch (cicp.matrix_coefficients()) { + case MatrixCoefficients::BT709: + return convert_to_bitmap_subsampled([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap); + case MatrixCoefficients::BT601: + return convert_to_bitmap_subsampled([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap); + case MatrixCoefficients::BT2020ConstantLuminance: + case MatrixCoefficients::BT2020NonConstantLuminance: + return convert_to_bitmap_subsampled([](u16 y, u16 u, u16 v) { return ColorConverter::convert_simple_yuv_to_rgb(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap); + default: + VERIFY_NOT_REACHED(); + } + } + + auto converter = TRY(ColorConverter::create(bit_depth, cicp, output_cicp)); + return convert_to_bitmap_subsampled([&](u16 y, u16 u, u16 v) { return converter.convert_yuv(y, u, v); }, width, height, plane_y, plane_u, plane_v, bitmap); +} + +static DecoderErrorOr convert_to_bitmap_selecting_subsampling(bool subsampling_horizontal, bool subsampling_vertical, CodingIndependentCodePoints cicp, u8 bit_depth, u32 const width, u32 const height, FixedArray const& plane_y, FixedArray const& plane_u, FixedArray const& plane_v, Gfx::Bitmap& bitmap) +{ + if (subsampling_horizontal && subsampling_vertical) { + return convert_to_bitmap_selecting_converter(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap); + } + + if (subsampling_horizontal && !subsampling_vertical) { + return convert_to_bitmap_selecting_converter(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap); + } + + if (!subsampling_horizontal && subsampling_vertical) { + return convert_to_bitmap_selecting_converter(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap); + } + + return convert_to_bitmap_selecting_converter(cicp, bit_depth, width, height, plane_y, plane_u, plane_v, bitmap); +} + DecoderErrorOr SubsampledYUVFrame::output_to_bitmap(Gfx::Bitmap& bitmap) { - auto converter = TRY(ColorConverter::create(bit_depth(), cicp())); - - if (m_subsampling_horizontal && m_subsampling_vertical) { - return convert_to_bitmap(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap); - } - - if (m_subsampling_horizontal && !m_subsampling_vertical) { - return convert_to_bitmap(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap); - } - - if (!m_subsampling_horizontal && m_subsampling_vertical) { - return convert_to_bitmap(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap); - } - - return convert_to_bitmap(converter, width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap); + return convert_to_bitmap_selecting_subsampling(m_subsampling_horizontal, m_subsampling_vertical, cicp(), bit_depth(), width(), height(), m_plane_y, m_plane_u, m_plane_v, bitmap); } }