LibVideo: Add support for VP9 superframes

This allows the second shown frame of the VP9 test video to be decoded. The second chunk is a superframe containing two frames: the first encodes a reference frame, and the second is inter-predicted from the keyframe and that reference frame.
2025-07-26 08:17:45 +00:00 · 2022-09-22 21:49:10 -05:00 · 2022-09-22 21:49:10 -05:00 · be0760871e
commit be0760871e
parent b0187dfc27
5 changed files with 119 additions and 17 deletions
--- a/Userland/Applications/VideoPlayer/main.cpp
+++ b/Userland/Applications/VideoPlayer/main.cpp
@ -59,7 +59,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
        if (!optional_sample.has_value())
            return;
-        auto result = vp9_decoder.decode_frame(optional_sample.release_value());
+        auto result = vp9_decoder.decode(optional_sample.release_value());
        if (result.is_error()) {
            outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@ -18,23 +18,74 @@ Decoder::Decoder()
 {
 }
-DecoderErrorOr<void> Decoder::decode_frame(ByteBuffer const& frame_data)
+DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
 {
-    TRY(m_parser->parse_frame(frame_data));
+    auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
-    // TODO:
+
-    //  - #2
+    if (superframe_sizes.is_empty()) {
-    //  - #3
+        return decode_frame(chunk_data);
-    //  - #4
+    }
-    TRY(update_reference_frames());
+
    // The chunk is a superframe: its frames are stored back to back from the start of the chunk,
    // so each one is sliced out at a running offset and decoded in order. The first failing
    // frame aborts the whole chunk.
    // NOTE(review): this function does not itself check offset + superframe_size against
    // chunk_data.size() before slicing — confirm how slice() behaves for out-of-range sizes.
    size_t offset = 0;
    for (auto superframe_size : superframe_sizes) {
        auto frame_data = chunk_data.slice(offset, superframe_size);
        TRY(decode_frame(frame_data));
        offset += superframe_size;
    }
    return {};
 }
 DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
 {
    // Convenience overload: view the buffer's bytes as a span and hand them to the
    // span-based decode() above, returning its result unchanged.
    auto const chunk_bytes = chunk_data.span();
    return decode(chunk_bytes);
 }
 // Forwards to the parser's dump_info(); this function adds nothing of its own.
 void Decoder::dump_frame_info()
 {
    m_parser->dump_info();
 }
 // Converts a (row, column) position into a flat index, where `stride` is the number of
 // elements between the start of one row and the start of the next.
 inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
 {
    // Widen before multiplying so the product is computed in size_t rather than wrapping
    // in 32-bit arithmetic and only then being converted to the return type.
    return static_cast<size_t>(row) * stride + column;
 }
 // (8.1) Decodes a single coded frame (not a superframe): parses it, carries the segment
 // map over for the next frame when required, and updates the reference frames.
 // Any error from parsing or from the reference frame update is propagated to the caller.
 DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
 {
    // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
    // tables include function calls indicating when the block decode processes should be triggered.
    TRY(m_parser->parse_frame(frame_data));
    // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
    // coded frame has been decoded.
    // FIXME: Implement loop filtering.
    // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
    // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
    // − show_existing_frame is equal to 0,
    // − segmentation_enabled is equal to 1,
    // − segmentation_update_map is equal to 1.
    if (!m_parser->m_show_existing_frame && m_parser->m_segmentation_enabled && m_parser->m_segmentation_update_map) {
        for (auto row = 0u; row < m_parser->m_mi_rows; row++) {
            for (auto column = 0u; column < m_parser->m_mi_cols; column++) {
                // Each row holds m_mi_cols entries, so the row stride is the column count.
                // Using the row count here would map distinct cells of a non-square frame
                // onto the same index and leave others untouched.
                auto index = index_from_row_and_column(row, column, m_parser->m_mi_cols);
                m_parser->m_prev_segment_ids[index] = m_parser->m_segment_ids[index];
            }
        }
    }
    // 4. The output process as specified in section 8.9 is invoked.
    // FIXME: Create a struct to store an output frame along with all information needed to display
    //        it. This function will need to append the images to a vector to ensure that if a superframe
    //        with multiple output frames is encountered, all of them can be displayed.
    // 5. The reference frame update process as specified in section 8.10 is invoked.
    TRY(update_reference_frames());
    return {};
 }
 inline size_t buffer_size(size_t width, size_t height)
 {
    return width * height;
@ -215,11 +266,6 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
    return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
 }
 // Converts a (row, column) position into a flat index, where `stride` is the number of
 // elements between the start of one row and the start of the next.
 inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
 {
    return row * stride + column;
 }
 DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
 {
    auto& frame_buffer = get_output_buffer(plane);
--- a/Userland/Libraries/LibVideo/VP9/Decoder.h
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.h
@ -9,6 +9,7 @@
 #include <AK/ByteBuffer.h>
 #include <AK/Error.h>
 #include <AK/Span.h>
 #include <LibVideo/DecoderError.h>
 #include "Parser.h"
@ -20,7 +21,9 @@ class Decoder {
 public:
    Decoder();
-    DecoderErrorOr<void> decode_frame(ByteBuffer const&);
+    /* (8.1) General */
    DecoderErrorOr<void> decode(Span<const u8>);
    DecoderErrorOr<void> decode(ByteBuffer const&);
    void dump_frame_info();
    // FIXME: These functions should be replaced by a struct that contains
@ -33,6 +36,8 @@ public:
 private:
    typedef i32 Intermediate;
    DecoderErrorOr<void> decode_frame(Span<const u8>);
    DecoderErrorOr<void> allocate_buffers();
    Vector<Intermediate>& get_temp_buffer(u8 plane);
    Vector<u16>& get_output_buffer(u8 plane);
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@ -28,8 +28,55 @@ Parser::~Parser()
 {
 }
 // Annex B: inspects the end of a chunk for a superframe index and returns the size in bytes
 // of every frame it lists, in order. Returns an empty vector when the chunk does not carry a
 // well-formed superframe index, in which case the caller should treat the whole chunk as a
 // single frame. An index whose frame sizes do not fit in the bytes preceding the index is
 // treated as malformed, so the returned sizes can always be sliced out of the chunk.
 Vector<size_t> Parser::parse_superframe_sizes(Span<const u8> frame_data)
 {
    if (frame_data.size() < 1)
        return {};
    // The decoder determines the presence of a superframe by:
    // 1. parsing the final byte of the chunk and checking that the superframe_marker equals 0b110,
    // 2. setting the total size of the superframe_index SzIndex equal to 2 + NumFrames * SzBytes,
    // 3. checking that the first byte of the superframe_index matches the final byte.
    // If the checks in steps 1 and 3 both pass, then the chunk is determined to contain a superframe and each
    // frame in the superframe is passed to the decoding process in turn.
    // Otherwise, the chunk is determined to not contain a superframe, and the whole chunk is passed to the
    // decoding process.
    // NOTE: Reading from span data will be quicker than spinning up a BitStream.
    u8 superframe_byte = frame_data[frame_data.size() - 1];
    // Step 1. The marker occupies the top three bits of the byte, hence the padded masks.
    if ((superframe_byte & 0b1110'0000) != 0b1100'0000)
        return {};
    u8 bytes_per_framesize = ((superframe_byte >> 3) & 0b11) + 1;
    u8 frames_in_superframe = (superframe_byte & 0b111) + 1;
    // Step 2. One marker byte at each end of the index plus the frame size fields.
    size_t index_size = 2 + bytes_per_framesize * frames_in_superframe;
    if (index_size > frame_data.size())
        return {};
    size_t index_start = frame_data.size() - index_size;
    // Step 3. The index must begin with the same byte it ends with.
    if (frame_data[index_start] != superframe_byte)
        return {};
    // The frame size fields follow the leading marker byte; the frames themselves occupy
    // the bytes in front of the index.
    size_t read_position = index_start + 1;
    size_t remaining_payload = index_start;
    Vector<size_t> result;
    for (u8 i = 0; i < frames_in_superframe; i++) {
        // Each size is stored little-endian in bytes_per_framesize bytes.
        size_t frame_size = 0;
        for (u8 j = 0; j < bytes_per_framesize; j++)
            frame_size |= (static_cast<size_t>(frame_data[read_position++]) << (j * 8));
        // Reject an index that claims more data than the chunk holds. Checking against the
        // remaining byte count per frame also avoids overflowing a running total.
        if (frame_size > remaining_payload)
            return {};
        remaining_payload -= frame_size;
        result.append(frame_size);
    }
    return result;
 }
 /* (6.1) */
-DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
+DecoderErrorOr<void> Parser::parse_frame(Span<const u8> frame_data)
 {
    m_bit_stream = make<BitStream>(frame_data.data(), frame_data.size());
    m_syntax_element_counter = make<SyntaxElementCounter>();
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@ -8,8 +8,8 @@
 #pragma once
 #include <AK/Array.h>
 #include <AK/ByteBuffer.h>
 #include <AK/OwnPtr.h>
 #include <AK/Span.h>
 #include <AK/Vector.h>
 #include <LibGfx/Forward.h>
 #include <LibVideo/DecoderError.h>
@ -32,10 +32,14 @@ class Parser {
 public:
    explicit Parser(Decoder&);
    ~Parser();
-    DecoderErrorOr<void> parse_frame(ByteBuffer const&);
+    DecoderErrorOr<void> parse_frame(Span<const u8>);
    void dump_info();
 private:
    /* Annex B: Superframes are a method of storing multiple coded frames into a single chunk
     * See also section 5.26. */
    Vector<size_t> parse_superframe_sizes(Span<const u8>);
    DecoderErrorOr<FrameType> read_frame_type();
    DecoderErrorOr<ColorRange> read_color_range();