mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 08:17:45 +00:00
LibVideo: Add support for VP9 superframes
This allows the second shown frame of the VP9 test video to be decoded. The second chunk is a superframe containing two frames: a reference frame, and a second frame that is inter-predicted from the keyframe and that reference frame.
This commit is contained in:
parent
b0187dfc27
commit
be0760871e
5 changed files with 119 additions and 17 deletions
|
@ -59,7 +59,7 @@ ErrorOr<int> serenity_main(Main::Arguments arguments)
|
||||||
if (!optional_sample.has_value())
|
if (!optional_sample.has_value())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
auto result = vp9_decoder.decode_frame(optional_sample.release_value());
|
auto result = vp9_decoder.decode(optional_sample.release_value());
|
||||||
|
|
||||||
if (result.is_error()) {
|
if (result.is_error()) {
|
||||||
outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
|
outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
|
||||||
|
|
|
@ -18,23 +18,74 @@ Decoder::Decoder()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
/* (8.1) General
 * Decodes one chunk of VP9 data. A chunk holds either a single coded frame, or a superframe
 * (Annex B) made up of several coded frames stored back to back and followed by an index.
 *
 * Returns an error if any contained frame fails to decode, or if the superframe index
 * describes a frame that does not fit inside the chunk. */
DecoderErrorOr<void> Decoder::decode(Span<const u8> chunk_data)
{
    auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);

    // No superframe index was found, so the whole chunk is passed on as a single frame.
    if (superframe_sizes.is_empty())
        return decode_frame(chunk_data);

    size_t offset = 0;

    for (auto superframe_size : superframe_sizes) {
        // The sizes come straight from the bitstream, so make sure each frame lies within the
        // chunk before slicing. offset never exceeds chunk_data.size(), so the subtraction
        // below cannot underflow.
        if (superframe_size > chunk_data.size() - offset)
            return DecoderError::corrupted("Superframe frame size exceeds the chunk size"sv);

        auto frame_data = chunk_data.slice(offset, superframe_size);
        TRY(decode_frame(frame_data));
        offset += superframe_size;
    }

    return {};
}
|
||||||
|
|
||||||
|
// Convenience overload: takes a view of the buffer's bytes and forwards it to the Span overload.
DecoderErrorOr<void> Decoder::decode(ByteBuffer const& chunk_data)
{
    auto chunk_bytes = chunk_data.span();
    return decode(chunk_bytes);
}
|
||||||
|
|
||||||
void Decoder::dump_frame_info()
|
void Decoder::dump_frame_info()
|
||||||
{
|
{
|
||||||
m_parser->dump_info();
|
m_parser->dump_info();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Converts a (row, column) position into an index into a flat buffer whose rows are `stride`
// entries long. The arithmetic is carried out in size_t so that the product cannot wrap
// around at 32 bits before it is widened to the return type.
inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
{
    return static_cast<size_t>(row) * stride + column;
}
|
||||||
|
|
||||||
|
/* (8.1) General
 * Decodes a single coded frame by following the numbered steps of the decoding process.
 * Returns an error if parsing the frame or updating the reference frames fails. */
DecoderErrorOr<void> Decoder::decode_frame(Span<const u8> frame_data)
{
    // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
    // tables include function calls indicating when the block decode processes should be triggered.
    TRY(m_parser->parse_frame(frame_data));

    // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
    // coded frame has been decoded.
    // FIXME: Implement loop filtering.

    // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
    // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
    // − show_existing_frame is equal to 0,
    // − segmentation_enabled is equal to 1,
    // − segmentation_update_map is equal to 1.
    if (!m_parser->m_show_existing_frame && m_parser->m_segmentation_enabled && m_parser->m_segmentation_update_map) {
        // The column runs over 0..MiCols-1, so the stride has to be MiCols for every
        // (row, column) pair to map to a distinct index inside the MiRows * MiCols range.
        auto const stride = m_parser->m_mi_cols;
        for (auto row = 0u; row < m_parser->m_mi_rows; row++) {
            for (auto column = 0u; column < m_parser->m_mi_cols; column++) {
                auto index = index_from_row_and_column(row, column, stride);
                m_parser->m_prev_segment_ids[index] = m_parser->m_segment_ids[index];
            }
        }
    }

    // 4. The output process as specified in section 8.9 is invoked.
    // FIXME: Create a struct to store an output frame along with all information needed to display
    //        it. This function will need to append the images to a vector to ensure that if a superframe
    //        with multiple output frames is encountered, all of them can be displayed.

    // 5. The reference frame update process as specified in section 8.10 is invoked.
    TRY(update_reference_frames());
    return {};
}
|
||||||
|
|
||||||
inline size_t buffer_size(size_t width, size_t height)
|
inline size_t buffer_size(size_t width, size_t height)
|
||||||
{
|
{
|
||||||
return width * height;
|
return width * height;
|
||||||
|
@ -215,11 +266,6 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
|
||||||
return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
|
return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
|
||||||
}
|
}
|
||||||
|
|
||||||
inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
|
|
||||||
{
|
|
||||||
return row * stride + column;
|
|
||||||
}
|
|
||||||
|
|
||||||
DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
|
DecoderErrorOr<void> Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
|
||||||
{
|
{
|
||||||
auto& frame_buffer = get_output_buffer(plane);
|
auto& frame_buffer = get_output_buffer(plane);
|
||||||
|
|
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include <AK/ByteBuffer.h>
|
#include <AK/ByteBuffer.h>
|
||||||
#include <AK/Error.h>
|
#include <AK/Error.h>
|
||||||
|
#include <AK/Span.h>
|
||||||
#include <LibVideo/DecoderError.h>
|
#include <LibVideo/DecoderError.h>
|
||||||
|
|
||||||
#include "Parser.h"
|
#include "Parser.h"
|
||||||
|
@ -20,7 +21,9 @@ class Decoder {
|
||||||
|
|
||||||
public:
|
public:
|
||||||
Decoder();
|
Decoder();
|
||||||
DecoderErrorOr<void> decode_frame(ByteBuffer const&);
|
/* (8.1) General */
|
||||||
|
DecoderErrorOr<void> decode(Span<const u8>);
|
||||||
|
DecoderErrorOr<void> decode(ByteBuffer const&);
|
||||||
void dump_frame_info();
|
void dump_frame_info();
|
||||||
|
|
||||||
// FIXME: These functions should be replaced by a struct that contains
|
// FIXME: These functions should be replaced by a struct that contains
|
||||||
|
@ -33,6 +36,8 @@ public:
|
||||||
private:
|
private:
|
||||||
typedef i32 Intermediate;
|
typedef i32 Intermediate;
|
||||||
|
|
||||||
|
DecoderErrorOr<void> decode_frame(Span<const u8>);
|
||||||
|
|
||||||
DecoderErrorOr<void> allocate_buffers();
|
DecoderErrorOr<void> allocate_buffers();
|
||||||
Vector<Intermediate>& get_temp_buffer(u8 plane);
|
Vector<Intermediate>& get_temp_buffer(u8 plane);
|
||||||
Vector<u16>& get_output_buffer(u8 plane);
|
Vector<u16>& get_output_buffer(u8 plane);
|
||||||
|
|
|
@ -28,8 +28,55 @@ Parser::~Parser()
|
||||||
{
|
{
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Annex B: Looks for a superframe index at the end of a chunk.
 * Returns the size in bytes of every frame listed in the index, in the order they are listed,
 * or an empty vector when the chunk does not end in a valid superframe index. */
Vector<size_t> Parser::parse_superframe_sizes(Span<const u8> frame_data)
{
    if (frame_data.size() < 1)
        return {};

    // The decoder determines the presence of a superframe by:
    // 1. parsing the final byte of the chunk and checking that the superframe_marker equals 0b110,
    // 2. setting the total size of the superframe_index SzIndex equal to 2 + NumFrames * SzBytes,
    // 3. checking that the first byte of the superframe_index matches the final byte.
    //
    // If the checks in steps 1 and 3 both pass, then the chunk is determined to contain a superframe and each
    // frame in the superframe is passed to the decoding process in turn.
    // Otherwise, the chunk is determined to not contain a superframe, and the whole chunk is passed to the
    // decoding process.

    // NOTE: Indexing the span directly will be quicker than spinning up a BitStream.
    auto const chunk_size = frame_data.size();
    u8 const marker_byte = frame_data[chunk_size - 1];

    // Step 1. The marker occupies the top three bits of the byte, so the masks keep the lower
    // five bits as padding.
    if ((marker_byte & 0b1110'0000) != 0b1100'0000)
        return {};

    u8 const size_bytes = ((marker_byte >> 3) & 0b11) + 1;
    u8 const frame_count = (marker_byte & 0b111) + 1;

    // Step 2. The index is one marker byte, the frame sizes, and a second marker byte.
    size_t const index_size = 2 + size_bytes * frame_count;
    if (index_size > chunk_size)
        return {};

    // Step 3. The index must begin with the same byte that ends the chunk.
    size_t cursor = chunk_size - index_size;
    if (frame_data[cursor++] != marker_byte)
        return {};

    // Every frame size is stored least significant byte first.
    Vector<size_t> sizes;
    for (u8 frame = 0; frame < frame_count; frame++) {
        size_t size = 0;
        for (u8 byte = 0; byte < size_bytes; byte++)
            size |= static_cast<size_t>(frame_data[cursor++]) << (byte * 8);
        sizes.append(size);
    }

    return sizes;
}
|
||||||
|
|
||||||
/* (6.1) */
|
/* (6.1) */
|
||||||
DecoderErrorOr<void> Parser::parse_frame(ByteBuffer const& frame_data)
|
DecoderErrorOr<void> Parser::parse_frame(Span<const u8> frame_data)
|
||||||
{
|
{
|
||||||
m_bit_stream = make<BitStream>(frame_data.data(), frame_data.size());
|
m_bit_stream = make<BitStream>(frame_data.data(), frame_data.size());
|
||||||
m_syntax_element_counter = make<SyntaxElementCounter>();
|
m_syntax_element_counter = make<SyntaxElementCounter>();
|
||||||
|
|
|
@ -8,8 +8,8 @@
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
#include <AK/Array.h>
|
#include <AK/Array.h>
|
||||||
#include <AK/ByteBuffer.h>
|
|
||||||
#include <AK/OwnPtr.h>
|
#include <AK/OwnPtr.h>
|
||||||
|
#include <AK/Span.h>
|
||||||
#include <AK/Vector.h>
|
#include <AK/Vector.h>
|
||||||
#include <LibGfx/Forward.h>
|
#include <LibGfx/Forward.h>
|
||||||
#include <LibVideo/DecoderError.h>
|
#include <LibVideo/DecoderError.h>
|
||||||
|
@ -32,10 +32,14 @@ class Parser {
|
||||||
public:
|
public:
|
||||||
explicit Parser(Decoder&);
|
explicit Parser(Decoder&);
|
||||||
~Parser();
|
~Parser();
|
||||||
DecoderErrorOr<void> parse_frame(ByteBuffer const&);
|
DecoderErrorOr<void> parse_frame(Span<const u8>);
|
||||||
void dump_info();
|
void dump_info();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
/* Annex B: Superframes are a method of storing multiple coded frames into a single chunk
|
||||||
|
* See also section 5.26. */
|
||||||
|
Vector<size_t> parse_superframe_sizes(Span<const u8>);
|
||||||
|
|
||||||
DecoderErrorOr<FrameType> read_frame_type();
|
DecoderErrorOr<FrameType> read_frame_type();
|
||||||
DecoderErrorOr<ColorRange> read_color_range();
|
DecoderErrorOr<ColorRange> read_color_range();
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue