diff --git a/Userland/Applications/VideoPlayer/main.cpp b/Userland/Applications/VideoPlayer/main.cpp
index 74f2a439a8..8ef8162980 100644
--- a/Userland/Applications/VideoPlayer/main.cpp
+++ b/Userland/Applications/VideoPlayer/main.cpp
@@ -59,7 +59,7 @@ ErrorOr serenity_main(Main::Arguments arguments)
         if (!optional_sample.has_value())
             return;
 
-        auto result = vp9_decoder.decode_frame(optional_sample.release_value());
+        auto result = vp9_decoder.decode(optional_sample.release_value());
         if (result.is_error()) {
             outln("Error decoding frame {}: {}", frame_number, result.error().string_literal());
diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.cpp b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
index 02361437e4..92ee747cca 100644
--- a/Userland/Libraries/LibVideo/VP9/Decoder.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.cpp
@@ -18,23 +18,81 @@ Decoder::Decoder()
 {
 }
 
-DecoderErrorOr Decoder::decode_frame(ByteBuffer const& frame_data)
+// Decodes one chunk: if the parser reports superframe frame sizes, each frame is sliced out
+// and decoded in order; otherwise the whole chunk is decoded as a single frame.
+DecoderErrorOr Decoder::decode(Span chunk_data)
 {
-    TRY(m_parser->parse_frame(frame_data));
-    // TODO:
-    //  - #2
-    //  - #3
-    //  - #4
-    TRY(update_reference_frames());
+    auto superframe_sizes = m_parser->parse_superframe_sizes(chunk_data);
+
+    if (superframe_sizes.is_empty()) {
+        return decode_frame(chunk_data);
+    }
+
+    size_t offset = 0;
+
+    for (auto superframe_size : superframe_sizes) {
+        // FIXME: Frame sizes from the superframe index are not checked against the chunk size before slicing.
+        auto frame_data = chunk_data.slice(offset, superframe_size);
+        TRY(decode_frame(frame_data));
+        offset += superframe_size;
+    }
 
     return {};
 }
 
+// Convenience overload that forwards the buffer's bytes to decode(Span).
+DecoderErrorOr Decoder::decode(ByteBuffer const& chunk_data)
+{
+    return decode(chunk_data.span());
+}
+
 void Decoder::dump_frame_info()
 {
     m_parser->dump_info();
 }
 
+// Flattens a (row, column) position into an index, where stride is the number of elements per row.
+inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
+{
+    return row * stride + column;
+}
+
+// Decodes a single coded frame following the numbered steps of the general decoding process below.
+DecoderErrorOr Decoder::decode_frame(Span frame_data)
+{
+    // 1. The syntax elements for the coded frame are extracted as specified in sections 6 and 7. The syntax
+    // tables include function calls indicating when the block decode processes should be triggered.
+    TRY(m_parser->parse_frame(frame_data));
+
+    // 2. If loop_filter_level is not equal to 0, the loop filter process as specified in section 8.8 is invoked once the
+    // coded frame has been decoded.
+    // FIXME: Implement loop filtering.
+
+    // 3. If all of the following conditions are true, PrevSegmentIds[ row ][ col ] is set equal to
+    // SegmentIds[ row ][ col ] for row = 0..MiRows-1, for col = 0..MiCols-1:
+    // − show_existing_frame is equal to 0,
+    // − segmentation_enabled is equal to 1,
+    // − segmentation_update_map is equal to 1.
+    if (!m_parser->m_show_existing_frame && m_parser->m_segmentation_enabled && m_parser->m_segmentation_update_map) {
+        for (auto row = 0u; row < m_parser->m_mi_rows; row++) {
+            for (auto column = 0u; column < m_parser->m_mi_cols; column++) {
+                // Use m_mi_cols as the stride so every index in [0, m_mi_rows * m_mi_cols) is visited exactly once.
+                auto index = index_from_row_and_column(row, column, m_parser->m_mi_cols);
+                m_parser->m_prev_segment_ids[index] = m_parser->m_segment_ids[index];
+            }
+        }
+    }
+
+    // 4. The output process as specified in section 8.9 is invoked.
+    // FIXME: Create a struct to store an output frame along with all information needed to display
+    // it. This function will need to append the images to a vector to ensure that if a superframe
+    // with multiple output frames is encountered, all of them can be displayed.
+
+    // 5. The reference frame update process as specified in section 8.10 is invoked.
+    TRY(update_reference_frames());
+    return {};
+}
+
 inline size_t buffer_size(size_t width, size_t height)
 {
     return width * height;
@@ -215,11 +273,6 @@ u8 Decoder::adapt_prob(u8 prob, u8 counts[2])
     return merge_prob(prob, counts[0], counts[1], COUNT_SAT, MAX_UPDATE_FACTOR);
 }
 
-inline size_t index_from_row_and_column(u32 row, u32 column, u32 stride)
-{
-    return row * stride + column;
-}
-
 DecoderErrorOr Decoder::predict_intra(u8 plane, u32 x, u32 y, bool have_left, bool have_above, bool not_on_right, TXSize tx_size, u32 block_index)
 {
     auto& frame_buffer = get_output_buffer(plane);
diff --git a/Userland/Libraries/LibVideo/VP9/Decoder.h b/Userland/Libraries/LibVideo/VP9/Decoder.h
index 179d73b7c2..e382cf87fb 100644
--- a/Userland/Libraries/LibVideo/VP9/Decoder.h
+++ b/Userland/Libraries/LibVideo/VP9/Decoder.h
@@ -9,6 +9,7 @@
 
 #include
 #include
+#include
 #include
 
 #include "Parser.h"
@@ -20,7 +21,9 @@ class Decoder {
 
 public:
     Decoder();
-    DecoderErrorOr decode_frame(ByteBuffer const&);
+    /* (8.1) General */
+    DecoderErrorOr decode(Span);
+    DecoderErrorOr decode(ByteBuffer const&);
     void dump_frame_info();
 
     // FIXME: These functions should be replaced by a struct that contains
@@ -33,6 +36,8 @@ public:
 private:
     typedef i32 Intermediate;
 
+    DecoderErrorOr decode_frame(Span);
+
     DecoderErrorOr allocate_buffers();
     Vector& get_temp_buffer(u8 plane);
     Vector& get_output_buffer(u8 plane);
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.cpp b/Userland/Libraries/LibVideo/VP9/Parser.cpp
index 031a9b8773..ff6cf753af 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.cpp
+++ b/Userland/Libraries/LibVideo/VP9/Parser.cpp
@@ -28,8 +28,57 @@ Parser::~Parser()
 {
 }
 
+// Returns the frame sizes listed in the superframe index at the end of the chunk,
+// or an empty Vector when the chunk does not carry a valid superframe index.
+Vector Parser::parse_superframe_sizes(Span frame_data)
+{
+    if (frame_data.size() < 1)
+        return {};
+
+    // The decoder determines the presence of a superframe by:
+    // 1. parsing the final byte of the chunk and checking that the superframe_marker equals 0b110,
+
+    // If the checks in steps 1 and 3 both pass, then the chunk is determined to contain a superframe and each
+    // frame in the superframe is passed to the decoding process in turn.
+    // Otherwise, the chunk is determined to not contain a superframe, and the whole chunk is passed to the
+    // decoding process.
+
+    // NOTE: Reading from span data will be quicker than spinning up a BitStream.
+    u8 superframe_byte = frame_data[frame_data.size() - 1];
+
+    // NOTE: We have to read out of the byte from the little end first, hence the padding bits in the masks below.
+    u8 superframe_marker = superframe_byte & 0b1110'0000;
+    if (superframe_marker == 0b1100'0000) {
+        u8 bytes_per_framesize = ((superframe_byte >> 3) & 0b11) + 1;
+        u8 frames_in_superframe = (superframe_byte & 0b111) + 1;
+        // 2. setting the total size of the superframe_index SzIndex equal to 2 + NumFrames * SzBytes,
+        size_t index_size = 2 + bytes_per_framesize * frames_in_superframe;
+
+        if (index_size > frame_data.size())
+            return {};
+
+        auto superframe_header_data = frame_data.data() + frame_data.size() - index_size;
+
+        u8 start_superframe_byte = *(superframe_header_data++);
+        // 3. checking that the first byte of the superframe_index matches the final byte.
+        if (superframe_byte != start_superframe_byte)
+            return {};
+
+        Vector result;
+        for (u8 i = 0; i < frames_in_superframe; i++) {
+            size_t frame_size = 0;
+            for (u8 j = 0; j < bytes_per_framesize; j++)
+                frame_size |= (static_cast(*(superframe_header_data++)) << (j * 8));
+            result.append(frame_size);
+        }
+        return result;
+    }
+
+    return {};
+}
+
 /* (6.1) */
-DecoderErrorOr Parser::parse_frame(ByteBuffer const& frame_data)
+DecoderErrorOr Parser::parse_frame(Span frame_data)
 {
     m_bit_stream = make(frame_data.data(), frame_data.size());
     m_syntax_element_counter = make();
diff --git a/Userland/Libraries/LibVideo/VP9/Parser.h b/Userland/Libraries/LibVideo/VP9/Parser.h
index 5e13e5687b..5a3df1c36a 100644
--- a/Userland/Libraries/LibVideo/VP9/Parser.h
+++ b/Userland/Libraries/LibVideo/VP9/Parser.h
@@ -8,8 +8,8 @@
 #pragma once
 
 #include
-#include
 #include
+#include
 #include
 #include
 #include
@@ -32,10 +32,14 @@ class Parser {
 public:
     explicit Parser(Decoder&);
     ~Parser();
-    DecoderErrorOr parse_frame(ByteBuffer const&);
+    DecoderErrorOr parse_frame(Span);
     void dump_info();
 
 private:
+    /* Annex B: Superframes are a method of storing multiple coded frames into a single chunk
+     * See also section 5.26. */
+    Vector parse_superframe_sizes(Span);
+
     DecoderErrorOr read_frame_type();
     DecoderErrorOr read_color_range();