mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 20:37:35 +00:00
LibVideo: Scaffold LibVideo and implement simplistic Matroska parser
This commit initializes the LibVideo library and implements parsing of basic Matroska container files. Currently, it only parses audio and video tracks.
This commit is contained in:
parent
6a15bd06cb
commit
403bb07443
7 changed files with 840 additions and 0 deletions
458
Userland/Libraries/LibVideo/MatroskaReader.cpp
Normal file
458
Userland/Libraries/LibVideo/MatroskaReader.cpp
Normal file
|
@ -0,0 +1,458 @@
|
|||
/*
|
||||
* Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "MatroskaReader.h"
|
||||
#include <AK/Function.h>
|
||||
#include <AK/MappedFile.h>
|
||||
#include <AK/Optional.h>
|
||||
#include <AK/Utf8View.h>
|
||||
|
||||
namespace Video {
|
||||
|
||||
// Returns `false` from the enclosing function (or lambda) when `x`, an
// Optional-like object, holds no value.
//
// The body is wrapped in do { } while (0) so that the macro expands to exactly
// one statement: `if (c) CHECK_HAS_VALUE(v); else ...` then binds the `else`
// to the caller's `if` rather than to the `if` hidden inside the macro.
#define CHECK_HAS_VALUE(x)    \
    do {                      \
        if (!(x).has_value()) \
            return false;     \
    } while (0)
|
||||
|
||||
// Element IDs recognised by this reader, grouped by the parser that consumes them.

// Top-level elements checked by parse().
constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3;
constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067;

// Children handled by parse_ebml_header().
constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282;
constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287;

// Children handled by parse_segment_elements().
constexpr u32 SEGMENT_INFORMATION_ELEMENT_ID = 0x1549A966;
constexpr u32 TRACK_ELEMENT_ID = 0x1654AE6B;
constexpr u32 CLUSTER_ELEMENT_ID = 0x1F43B675;

// Children handled by parse_information().
constexpr u32 TIMESTAMP_SCALE_ID = 0x2AD7B1;
constexpr u32 MUXING_APP_ID = 0x4D80;
constexpr u32 WRITING_APP_ID = 0x5741;

// Children handled by parse_tracks() and parse_track_entry().
constexpr u32 TRACK_ENTRY_ID = 0xAE;
constexpr u32 TRACK_NUMBER_ID = 0xD7;
constexpr u32 TRACK_UID_ID = 0x73C5;
constexpr u32 TRACK_TYPE_ID = 0x83;
constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
constexpr u32 TRACK_CODEC_ID = 0x86;
constexpr u32 TRACK_VIDEO_ID = 0xE0;
constexpr u32 TRACK_AUDIO_ID = 0xE1;

// Children handled by parse_video_track_information().
constexpr u32 PIXEL_WIDTH_ID = 0xB0;
constexpr u32 PIXEL_HEIGHT_ID = 0xBA;

// Children handled by parse_audio_track_information().
constexpr u32 CHANNELS_ID = 0x9F;
constexpr u32 BIT_DEPTH_ID = 0x6264;

// Children handled by parse_cluster().
constexpr u32 SIMPLE_BLOCK_ID = 0xA3;
constexpr u32 TIMESTAMP_ID = 0xE7;
|
||||
|
||||
// Maps the file at `path` into memory and parses its contents as a Matroska
// document. Returns a null OwnPtr when the file cannot be mapped or when
// parsing fails.
OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_file(const StringView& path)
{
    auto file_or_error = MappedFile::map(path);
    if (file_or_error.is_error())
        return {};

    // Keep the mapping alive in a local for the whole duration of the parse.
    auto file = file_or_error.release_value();
    auto* bytes = static_cast<const u8*>(file->data());
    return parse_matroska_from_data(bytes, file->size());
}
|
||||
|
||||
// Parses `size` bytes starting at `data` as a Matroska document by running a
// throwaway reader over them. Returns a null OwnPtr when parsing fails.
OwnPtr<MatroskaDocument> MatroskaReader::parse_matroska_from_data(const u8* data, size_t size)
{
    return MatroskaReader(data, size).parse();
}
|
||||
|
||||
// Parses the whole stream: an EBML header element followed by a Segment
// element. Returns the populated document, or a null OwnPtr if either element
// is missing, has an unexpected ID, or fails to parse.
OwnPtr<MatroskaDocument> MatroskaReader::parse()
{
    auto first_element_id = m_streamer.read_variable_size_integer(false);
    // Check for a value *before* logging it: with tracing enabled, calling
    // value() on an empty Optional would otherwise happen on truncated input.
    if (!first_element_id.has_value())
        return {};
    dbgln_if(MATROSKA_TRACE_DEBUG, "First element ID is {:#010x}\n", first_element_id.value());
    if (first_element_id.value() != EBML_MASTER_ELEMENT_ID)
        return {};

    auto header = parse_ebml_header();
    if (!header.has_value())
        return {};

    dbgln_if(MATROSKA_DEBUG, "Parsed EBML header");

    auto root_element_id = m_streamer.read_variable_size_integer(false);
    if (!root_element_id.has_value() || root_element_id.value() != SEGMENT_ELEMENT_ID)
        return {};

    auto matroska_document = make<MatroskaDocument>(header.value());

    auto segment_parse_success = parse_segment_elements(*matroska_document);
    if (!segment_parse_success)
        return {};

    return matroska_document;
}
|
||||
|
||||
bool MatroskaReader::parse_master_element([[maybe_unused]] const StringView& element_name, Function<bool(u64)> element_consumer)
|
||||
{
|
||||
auto element_data_size = m_streamer.read_variable_size_integer();
|
||||
CHECK_HAS_VALUE(element_data_size);
|
||||
dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size.value());
|
||||
|
||||
m_streamer.push_octets_read();
|
||||
while (m_streamer.octets_read() < element_data_size.value()) {
|
||||
dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======");
|
||||
auto optional_element_id = m_streamer.read_variable_size_integer(false);
|
||||
CHECK_HAS_VALUE(optional_element_id);
|
||||
|
||||
auto element_id = optional_element_id.value();
|
||||
dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}\n", element_name, element_id);
|
||||
|
||||
if (!element_consumer(element_id)) {
|
||||
dbgln_if(MATROSKA_DEBUG, "{:s} consumer failed on ID {:#010x}\n", element_name.to_string().characters(), element_id);
|
||||
return false;
|
||||
}
|
||||
|
||||
dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", m_streamer.octets_read(), element_name);
|
||||
}
|
||||
m_streamer.pop_octets_read();
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Parses the children of the EBML header element, recording the DocType and
// DocTypeVersion and skipping every other child. Returns an empty Optional if
// any child fails to parse.
Optional<EBMLHeader> MatroskaReader::parse_ebml_header()
{
    EBMLHeader header;

    bool parsed = parse_master_element("Header", [&](u64 element_id) {
        switch (element_id) {
        case DOCTYPE_ELEMENT_ID: {
            auto doc_type = read_string_element();
            CHECK_HAS_VALUE(doc_type);
            header.doc_type = doc_type.value();
            dbgln_if(MATROSKA_DEBUG, "Read DocType attribute: {}", doc_type.value());
            return true;
        }
        case DOCTYPE_VERSION_ELEMENT_ID: {
            auto doc_type_version = read_u64_element();
            CHECK_HAS_VALUE(doc_type_version);
            header.doc_type_version = doc_type_version.value();
            dbgln_if(MATROSKA_DEBUG, "Read DocTypeVersion attribute: {}", doc_type_version.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return header;
}
|
||||
|
||||
// Parses the children of the Segment element into `matroska_document`:
// segment information, tracks and clusters are stored on the document, every
// other child is skipped. Returns false as soon as any child fails to parse.
bool MatroskaReader::parse_segment_elements(MatroskaDocument& matroska_document)
{
    dbgln_if(MATROSKA_DEBUG, "Parsing segment elements");

    auto success = parse_master_element("Segment", [&](u64 element_id) {
        switch (element_id) {
        case SEGMENT_INFORMATION_ELEMENT_ID: {
            auto segment_information = parse_information();
            if (!segment_information)
                return false;
            matroska_document.set_segment_information(move(segment_information));
            return true;
        }
        case TRACK_ELEMENT_ID:
            return parse_tracks(matroska_document);
        case CLUSTER_ELEMENT_ID: {
            auto cluster = parse_cluster();
            if (!cluster)
                return false;
            matroska_document.clusters().append(cluster.release_nonnull());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    // Gated like every other diagnostic in this file; this used to be an
    // unconditional dbgln() that printed on every parse.
    dbgln_if(MATROSKA_DEBUG, "Success {}", success);
    return success;
}
|
||||
|
||||
// Parses the children of the Segment Information element, recording the
// timestamp scale, muxing app and writing app and skipping every other child.
// Returns a null OwnPtr if any child fails to parse.
OwnPtr<SegmentInformation> MatroskaReader::parse_information()
{
    auto segment_information = make<SegmentInformation>();

    bool parsed = parse_master_element("Segment Information", [&](u64 element_id) {
        switch (element_id) {
        case TIMESTAMP_SCALE_ID: {
            auto timestamp_scale = read_u64_element();
            CHECK_HAS_VALUE(timestamp_scale);
            segment_information->set_timestamp_scale(timestamp_scale.value());
            dbgln_if(MATROSKA_DEBUG, "Read TimestampScale attribute: {}", timestamp_scale.value());
            return true;
        }
        case MUXING_APP_ID: {
            auto muxing_app = read_string_element();
            CHECK_HAS_VALUE(muxing_app);
            segment_information->set_muxing_app(muxing_app.value());
            dbgln_if(MATROSKA_DEBUG, "Read MuxingApp attribute: {}", muxing_app.value());
            return true;
        }
        case WRITING_APP_ID: {
            auto writing_app = read_string_element();
            CHECK_HAS_VALUE(writing_app);
            segment_information->set_writing_app(writing_app.value());
            dbgln_if(MATROSKA_DEBUG, "Read WritingApp attribute: {}", writing_app.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return segment_information;
}
|
||||
|
||||
// Parses the children of the Tracks element, adding every track entry to
// `matroska_document` keyed by its track number and skipping every other
// child. Returns false if any child fails to parse.
bool MatroskaReader::parse_tracks(MatroskaDocument& matroska_document)
{
    return parse_master_element("Tracks", [&](u64 element_id) {
        if (element_id != TRACK_ENTRY_ID)
            return read_unknown_element();

        dbgln_if(MATROSKA_DEBUG, "Parsing track");
        auto track_entry = parse_track_entry();
        if (!track_entry)
            return false;

        // Read the number before ownership of the entry is handed over.
        auto track_number = track_entry->track_number();
        matroska_document.add_track(track_number, track_entry.release_nonnull());
        dbgln_if(MATROSKA_DEBUG, "Track {} added to document", track_number);
        return true;
    });
}
|
||||
|
||||
// Parses the children of a single track entry: number, UID, type, language,
// codec ID and the nested video/audio information. Every other child is
// skipped. Returns a null OwnPtr if any child fails to parse.
OwnPtr<TrackEntry> MatroskaReader::parse_track_entry()
{
    auto track_entry = make<TrackEntry>();

    bool parsed = parse_master_element("Track", [&](u64 element_id) {
        switch (element_id) {
        case TRACK_NUMBER_ID: {
            auto track_number = read_u64_element();
            CHECK_HAS_VALUE(track_number);
            track_entry->set_track_number(track_number.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackNumber attribute: {}", track_number.value());
            return true;
        }
        case TRACK_UID_ID: {
            auto track_uid = read_u64_element();
            CHECK_HAS_VALUE(track_uid);
            track_entry->set_track_uid(track_uid.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackUID attribute: {}", track_uid.value());
            return true;
        }
        case TRACK_TYPE_ID: {
            auto track_type = read_u64_element();
            CHECK_HAS_VALUE(track_type);
            track_entry->set_track_type(static_cast<TrackEntry::TrackType>(track_type.value()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read TrackType attribute: {}", track_type.value());
            return true;
        }
        case TRACK_LANGUAGE_ID: {
            auto language = read_string_element();
            CHECK_HAS_VALUE(language);
            track_entry->set_language(language.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's Language attribute: {}", language.value());
            return true;
        }
        case TRACK_CODEC_ID: {
            auto codec_id = read_string_element();
            CHECK_HAS_VALUE(codec_id);
            track_entry->set_codec_id(codec_id.value());
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", codec_id.value());
            return true;
        }
        case TRACK_VIDEO_ID: {
            auto video_track = parse_video_track_information();
            CHECK_HAS_VALUE(video_track);
            track_entry->set_video_track(video_track.value());
            return true;
        }
        case TRACK_AUDIO_ID: {
            auto audio_track = parse_audio_track_information();
            CHECK_HAS_VALUE(audio_track);
            track_entry->set_audio_track(audio_track.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return track_entry;
}
|
||||
|
||||
// Parses the children of a track's video element, recording the pixel width
// and height and skipping every other child. Returns an empty Optional if any
// child fails to parse.
Optional<TrackEntry::VideoTrack> MatroskaReader::parse_video_track_information()
{
    TrackEntry::VideoTrack video_track {};

    bool parsed = parse_master_element("VideoTrack", [&](u64 element_id) {
        switch (element_id) {
        case PIXEL_WIDTH_ID: {
            auto pixel_width = read_u64_element();
            CHECK_HAS_VALUE(pixel_width);
            video_track.pixel_width = pixel_width.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelWidth attribute: {}", pixel_width.value());
            return true;
        }
        case PIXEL_HEIGHT_ID: {
            auto pixel_height = read_u64_element();
            CHECK_HAS_VALUE(pixel_height);
            video_track.pixel_height = pixel_height.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read VideoTrack's PixelHeight attribute: {}", pixel_height.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return video_track;
}
|
||||
|
||||
// Parses the children of a track's audio element, recording the channel count
// and bit depth and skipping every other child. Returns an empty Optional if
// any child fails to parse.
Optional<TrackEntry::AudioTrack> MatroskaReader::parse_audio_track_information()
{
    TrackEntry::AudioTrack audio_track {};

    bool parsed = parse_master_element("AudioTrack", [&](u64 element_id) {
        switch (element_id) {
        case CHANNELS_ID: {
            auto channels = read_u64_element();
            CHECK_HAS_VALUE(channels);
            audio_track.channels = channels.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's Channels attribute: {}", channels.value());
            return true;
        }
        case BIT_DEPTH_ID: {
            auto bit_depth = read_u64_element();
            CHECK_HAS_VALUE(bit_depth);
            audio_track.bit_depth = bit_depth.value();
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read AudioTrack's BitDepth attribute: {}", bit_depth.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return audio_track;
}
|
||||
|
||||
// Parses the children of a Cluster element, collecting its simple blocks and
// its timestamp and skipping every other child. Returns a null OwnPtr if any
// child fails to parse.
OwnPtr<Cluster> MatroskaReader::parse_cluster()
{
    auto cluster = make<Cluster>();

    bool parsed = parse_master_element("Cluster", [&](u64 element_id) {
        switch (element_id) {
        case SIMPLE_BLOCK_ID: {
            auto simple_block = parse_simple_block();
            if (!simple_block)
                return false;
            cluster->blocks().append(simple_block.release_nonnull());
            return true;
        }
        case TIMESTAMP_ID: {
            auto timestamp = read_u64_element();
            CHECK_HAS_VALUE(timestamp);
            cluster->set_timestamp(timestamp.value());
            return true;
        }
        default:
            return read_unknown_element();
        }
    });

    if (!parsed)
        return {};
    return cluster;
}
|
||||
|
||||
// Parses the body of a SimpleBlock element: its data size, track number,
// 16-bit timestamp, flags octet and the frame data that follows, split
// according to the lacing mode encoded in the flags.
//
// Returns a null OwnPtr when the input is truncated or when the sizes encoded
// in the block are inconsistent with each other or with the data that is left.
// All sizes come from the (untrusted) file, so each one is validated before it
// is used in a subtraction or to copy frame data.
OwnPtr<Block> MatroskaReader::parse_simple_block()
{
    auto block = make<Block>();

    auto content_size = m_streamer.read_variable_size_integer();
    if (!content_size.has_value())
        return {};

    auto octets_read_before_track_number = m_streamer.octets_read();
    auto track_number = m_streamer.read_variable_size_integer();
    if (!track_number.has_value())
        return {};
    block->set_track_number(track_number.value());

    // Two timestamp octets plus one flags octet must follow.
    if (m_streamer.remaining() < 3)
        return {};
    block->set_timestamp(m_streamer.read_i16());

    auto flags = m_streamer.read_octet();
    block->set_only_keyframes(flags & (1u << 7u));
    block->set_invisible(flags & (1u << 3u));
    block->set_lacing(static_cast<Block::Lacing>((flags & 0b110u) >> 1u));
    block->set_discardable(flags & 1u);

    // Everything after the block header is lacing information and frame data.
    // Reject blocks whose declared size is smaller than the header just read
    // (which would underflow) or larger than the data that is actually left.
    const u64 header_size = m_streamer.octets_read() - octets_read_before_track_number;
    if (content_size.value() < header_size)
        return {};
    const u64 total_frame_content_size = content_size.value() - header_size;
    if (m_streamer.remaining() < total_frame_content_size)
        return {};

    if (block->lacing() == Block::Lacing::EBML) {
        // The lace starts with one octet holding (frame count - 1).
        if (total_frame_content_size < 1)
            return {};
        auto octets_read_before_frame_sizes = m_streamer.octets_read();
        auto frame_count = m_streamer.read_octet() + 1;
        Vector<u64> frame_sizes;
        frame_sizes.ensure_capacity(frame_count);

        // Sizes are coded for every frame except the last: the first as an
        // absolute value, the following ones as signed differences from the
        // previous frame's size. A lace with a single frame codes no sizes.
        u64 frame_size_sum = 0;
        u64 previous_frame_size = 0;
        for (int i = 0; i < frame_count - 1; i++) {
            u64 frame_size = 0;
            if (i == 0) {
                auto first_frame_size = m_streamer.read_variable_size_integer();
                if (!first_frame_size.has_value())
                    return {};
                frame_size = first_frame_size.value();
            } else {
                auto frame_size_difference = m_streamer.read_variable_sized_signed_integer();
                if (!frame_size_difference.has_value())
                    return {};
                auto difference = frame_size_difference.value();
                if (difference < 0) {
                    // Negate via (difference + 1) so the most negative value cannot overflow.
                    u64 magnitude = static_cast<u64>(-(difference + 1)) + 1;
                    if (magnitude > previous_frame_size)
                        return {};
                    frame_size = previous_frame_size - magnitude;
                } else {
                    u64 magnitude = static_cast<u64>(difference);
                    // previous_frame_size <= total_frame_content_size holds here.
                    if (magnitude > total_frame_content_size - previous_frame_size)
                        return {};
                    frame_size = previous_frame_size + magnitude;
                }
            }

            // Keeps frame_size_sum <= total_frame_content_size at all times.
            if (frame_size > total_frame_content_size - frame_size_sum)
                return {};
            frame_sizes.append(frame_size);
            frame_size_sum += frame_size;
            previous_frame_size = frame_size;
        }

        // The last frame takes whatever is left once the lace header (frame
        // count octet plus coded sizes) and the other frames are accounted for.
        const u64 lace_header_size = m_streamer.octets_read() - octets_read_before_frame_sizes;
        if (lace_header_size > total_frame_content_size - frame_size_sum)
            return {};
        frame_sizes.append(total_frame_content_size - frame_size_sum - lace_header_size);

        for (auto current_frame_size : frame_sizes) {
            block->add_frame(ByteBuffer::copy(m_streamer.data(), current_frame_size));
            m_streamer.drop_octets(current_frame_size);
        }
    } else if (block->lacing() == Block::Lacing::FixedSize) {
        // The lace starts with one octet holding (frame count - 1); that octet
        // is part of the block's size but not of the frame data.
        if (total_frame_content_size < 1)
            return {};
        auto frame_count = m_streamer.read_octet() + 1;
        const u64 frames_size = total_frame_content_size - 1;
        // All frames must have the same size, so the data has to divide evenly.
        if (frames_size % static_cast<u64>(frame_count) != 0)
            return {};
        const u64 individual_frame_size = frames_size / static_cast<u64>(frame_count);
        for (int i = 0; i < frame_count; i++) {
            block->add_frame(ByteBuffer::copy(m_streamer.data(), individual_frame_size));
            m_streamer.drop_octets(individual_frame_size);
        }
    } else {
        // NOTE(review): every other lacing value is treated as a single frame;
        // if Block::Lacing also defines Xiph lacing, it is not split here.
        block->add_frame(ByteBuffer::copy(m_streamer.data(), total_frame_content_size));
        m_streamer.drop_octets(total_frame_content_size);
    }
    return block;
}
|
||||
|
||||
// Reads an element's data size followed by that many octets, returned as a
// String. Returns an empty Optional when the size cannot be read or exceeds
// the data that is left.
Optional<String> MatroskaReader::read_string_element()
{
    auto length = m_streamer.read_variable_size_integer();
    if (!length.has_value())
        return {};
    if (m_streamer.remaining() < length.value())
        return {};

    String contents(m_streamer.data_as_chars(), length.value());
    m_streamer.drop_octets(length.value());
    return contents;
}
|
||||
|
||||
// Reads an element's data size followed by that many octets, interpreted as a
// big-endian unsigned integer (most significant octet first).
//
// Returns an empty Optional when the size cannot be read, exceeds the data
// that is left, or is larger than the 8 octets a u64 can hold. Without the
// last check, the high-order octets of an over-long value would be shifted out
// silently and a wrong number returned.
Optional<u64> MatroskaReader::read_u64_element()
{
    auto integer_length = m_streamer.read_variable_size_integer();
    if (!integer_length.has_value() || m_streamer.remaining() < integer_length.value())
        return {};
    if (integer_length.value() > sizeof(u64))
        return {};

    u64 result = 0;
    for (size_t i = 0; i < integer_length.value(); i++) {
        if (!m_streamer.has_octet())
            return {};
        result = (result << 8u) + m_streamer.read_octet();
    }
    return result;
}
|
||||
|
||||
bool MatroskaReader::read_unknown_element()
|
||||
{
|
||||
auto element_length = m_streamer.read_variable_size_integer();
|
||||
if (!element_length.has_value() || m_streamer.remaining() < element_length.value())
|
||||
return false;
|
||||
|
||||
m_streamer.drop_octets(element_length.value());
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue