diff --git a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp index 43a1ee64a8..9d6ca54a18 100644 --- a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp +++ b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp @@ -1,6 +1,6 @@ /* * Copyright (c) 2021, Hunter Salyer - * Copyright (c) 2022, Gregory Bertilson + * Copyright (c) 2022-2023, Gregory Bertilson * * SPDX-License-Identifier: BSD-2-Clause */ @@ -18,9 +18,14 @@ namespace Video::Matroska { #define TRY_READ(expression) DECODER_TRY(DecoderErrorCategory::Corrupted, expression) -// Elements IDs and types are listed at this URL: -// https://www.matroska.org/technical/elements.html +// RFC 8794 - Extensible Binary Meta Language +// https://datatracker.ietf.org/doc/html/rfc8794 constexpr u32 EBML_MASTER_ELEMENT_ID = 0x1A45DFA3; +constexpr u32 EBML_CRC32_ELEMENT_ID = 0xBF; +constexpr u32 EBML_VOID_ELEMENT_ID = 0xEC; + +// Matroska elements' IDs and types are listed at this URL: +// https://www.matroska.org/technical/elements.html constexpr u32 SEGMENT_ELEMENT_ID = 0x18538067; constexpr u32 DOCTYPE_ELEMENT_ID = 0x4282; constexpr u32 DOCTYPE_VERSION_ELEMENT_ID = 0x4287; @@ -98,35 +103,69 @@ DecoderErrorOr Reader::from_data(ReadonlyBytes data) return reader; } -static DecoderErrorOr parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function(u64, size_t)> element_consumer) +// Returns the position of the first element that is read from this master element. +static DecoderErrorOr parse_master_element(Streamer& streamer, [[maybe_unused]] StringView element_name, Function(u64)> element_consumer) { auto element_data_size = TRY_READ(streamer.read_variable_size_integer()); dbgln_if(MATROSKA_DEBUG, "{} has {} octets of data.", element_name, element_data_size); + bool first_element = true; + auto first_element_position = streamer.position(); + streamer.push_octets_read(); while (streamer.octets_read() < element_data_size) { dbgln_if(MATROSKA_TRACE_DEBUG, "====== Reading element ======"); auto element_id = TRY_READ(streamer.read_variable_size_integer(false)); - auto element_position = streamer.position(); dbgln_if(MATROSKA_TRACE_DEBUG, "{:s} element ID is {:#010x}", element_name, element_id); - auto result = element_consumer(element_id, element_position); + if (element_id == EBML_CRC32_ELEMENT_ID) { + // The CRC-32 Element contains a 32-bit Cyclic Redundancy Check value of all the + // Element Data of the Parent Element as stored except for the CRC-32 Element itself. + // When the CRC-32 Element is present, the CRC-32 Element MUST be the first ordered + // EBML Element within its Parent Element for easier reading. + if (!first_element) + return DecoderError::corrupted("CRC32 element must be the first child"sv); + + // All Top-Level Elements of an EBML Document that are Master Elements SHOULD include a + // CRC-32 Element as a Child Element. The CRC in use is the IEEE-CRC-32 algorithm as used + // in the [ISO3309] standard and in Section 8.1.1.6.2 of [ITU.V42], with initial value of + // 0xFFFFFFFF. The CRC value MUST be computed on a little-endian bytestream and MUST use + // little-endian storage. + + // FIXME: Currently we skip the CRC-32 Element instead of checking it. It may be worth + // verifying the contents of the SeekHead, Segment Info, and Tracks Elements. + // Note that Cluster Elements tend to be quite large, so verifying their integrity + // will result in longer buffering times in streamed contexts, so it may not be + // worth the effort checking those. It would also prevent error correction in + // video codecs from taking effect. + TRY_READ(streamer.read_unknown_element()); + continue; + } + if (element_id == EBML_VOID_ELEMENT_ID) { + // Used to void data or to avoid unexpected behaviors when using damaged data. + // The content is discarded. Also used to reserve space in a subelement for later use. + TRY_READ(streamer.read_unknown_element()); + continue; + } + + auto result = element_consumer(element_id); if (result.is_error()) return DecoderError::format(result.error().category(), "{} -> {}", element_name, result.error().description()); if (result.release_value() == IterationDecision::Break) break; dbgln_if(MATROSKA_TRACE_DEBUG, "Read {} octets of the {} so far.", streamer.octets_read(), element_name); + first_element = false; } streamer.pop_octets_read(); - return {}; + return first_element_position; } static DecoderErrorOr parse_ebml_header(Streamer& streamer) { EBMLHeader header; - TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Header"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case DOCTYPE_ELEMENT_ID: header.doc_type = TRY_READ(streamer.read_string()); @@ -170,11 +209,11 @@ DecoderErrorOr Reader::parse_initial_data() static DecoderErrorOr parse_seek_head(Streamer& streamer, size_t base_position, HashMap& table) { - return parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "SeekHead"sv, [&](u64 seek_head_child_id) -> DecoderErrorOr { if (seek_head_child_id == SEEK_ELEMENT_ID) { Optional seek_id; Optional seek_position; - TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Seek"sv, [&](u64 seek_entry_child_id) -> DecoderErrorOr { switch (seek_entry_child_id) { case SEEK_ID_ELEMENT_ID: seek_id = TRY_READ(streamer.read_u64()); @@ -212,7 +251,8 @@ static DecoderErrorOr parse_seek_head(Streamer& streamer, size_t base_posi } return IterationDecision::Continue; - }); + })); + return {}; } DecoderErrorOr> Reader::find_first_top_level_element_with_id([[maybe_unused]] StringView element_name, u32 element_id) @@ -272,7 +312,7 @@ DecoderErrorOr> Reader::find_first_top_level_element_with_id([[ static DecoderErrorOr parse_information(Streamer& streamer) { SegmentInformation segment_information; - TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Segment Information"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case TIMESTAMP_SCALE_ID: segment_information.set_timestamp_scale(TRY_READ(streamer.read_u64())); @@ -331,7 +371,7 @@ static DecoderErrorOr parse_video_color_information(Str { TrackEntry::ColorFormat color_format {}; - TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Colour"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case PRIMARIES_ID: color_format.color_primaries = static_cast(TRY_READ(streamer.read_u64())); @@ -363,7 +403,7 @@ static DecoderErrorOr parse_video_track_information(Stre { TrackEntry::VideoTrack video_track {}; - TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "VideoTrack"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case PIXEL_WIDTH_ID: video_track.pixel_width = TRY_READ(streamer.read_u64()); @@ -390,7 +430,7 @@ static DecoderErrorOr parse_audio_track_information(Stre { TrackEntry::AudioTrack audio_track {}; - TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "AudioTrack"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case CHANNELS_ID: audio_track.channels = TRY_READ(streamer.read_u64()); @@ -413,7 +453,7 @@ static DecoderErrorOr parse_audio_track_information(Stre static DecoderErrorOr parse_track_entry(Streamer& streamer) { TrackEntry track_entry; - TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Track"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case TRACK_NUMBER_ID: track_entry.set_track_number(TRY_READ(streamer.read_u64())); @@ -461,7 +501,7 @@ static DecoderErrorOr parse_track_entry(Streamer& streamer) DecoderErrorOr Reader::parse_tracks(Streamer& streamer) { - return parse_master_element(streamer, "Tracks"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Tracks"sv, [&](u64 element_id) -> DecoderErrorOr { if (element_id == TRACK_ENTRY_ID) { auto track_entry = TRY(parse_track_entry(streamer)); dbgln_if(MATROSKA_DEBUG, "Parsed track {}", track_entry.track_number()); @@ -471,7 +511,8 @@ DecoderErrorOr Reader::parse_tracks(Streamer& streamer) } return IterationDecision::Continue; - }); + })); + return {}; } DecoderErrorOr Reader::for_each_track(TrackEntryCallback callback) @@ -517,12 +558,8 @@ constexpr size_t get_element_id_size(u32 element_id) static DecoderErrorOr parse_cluster(Streamer& streamer, u64 timestamp_scale) { Optional timestamp; - size_t first_element_position = 0; - - TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id, size_t position) -> DecoderErrorOr { - if (first_element_position == 0) - first_element_position = position - get_element_id_size(element_id); + auto first_element_position = TRY(parse_master_element(streamer, "Cluster"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case TIMESTAMP_ID: timestamp = TRY_READ(streamer.read_u64()); @@ -648,7 +685,7 @@ static DecoderErrorOr parse_cue_track_position(Streamer& strea bool had_cluster_position = false; - TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY_READ(parse_master_element(streamer, "CueTrackPositions"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case CUE_TRACK_ID: track_position.set_track_number(TRY_READ(streamer.read_u64())); @@ -692,7 +729,7 @@ static DecoderErrorOr parse_cue_point(Streamer& streamer, u64 timestam { CuePoint cue_point; - TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "CuePoint"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case CUE_TIME_ID: { // On https://www.matroska.org/technical/elements.html, spec says of the CueTime element: @@ -735,7 +772,7 @@ DecoderErrorOr Reader::parse_cues(Streamer& streamer) { m_cues.clear(); - TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id, size_t) -> DecoderErrorOr { + TRY(parse_master_element(streamer, "Cues"sv, [&](u64 element_id) -> DecoderErrorOr { switch (element_id) { case CUE_POINT_ID: { auto cue_point = TRY(parse_cue_point(streamer, TRY(segment_information()).timestamp_scale())); @@ -972,7 +1009,7 @@ ErrorOr Streamer::read_i16() ErrorOr Streamer::read_variable_size_integer(bool mask_length) { - dbgln_if(MATROSKA_TRACE_DEBUG, "Reading from offset {:p}", data()); + dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT from offset {:p}", position()); auto length_descriptor = TRY(read_octet()); dbgln_if(MATROSKA_TRACE_DEBUG, "Reading VINT, first byte is {:#02x}", length_descriptor); if (length_descriptor == 0)