LibVideo: Calculate Block timestamps for Matroska according to spec

Each track has a timestamp scale value, which should be present, that scales each block's timestamp offset so that video can be synced with audio. Tracks should also contain a CodecDelay element, and may also contain a TrackOffset element that offsets the block timestamps.
2025-09-13 17:47:58 +00:00 · 2022-11-12 13:09:07 -06:00 · 2022-11-12 13:09:07 -06:00 · a58bf7c3d1
commit a58bf7c3d1
parent ceb7632862
4 changed files with 49 additions and 12 deletions
--- a/Userland/Libraries/LibVideo/Containers/Matroska/Document.h
+++ b/Userland/Libraries/LibVideo/Containers/Matroska/Document.h
@ -114,6 +114,12 @@ public:
    // Replaces the track's stored language value.
    void set_language(FlyString const& language) { m_language = language; }
    // Codec identifier string for this track; the reader fills it from the track's CodecID attribute.
    FlyString codec_id() const { return m_codec_id; }
    void set_codec_id(FlyString const& codec_id) { m_codec_id = codec_id; }
    // Per-track multiplier (TrackTimestampScale) that the reader applies to each block's
    // relative timestamp when computing the block's final timestamp.
    double timestamp_scale() const { return m_timestamp_scale; }
    void set_timestamp_scale(double timestamp_scale) { m_timestamp_scale = timestamp_scale; }
    // Codec delay for this track. The reader interprets it as nanoseconds and subtracts it
    // from every block timestamp of the track.
    // NOTE(review): no parser case that calls set_codec_delay() is visible alongside the
    // TrackTimestampScale/TrackOffset cases - confirm the CodecDelay element is actually read,
    // otherwise this stays at its default.
    u64 codec_delay() const { return m_codec_delay; }
    void set_codec_delay(u64 codec_delay) { m_codec_delay = codec_delay; }
    // Track offset (TrackOffset). The reader interprets it as nanoseconds and adds it to
    // every block timestamp of the track.
    // NOTE(review): the value is stored unsigned, but the reader obtains it from a signed read
    // and casts it back to i64 when applying it - a signed type may be the better fit; confirm.
    u64 timestamp_offset() const { return m_timestamp_offset; }
    void set_timestamp_offset(u64 timestamp_offset) { m_timestamp_offset = timestamp_offset; }
    Optional<VideoTrack> video_track() const
    {
        if (track_type() != Video)
@ -135,6 +141,9 @@ private:
    TrackType m_track_type { Invalid };
    FlyString m_language = "eng";
    FlyString m_codec_id;
    double m_timestamp_scale { 1 };
    u64 m_codec_delay { 0 };
    u64 m_timestamp_offset { 0 };
    union {
        VideoTrack m_video_track {};
--- a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp
+++ b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.cpp
@ -44,6 +44,8 @@ constexpr u32 TRACK_UID_ID = 0x73C5;
 constexpr u32 TRACK_TYPE_ID = 0x83;
 constexpr u32 TRACK_LANGUAGE_ID = 0x22B59C;
 constexpr u32 TRACK_CODEC_ID = 0x86;
 constexpr u32 TRACK_TIMESTAMP_SCALE_ID = 0x23314F;
 constexpr u32 TRACK_OFFSET_ID = 0x537F;
 constexpr u32 TRACK_VIDEO_ID = 0xE0;
 constexpr u32 TRACK_AUDIO_ID = 0xE1;
@ -416,6 +418,14 @@ static DecoderErrorOr<TrackEntry> parse_track_entry(Streamer& streamer)
            track_entry.set_codec_id(TRY_READ(streamer.read_string()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's CodecID attribute: {}", track_entry.codec_id());
            break;
        case TRACK_TIMESTAMP_SCALE_ID:
            track_entry.set_timestamp_scale(TRY_READ(streamer.read_float()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackTimestampScale attribute: {}", track_entry.timestamp_scale());
            break;
        case TRACK_OFFSET_ID:
            track_entry.set_timestamp_offset(TRY_READ(streamer.read_variable_size_signed_integer()));
            dbgln_if(MATROSKA_TRACE_DEBUG, "Read Track's TrackOffset attribute: {}", track_entry.timestamp_offset());
            break;
        case TRACK_VIDEO_ID:
            track_entry.set_video_track(TRY(parse_video_track_information(streamer)));
            break;
@ -520,7 +530,7 @@ static DecoderErrorOr<Cluster> parse_cluster(Streamer& streamer, u64 timestamp_s
    return cluster;
 }
-static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster_timestamp, u64 timestamp_scale)
+static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster_timestamp, u64 segment_timestamp_scale, TrackEntry track)
 {
    Block block;
@ -529,7 +539,23 @@ static DecoderErrorOr<Block> parse_simple_block(Streamer& streamer, Time cluster
    auto position_before_track_number = streamer.position();
    block.set_track_number(TRY_READ(streamer.read_variable_size_integer()));
-    block.set_timestamp(cluster_timestamp + Time::from_nanoseconds(TRY_READ(streamer.read_i16()) * timestamp_scale));
+    // https://www.matroska.org/technical/notes.html
    // Block Timestamps:
    //     The Block Element and SimpleBlock Element store their timestamps as signed integers,
    //     relative to the Cluster\Timestamp value of the Cluster they are stored in. To get the
    //     timestamp of a Block or SimpleBlock in nanoseconds you have to use the following formula:
    //         `( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale`
    //
    //     When a CodecDelay Element is set, its value MUST be subtracted from each Block timestamp
    //     of that track. To get the timestamp in nanoseconds of the first frame in a Block or
    //     SimpleBlock, the formula becomes:
    //         `( ( Cluster\Timestamp + ( block timestamp * TrackTimestampScale ) ) * TimestampScale ) - CodecDelay`
    Time timestamp_offset = Time::from_nanoseconds(static_cast<i64>(static_cast<double>(TRY_READ(streamer.read_i16()) * segment_timestamp_scale) * track.timestamp_scale()));
    timestamp_offset -= Time::from_nanoseconds(static_cast<i64>(track.codec_delay()));
    // This is only mentioned in the elements specification under TrackOffset.
    // https://www.matroska.org/technical/elements.html
    timestamp_offset += Time::from_nanoseconds(static_cast<i64>(track.timestamp_offset()));
    block.set_timestamp(cluster_timestamp + timestamp_offset);
    auto flags = TRY_READ(streamer.read_octet());
    block.set_only_keyframes((flags & (1u << 7u)) != 0);
@ -596,7 +622,7 @@ DecoderErrorOr<SampleIterator> Reader::create_sample_iterator(u64 track_number)
    auto position = optional_position.value() - get_element_id_size(CLUSTER_ELEMENT_ID) - m_segment_contents_position;
    dbgln_if(MATROSKA_DEBUG, "Creating sample iterator starting at {} relative to segment at {}", position, m_segment_contents_position);
-    return SampleIterator(this->m_mapped_file, segment_view, track_number, position, TRY(segment_information()).timestamp_scale());
+    return SampleIterator(this->m_mapped_file, segment_view, TRY(track_for_track_number(track_number)), TRY(segment_information()).timestamp_scale(), position);
 }
 static DecoderErrorOr<bool> find_keyframe_before_timestamp(SampleIterator& iterator, Time const& timestamp)
@ -643,7 +669,7 @@ DecoderErrorOr<void> Reader::seek_to_random_access_point(SampleIterator& iterato
    // FIXME: This could cache the keyframes it finds. Is it worth doing? Probably not, most files will have Cues :^)
    if (timestamp < iterator.last_timestamp() || iterator.last_timestamp().is_negative()) {
        // If the timestamp is before the iterator's current position, then we need to start from the beginning of the Segment.
-        iterator = TRY(create_sample_iterator(iterator.m_track_id));
+        iterator = TRY(create_sample_iterator(iterator.m_track.track_number()));
        if (!TRY(find_keyframe_before_timestamp(iterator, timestamp)))
            return DecoderError::corrupted("No random access points found"sv);
@ -678,11 +704,11 @@ DecoderErrorOr<Block> SampleIterator::next_block()
        if (element_id == CLUSTER_ELEMENT_ID) {
            dbgln_if(MATROSKA_DEBUG, "  Iterator is parsing new cluster.");
-            m_current_cluster = TRY(parse_cluster(streamer, m_timestamp_scale));
+            m_current_cluster = TRY(parse_cluster(streamer, m_segment_timestamp_scale));
        } else if (element_id == SIMPLE_BLOCK_ID) {
            dbgln_if(MATROSKA_TRACE_DEBUG, "  Iterator is parsing new block.");
-            auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_timestamp_scale));
+            auto candidate_block = TRY(parse_simple_block(streamer, m_current_cluster->timestamp(), m_segment_timestamp_scale, m_track));
-            if (candidate_block.track_number() == m_track_id)
+            if (candidate_block.track_number() == m_track.track_number())
                block = move(candidate_block);
        } else {
            dbgln_if(MATROSKA_TRACE_DEBUG, "  Iterator is skipping unknown element with ID {:#010x}.", element_id);
--- a/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h
+++ b/Userland/Libraries/LibVideo/Containers/Matroska/Reader.h
@ -80,12 +80,12 @@ public:
 private:
    friend class Reader;
-    SampleIterator(RefPtr<Core::MappedFile> file, ReadonlyBytes data, u64 track_id, size_t position, u64 timestamp_scale)
+    SampleIterator(RefPtr<Core::MappedFile> file, ReadonlyBytes data, TrackEntry track, u64 timestamp_scale, size_t position)
        : m_file(move(file))
        , m_data(data)
-        , m_track_id(track_id)
+        , m_track(move(track))
        , m_segment_timestamp_scale(timestamp_scale)
        , m_position(position)
        , m_timestamp_scale(timestamp_scale)
    {
    }
@ -93,12 +93,12 @@ private:
    RefPtr<Core::MappedFile> m_file;
    ReadonlyBytes m_data;
-    u64 m_track_id;
+    TrackEntry m_track;
    u64 m_segment_timestamp_scale { 0 };
    // Must always point to an element ID or the end of the stream.
    size_t m_position { 0 };
    u64 m_timestamp_scale { 0 };
    Time m_last_timestamp { Time::min() };
    Optional<Cluster> m_current_cluster;
--- a/Userland/Utilities/matroska.cpp
+++ b/Userland/Utilities/matroska.cpp
@ -36,6 +36,8 @@ ErrorOr<int> serenity_main(Main::Arguments)
        outln("\tTrack has TrackType {}", static_cast<u8>(track_entry.track_type()));
        outln("\tTrack has Language \"{}\"", track_entry.language().characters());
        outln("\tTrack has CodecID \"{}\"", track_entry.codec_id().characters());
        outln("\tTrack has TrackTimestampScale {}", track_entry.timestamp_scale());
        outln("\tTrack has CodecDelay {}", track_entry.codec_delay());
        if (track_entry.track_type() == Video::Matroska::TrackEntry::TrackType::Video) {
            auto const video_track = track_entry.video_track().value();