1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 08:04:57 +00:00
serenity/Userland/Utilities/matroska.cpp
Zaggy1024 f6830eaf73 LibVideo: Implement Matroska Cues for faster keyframe lookup
This implements the fastest seeking mode available for tracks with cues
using an array of cue points for each track. It approximates the index
based on the seeking timestamp and then finds the earliest cue point
before the timestamp. The approximation assumes that cues will be on
a regular interval, which I don't believe is always the case, but it
should at least be faster than iterating the whole set of cue points
each time.

Cues are stored per track, but most videos will only have cue points
for the video track(s) that are present. For now, this assumes that it
should only seek based on the cue points for the selected track. To
seek audio in a video file, we should copy the seeked iterator over to
the audio track's iterator after seeking is complete. The iterator will
then skip to the next audio block.
2022-11-25 23:28:39 +01:00

115 lines
5.7 KiB
C++

/*
* Copyright (c) 2021, Hunter Salyer <thefalsehonesty@gmail.com>
* Copyright (c) 2022, Gregory Bertilson <zaggy1024@gmail.com>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Function.h>
#include <LibCore/ArgsParser.h>
#include <LibMain/Main.h>
#include <LibVideo/Containers/Matroska/Reader.h>
// Evaluates `expression`, which must yield an error-or-value result. On error, prints the
// error's string literal to stdout and returns a generic Error from the enclosing function;
// otherwise the whole macro evaluates to the released value.
#define TRY_PARSE(expression)                                                                  \
    ({                                                                                         \
        auto _parse_result = ((expression));                                                   \
        if (_parse_result.is_error()) [[unlikely]] {                                           \
            outln("Encountered a parsing error: {}", _parse_result.error().string_literal()); \
            return Error::from_string_literal("Failed to parse :(");                           \
        }                                                                                      \
        _parse_result.release_value();                                                         \
    })
// Prints information about a Matroska file: the EBML header, the segment information and
// the properties of each track. With --cues and/or --blocks, the cue points and blocks of
// each printed track are listed as well. With --track, only the track carrying that track
// number is printed.
//
// Returns 0 on success. Parsing failures are reported on stdout and surface as an Error.
ErrorOr<int> serenity_main(Main::Arguments arguments)
{
    StringView filename;
    bool blocks = false;
    bool cues = false;
    // 0 means "no track selected", in which case every track is printed.
    u64 track_number = 0;

    Core::ArgsParser args_parser;
    args_parser.add_option(blocks, "Print blocks for each track.", "blocks", 'b');
    args_parser.add_option(cues, "Print cue points for each track.", "cues", 'c');
    args_parser.add_option<u64>(track_number, "Specify a track number to print info for, omit to print all of them.", "track", 't', "tracknumber");
    args_parser.add_positional_argument(filename, "The video file to display.", "filename", Core::ArgsParser::Required::Yes);
    args_parser.parse(arguments);

    auto reader = TRY_PARSE(Video::Matroska::Reader::from_file(filename));

    outln("DocType is {}", reader.header().doc_type.characters());
    outln("DocTypeVersion is {}", reader.header().doc_type_version);

    auto segment_information = TRY_PARSE(reader.segment_information());
    outln("Timestamp scale is {}", segment_information.timestamp_scale());
    outln("Muxing app is \"{}\"", segment_information.muxing_app().as_string().to_string().characters());
    outln("Writing app is \"{}\"", segment_information.writing_app().as_string().to_string().characters());

    outln("Document has {} tracks", TRY_PARSE(reader.track_count()));

    // The callback returns decoder errors, so failures inside it are propagated with TRY and
    // reported once by the TRY_PARSE wrapped around for_each_track().
    TRY_PARSE(reader.for_each_track([&](Video::Matroska::TrackEntry const& track_entry) -> Video::DecoderErrorOr<IterationDecision> {
        // Skip every track except the requested one when a track number was given.
        if (track_number != 0 && track_entry.track_number() != track_number)
            return IterationDecision::Continue;

        outln("\tTrack #{} with TrackID {}", track_entry.track_number(), track_entry.track_uid());
        outln("\tTrack has TrackType {}", static_cast<u8>(track_entry.track_type()));
        outln("\tTrack has Language \"{}\"", track_entry.language().characters());
        outln("\tTrack has CodecID \"{}\"", track_entry.codec_id().characters());
        outln("\tTrack has TrackTimestampScale {}", track_entry.timestamp_scale());
        outln("\tTrack has CodecDelay {}", track_entry.codec_delay());

        // The type-specific settings are optional values. Check them instead of calling
        // value() unconditionally, so a file that declares a video/audio track without the
        // matching settings is reported rather than aborting the tool.
        if (track_entry.track_type() == Video::Matroska::TrackEntry::TrackType::Video) {
            auto const video_track = track_entry.video_track();
            if (video_track.has_value())
                outln("\t\tVideo is {} pixels wide by {} pixels tall", video_track->pixel_width, video_track->pixel_height);
            else
                outln("\t\tTrack is a video track, but has no video settings");
        } else if (track_entry.track_type() == Video::Matroska::TrackEntry::TrackType::Audio) {
            auto const audio_track = track_entry.audio_track();
            if (audio_track.has_value())
                outln("\t\tAudio has {} channels with a bit depth of {}", audio_track->channels, audio_track->bit_depth);
            else
                outln("\t\tTrack is an audio track, but has no audio settings");
        }

        if (cues) {
            auto const& cue_points = TRY(reader.cue_points_for_track(track_entry.track_number()));

            if (cue_points.has_value()) {
                outln("\tCues points:");

                for (auto const& cue_point : cue_points.value()) {
                    outln("\t\tCue point at {}ms:", cue_point.timestamp().to_milliseconds());

                    auto const& track_position = cue_point.position_for_track(track_entry.track_number());
                    if (!track_position.has_value()) {
                        outln("\t\t\tCue point has no positions for this track, this should not happen");
                        continue;
                    }

                    outln("\t\t\tCluster position {}", track_position->cluster_position());
                    outln("\t\t\tBlock offset {}", track_position->block_offset());
                }
            } else {
                outln("\tNo cue points exist for this track");
            }
        }

        if (blocks) {
            outln("\tBlocks:");
            auto iterator = TRY(reader.create_sample_iterator(track_entry.track_number()));

            while (true) {
                auto block_result = iterator.next_block();
                if (block_result.is_error()) {
                    // An EndOfStream error ends the listing; any other error is propagated.
                    if (block_result.error().category() == Video::DecoderErrorCategory::EndOfStream)
                        break;
                    return block_result.release_error();
                }

                auto block = block_result.release_value();
                outln("\t\tBlock at timestamp {}ms:", block.timestamp().to_milliseconds());
                if (block.only_keyframes())
                    outln("\t\t\tThis block contains only keyframes");
                outln("\t\t\tContains {} frames", block.frame_count());
                outln("\t\t\tLacing is {}", static_cast<u8>(block.lacing()));
            }
        }

        // Only one track was requested and it has now been printed, so stop iterating.
        if (track_number != 0)
            return IterationDecision::Break;

        return IterationDecision::Continue;
    }));

    return 0;
}