From 625aac23670f4c33d91b2690bd3147c3618732b9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= Date: Wed, 5 Jul 2023 00:10:35 +0200 Subject: [PATCH] LibAudio: Add a FLAC encoder This encoder can handle all integer formats and sample rates, though only two channels well. It uses fixed LPC and performs a close-to-optimal parameter search on the LPC order and residual Rice parameter, leading to decent compression already. --- AK/Debug.h.in | 4 + Meta/CMake/all_the_debug_macros.cmake | 1 + Userland/Libraries/LibAudio/CMakeLists.txt | 1 + Userland/Libraries/LibAudio/FlacLoader.cpp | 1 + Userland/Libraries/LibAudio/FlacTypes.h | 46 ++ Userland/Libraries/LibAudio/FlacWriter.cpp | 624 +++++++++++++++++++++ Userland/Libraries/LibAudio/FlacWriter.h | 118 ++++ 7 files changed, 795 insertions(+) create mode 100644 Userland/Libraries/LibAudio/FlacWriter.cpp create mode 100644 Userland/Libraries/LibAudio/FlacWriter.h diff --git a/AK/Debug.h.in b/AK/Debug.h.in index 31972ed3f3..d154f601d0 100644 --- a/AK/Debug.h.in +++ b/AK/Debug.h.in @@ -142,6 +142,10 @@ # cmakedefine01 FILE_WATCHER_DEBUG #endif +#ifndef FLAC_ENCODER_DEBUG +# cmakedefine01 FLAC_ENCODER_DEBUG +#endif + #ifndef GEMINI_DEBUG # cmakedefine01 GEMINI_DEBUG #endif diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake index 7ad6d30466..fbf306bbfa 100644 --- a/Meta/CMake/all_the_debug_macros.cmake +++ b/Meta/CMake/all_the_debug_macros.cmake @@ -50,6 +50,7 @@ set(FAT_DEBUG ON) set(FILE_CONTENT_DEBUG ON) set(FILE_WATCHER_DEBUG ON) set(FILL_PATH_DEBUG ON) +set(FLAC_ENCODER_DEBUG ON) set(FORK_DEBUG ON) set(FUTEX_DEBUG ON) set(FUTEXQUEUE_DEBUG ON) diff --git a/Userland/Libraries/LibAudio/CMakeLists.txt b/Userland/Libraries/LibAudio/CMakeLists.txt index 16ab17e7ea..40e8fdb4e5 100644 --- a/Userland/Libraries/LibAudio/CMakeLists.txt +++ b/Userland/Libraries/LibAudio/CMakeLists.txt @@ -5,6 +5,7 @@ set(SOURCES RIFFTypes.cpp WavLoader.cpp FlacLoader.cpp + FlacWriter.cpp 
WavWriter.cpp Metadata.cpp MP3Loader.cpp diff --git a/Userland/Libraries/LibAudio/FlacLoader.cpp b/Userland/Libraries/LibAudio/FlacLoader.cpp index 613ef6683f..afa8b05675 100644 --- a/Userland/Libraries/LibAudio/FlacLoader.cpp +++ b/Userland/Libraries/LibAudio/FlacLoader.cpp @@ -852,6 +852,7 @@ ErrorOr, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram // http://mi.eng.cam.ac.uk/reports/svr-ftp/auto-pdf/robinson_tr156.pdf page 4 // The coefficients for order 4 are undocumented in the original FLAC specification(s), but can now be found in // https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#section-10.2.5 + // FIXME: Share this code with predict_fixed_lpc(). switch (subframe.order) { case 0: // s_0(t) = 0 diff --git a/Userland/Libraries/LibAudio/FlacTypes.h b/Userland/Libraries/LibAudio/FlacTypes.h index e2333a7310..48ab8ba1ee 100644 --- a/Userland/Libraries/LibAudio/FlacTypes.h +++ b/Userland/Libraries/LibAudio/FlacTypes.h @@ -25,6 +25,8 @@ namespace Audio { #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16 0xfffffffe #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16X10 0xfffffffd +constexpr StringView flac_magic = "fLaC"sv; + // 11.22.11. FRAME CRC // The polynomial used here is known as CRC-8-CCITT. static constexpr u8 flac_polynomial = 0x07; @@ -84,6 +86,8 @@ struct FlacRawMetadataBlock { FlacMetadataBlockType type; u32 length; // 24 bits ByteBuffer data; + + ErrorOr write_to_stream(Stream&) const; }; enum class BlockingStrategy : u8 { @@ -91,6 +95,29 @@ enum class BlockingStrategy : u8 { Variable = 1, }; +// Block sample count can be stored in one of 5 ways. +enum class BlockSizeCategory : u8 { + Reserved = 0b0000, + S192 = 0b0001, + // The formula for these four is 144 * (2^x), and it appears to be an MP3 compatibility feature. + S576 = 0b0010, + S1152 = 0b0011, + S2304 = 0b0100, + S4608 = 0b0101, + // Actual size is stored later on; in the frame header, 0b0110 announces an 8-bit and 0b0111 a 16-bit block size field (0b1111 already means 32768 samples). + Uncommon8Bits = 0b0110, + Uncommon16Bits = 0b0111, + // Formula 2^x. 
+ S256 = 0b1000, + S512 = 0b1001, + S1024 = 0b1010, + S2048 = 0b1011, + S4096 = 0b1100, + S8192 = 0b1101, + S16384 = 0b1110, + S32768 = 0b1111, +}; + // 11.22. FRAME_HEADER struct FlacFrameHeader { u32 sample_rate; @@ -102,6 +129,8 @@ struct FlacFrameHeader { FlacFrameChannelType channels; u8 bit_depth; u8 checksum; + + ErrorOr write_to_stream(Stream&) const; }; // 11.25. SUBFRAME_HEADER @@ -113,4 +142,21 @@ struct FlacSubframeHeader { u8 bits_per_sample; }; +enum class FlacFixedLPC : size_t { + Zero = 0, + One = 1, + Two = 2, + Three = 3, + Four = 4, +}; + +struct FlacLPCEncodedSubframe { + Vector warm_up_samples; + Variant, FlacFixedLPC> coefficients; + Vector residuals; + size_t residual_cost_bits; + // If we’re only using one Rice partition, this is the optimal order to use. + u8 single_partition_optimal_order; +}; + } diff --git a/Userland/Libraries/LibAudio/FlacWriter.cpp b/Userland/Libraries/LibAudio/FlacWriter.cpp new file mode 100644 index 0000000000..59e6959aeb --- /dev/null +++ b/Userland/Libraries/LibAudio/FlacWriter.cpp @@ -0,0 +1,624 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include "FlacWriter.h" +#include +#include +#include +#include +#include + +namespace Audio { + +ErrorOr> FlacWriter::create(NonnullOwnPtr stream, u32 sample_rate, u8 num_channels, u16 bits_per_sample) +{ + auto writer = TRY(AK::adopt_nonnull_own_or_enomem(new (nothrow) FlacWriter(move(stream)))); + TRY(writer->set_bits_per_sample(bits_per_sample)); + TRY(writer->set_sample_rate(sample_rate)); + TRY(writer->set_num_channels(num_channels)); + return writer; +} + +FlacWriter::FlacWriter(NonnullOwnPtr stream) + : m_stream(move(stream)) +{ +} + +FlacWriter::~FlacWriter() +{ + if (m_state != WriteState::FullyFinalized) + (void)finalize(); +} + +ErrorOr FlacWriter::finalize() +{ + if (m_state == WriteState::FullyFinalized) + return Error::from_string_view("File is already finalized"sv); + + // TODO: Write missing sample 
data instead of discarding it. + + if (m_state == WriteState::HeaderUnwritten) + TRY(finalize_header_format()); + + { + // 1 byte metadata block header + 3 bytes size + 2*2 bytes min/max block size + TRY(m_stream->seek(m_streaminfo_start_index + 8, AK::SeekMode::SetPosition)); + BigEndianOutputBitStream bit_stream { MaybeOwned { *m_stream } }; + TRY(bit_stream.write_bits(m_min_frame_size, 24)); + TRY(bit_stream.write_bits(m_max_frame_size, 24)); + TRY(bit_stream.write_bits(m_sample_rate, 20)); + TRY(bit_stream.write_bits(m_num_channels - 1u, 3)); + TRY(bit_stream.write_bits(m_bits_per_sample - 1u, 5)); + TRY(bit_stream.write_bits(m_sample_count, 36)); + TRY(bit_stream.align_to_byte_boundary()); + } + + // TODO: Write the audio data MD5 to the header. + + m_stream->close(); + + m_state = WriteState::FullyFinalized; + return {}; +} + +ErrorOr FlacWriter::finalize_header_format() +{ + if (m_state != WriteState::HeaderUnwritten) + return Error::from_string_view("Header format is already finalized"sv); + TRY(write_header()); + m_state = WriteState::FormatFinalized; + return {}; +} + +ErrorOr FlacWriter::set_num_channels(u8 num_channels) +{ + if (m_state != WriteState::HeaderUnwritten) + return Error::from_string_view("Header format is already finalized"sv); + if (num_channels > 8) + return Error::from_string_view("FLAC doesn't support more than 8 channels"sv); + + m_num_channels = num_channels; + return {}; +} + +ErrorOr FlacWriter::set_sample_rate(u32 sample_rate) +{ + if (m_state != WriteState::HeaderUnwritten) + return Error::from_string_view("Header format is already finalized"sv); + + m_sample_rate = sample_rate; + return {}; +} + +ErrorOr FlacWriter::set_bits_per_sample(u16 bits_per_sample) +{ + if (m_state != WriteState::HeaderUnwritten) + return Error::from_string_view("Header format is already finalized"sv); + if (bits_per_sample < 8 || bits_per_sample > 32) + return Error::from_string_view("FLAC only supports bits per sample between 8 and 32"sv); + + 
m_bits_per_sample = bits_per_sample; + return {}; +} + +ErrorOr FlacWriter::write_header() +{ + TRY(m_stream->write_until_depleted(flac_magic.bytes())); + m_streaminfo_start_index = TRY(m_stream->tell()); + + ByteBuffer data; + // STREAMINFO is always exactly 34 bytes long. + TRY(data.try_resize(34)); + BigEndianOutputBitStream header_stream { TRY(try_make(data.bytes())) }; + + // Duplication on purpose: + // Minimum frame size. + TRY(header_stream.write_bits(block_size, 16)); + // Maximum frame size. + TRY(header_stream.write_bits(block_size, 16)); + // Leave the frame sizes as unknown for now. + TRY(header_stream.write_bits(0u, 24)); + TRY(header_stream.write_bits(0u, 24)); + + TRY(header_stream.write_bits(m_sample_rate, 20)); + TRY(header_stream.write_bits(m_num_channels - 1u, 3)); + TRY(header_stream.write_bits(m_bits_per_sample - 1u, 5)); + // Leave the sample count as unknown for now. + TRY(header_stream.write_bits(0u, 36)); + + // TODO: Calculate the MD5 signature of all of the audio data. + auto md5 = TRY(ByteBuffer::create_zeroed(128u / 8u)); + TRY(header_stream.write_until_depleted(md5)); + + FlacRawMetadataBlock streaminfo_block = { + .is_last_block = true, + .type = FlacMetadataBlockType::STREAMINFO, + .length = static_cast(data.size()), + .data = data, + }; + + TRY(m_stream->write_value(streaminfo_block)); + return {}; +} + +ErrorOr FlacRawMetadataBlock::write_to_stream(Stream& stream) const +{ + BigEndianOutputBitStream bit_stream { MaybeOwned { stream } }; + TRY(bit_stream.write_bits(static_cast(is_last_block), 1)); + TRY(bit_stream.write_bits(to_underlying(type), 7)); + TRY(bit_stream.write_bits(length, 24)); + + VERIFY(data.size() == length); + TRY(bit_stream.write_until_depleted(data)); + return {}; +} + +// If the given sample count is uncommon, this function will return one of the uncommon marker block sizes. +// The caller has to handle and add these later manually. 
+static BlockSizeCategory to_common_block_size(u16 sample_count) +{ + switch (sample_count) { + case 192: + return BlockSizeCategory::S192; + case 576: + return BlockSizeCategory::S576; + case 1152: + return BlockSizeCategory::S1152; + case 2304: + return BlockSizeCategory::S2304; + case 4608: + return BlockSizeCategory::S4608; + case 256: + return BlockSizeCategory::S256; + case 512: + return BlockSizeCategory::S512; + case 1024: + return BlockSizeCategory::S1024; + case 2048: + return BlockSizeCategory::S2048; + case 4096: + return BlockSizeCategory::S4096; + case 8192: + return BlockSizeCategory::S8192; + case 16384: + return BlockSizeCategory::S16384; + case 32768: + return BlockSizeCategory::S32768; + } + if (sample_count - 1 <= 0xff) + return BlockSizeCategory::Uncommon8Bits; + // Data type guarantees that 16-bit storage is possible. + return BlockSizeCategory::Uncommon16Bits; +} + +static ByteBuffer to_utf8(u64 value) +{ + ByteBuffer buffer; + if (value <= 0x7f) { // Every bound below is the inclusive maximum of its encoding length (7, 11, 16, 21, 26, 31 and 36 payload bits), so no value receives an overlong encoding. + buffer.append(static_cast(value)); + } else if (value <= 0x7ff) { + buffer.append(static_cast(0b110'00000 | (value >> 6))); + buffer.append(static_cast(0b10'000000 | (value & 0b111111))); + } else if (value <= 0xffff) { + buffer.append(static_cast(0b1110'0000 | (value >> 12))); + buffer.append(static_cast(0b10'000000 | ((value >> 6) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 0) & 0b111111))); + } else if (value <= 0x1f'ffff) { + buffer.append(static_cast(0b11110'000 | (value >> 18))); + buffer.append(static_cast(0b10'000000 | ((value >> 12) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 6) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 0) & 0b111111))); + } else if (value <= 0x3ff'ffff) { + buffer.append(static_cast(0b111110'00 | (value >> 24))); + buffer.append(static_cast(0b10'000000 | ((value >> 18) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 12) & 0b111111))); + buffer.append(static_cast(0b10'000000 | 
((value >> 6) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 0) & 0b111111))); + } else if (value <= 0x7fff'ffff) { + buffer.append(static_cast(0b1111110'0 | (value >> 30))); + buffer.append(static_cast(0b10'000000 | ((value >> 24) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 18) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 12) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 6) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 0) & 0b111111))); + } else if (value <= 0xf'ffff'ffff) { + buffer.append(static_cast(0b11111110)); + buffer.append(static_cast(0b10'000000 | ((value >> 30) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 24) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 18) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 12) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 6) & 0b111111))); + buffer.append(static_cast(0b10'000000 | ((value >> 0) & 0b111111))); + } else { + // Anything larger is illegal even in expanded UTF-8, but FLAC only passes 32-bit values anyways. + VERIFY_NOT_REACHED(); + } + return buffer; +} + +ErrorOr FlacFrameHeader::write_to_stream(Stream& stream) const +{ + Crypto::Checksum::ChecksummingStream checksumming_stream { MaybeOwned { stream } }; + BigEndianOutputBitStream bit_stream { MaybeOwned { checksumming_stream } }; + TRY(bit_stream.write_bits(0b11111111111110u, 14)); + TRY(bit_stream.write_bits(0u, 1)); + TRY(bit_stream.write_bits(to_underlying(blocking_strategy), 1)); + + auto common_block_size = to_common_block_size(sample_count); + TRY(bit_stream.write_bits(to_underlying(common_block_size), 4)); + + // We always store sample rate in the file header. + TRY(bit_stream.write_bits(0u, 4)); + TRY(bit_stream.write_bits(to_underlying(channels), 4)); + // We always store bit depth in the file header. 
+ TRY(bit_stream.write_bits(0u, 3)); + // Reserved zero bit. + TRY(bit_stream.write_bits(0u, 1)); + + auto coded_number = to_utf8(sample_or_frame_index); + TRY(bit_stream.write_until_depleted(coded_number)); + + if (common_block_size == BlockSizeCategory::Uncommon8Bits) + TRY(bit_stream.write_value(static_cast(sample_count - 1))); + if (common_block_size == BlockSizeCategory::Uncommon16Bits) + TRY(bit_stream.write_value(BigEndian(static_cast(sample_count - 1)))); + + // Ensure that the checksum is calculated correctly. + TRY(bit_stream.align_to_byte_boundary()); + auto checksum = checksumming_stream.digest(); + TRY(bit_stream.write_value(checksum)); + + return {}; +} + +ErrorOr FlacWriter::write_samples(ReadonlySpan samples) +{ + if (m_state == WriteState::FullyFinalized) + return Error::from_string_view("File is already finalized"sv); + + auto remaining_samples = samples; + while (remaining_samples.size() > 0) { + if (m_sample_buffer.size() == block_size) { + TRY(write_frame()); + m_sample_buffer.clear(); + } + auto amount_to_copy = min(remaining_samples.size(), m_sample_buffer.capacity() - m_sample_buffer.size()); + auto current_buffer_size = m_sample_buffer.size(); + TRY(m_sample_buffer.try_resize_and_keep_capacity(current_buffer_size + amount_to_copy)); + remaining_samples.copy_trimmed_to(m_sample_buffer.span().slice(current_buffer_size)); + remaining_samples = remaining_samples.slice(amount_to_copy); + } + + // Ensure that the buffer is flushed if possible. + if (m_sample_buffer.size() == block_size) { + TRY(write_frame()); + m_sample_buffer.clear(); + } + + return {}; +} + +ErrorOr FlacWriter::write_frame() +{ + auto frame_samples = move(m_sample_buffer); + // De-interleave and integer-quantize subframes. 
+ float sample_rescale = static_cast(1u << (m_bits_per_sample - 1)); // Unsigned shift: the permitted 32-bit depth needs 1 << 31, which does not fit into a signed int. + auto subframe_samples = Vector>(); + TRY(subframe_samples.try_resize_and_keep_capacity(m_num_channels)); + for (auto const& sample : frame_samples) { + TRY(subframe_samples[0].try_append(static_cast(sample.left * sample_rescale))); + // FIXME: We don't have proper data for any channels past 2. + for (auto i = 1; i < m_num_channels; ++i) + TRY(subframe_samples[i].try_append(static_cast(sample.right * sample_rescale))); + } + + FlacFrameHeader header { + .sample_rate = m_sample_rate, + .sample_count = static_cast(frame_samples.size()), + .sample_or_frame_index = static_cast(m_current_frame), + .blocking_strategy = BlockingStrategy::Fixed, + // FIXME: We should brute-force channel coupling for stereo. + .channels = static_cast(m_num_channels - 1), + .bit_depth = static_cast(m_bits_per_sample), + // Calculated for us during header write. + .checksum = 0, + }; + + auto frame_stream = Crypto::Checksum::ChecksummingStream { MaybeOwned { *m_stream } }; + + auto frame_start_offset = TRY(m_stream->tell()); + TRY(frame_stream.write_value(header)); + + BigEndianOutputBitStream bit_stream { MaybeOwned { frame_stream } }; + for (auto const& subframe : subframe_samples) + TRY(write_subframe(subframe.span(), bit_stream)); + + TRY(bit_stream.align_to_byte_boundary()); + auto frame_crc = frame_stream.digest(); + dbgln_if(FLAC_ENCODER_DEBUG, "Frame {:4} CRC: {:04x}", m_current_frame, frame_crc); + TRY(frame_stream.write_value>(frame_crc)); + + auto frame_end_offset = TRY(m_stream->tell()); + auto frame_size = frame_end_offset - frame_start_offset; + m_max_frame_size = max(m_max_frame_size, frame_size); + m_min_frame_size = min(m_min_frame_size, frame_size); + + m_current_frame++; + m_sample_count += frame_samples.size(); + + return {}; +} + +ErrorOr FlacWriter::write_subframe(ReadonlySpan subframe, BigEndianOutputBitStream& bit_stream) +{ + // The current subframe encoding strategy is as follows: + // - Check if 
the subframe is constant; use constant encoding in this case. + // - Try all fixed predictors and record the resulting residuals. + // - Estimate their encoding cost by taking the sum of all absolute logarithmic residuals, + // which is an accurate estimate of the final encoded size of the residuals. + // - Accurately estimate the encoding cost of a verbatim subframe. + // - Select the encoding strategy with the lowest cost out of this selection. + + auto constant_value = subframe[0]; + auto is_constant = true; + for (auto const sample : subframe) { + if (sample != constant_value) { + is_constant = false; + break; + } + } + + if (is_constant) { + dbgln_if(FLAC_ENCODER_DEBUG, "Encoding constant frame with value {}", constant_value); + TRY(bit_stream.write_bits(0u, 1)); // Reserved bit: value 0, one bit wide (the arguments are value, then bit count). + TRY(bit_stream.write_bits(to_underlying(FlacSubframeType::Constant), 6)); + TRY(bit_stream.write_bits(0u, 1)); // No wasted bits per sample: value 0, one bit wide. + TRY(bit_stream.write_bits(bit_cast(constant_value), m_bits_per_sample)); + return {}; + } + + auto verbatim_cost_bits = subframe.size() * m_bits_per_sample; + + Optional best_lpc_subframe; + auto current_min_cost = verbatim_cost_bits; + for (auto order : { FlacFixedLPC::Zero, FlacFixedLPC::One, FlacFixedLPC::Two, FlacFixedLPC::Three, FlacFixedLPC::Four }) { + // Too many warm-up samples would be required; the lower-level encoding procedures assume that this was checked. + if (to_underlying(order) > subframe.size()) + continue; + + auto encode_result = TRY(encode_fixed_lpc(order, subframe, current_min_cost)); + if (encode_result.has_value() && encode_result.value().residual_cost_bits < current_min_cost) { + current_min_cost = encode_result.value().residual_cost_bits; + best_lpc_subframe = encode_result.release_value(); + } + } + + // No LPC encoding was better than verbatim. 
+ if (!best_lpc_subframe.has_value()) { + dbgln_if(FLAC_ENCODER_DEBUG, "Best subframe type was Verbatim; encoding {} samples at {} bps = {} bits", subframe.size(), m_bits_per_sample, verbatim_cost_bits); + TRY(write_verbatim_subframe(subframe, bit_stream)); + } else { + dbgln_if(FLAC_ENCODER_DEBUG, "Best subframe type was Fixed LPC order {} (estimated cost {} bits); encoding {} samples", to_underlying(best_lpc_subframe->coefficients.get()), best_lpc_subframe->residual_cost_bits, subframe.size()); + TRY(write_lpc_subframe(best_lpc_subframe.release_value(), bit_stream)); + } + + return {}; +} + +ErrorOr> FlacWriter::encode_fixed_lpc(FlacFixedLPC order, ReadonlySpan subframe, size_t current_min_cost) +{ + FlacLPCEncodedSubframe lpc { + .warm_up_samples = Vector { subframe.trim(to_underlying(order)) }, + .coefficients = order, + .residuals {}, + // Warm-up sample cost. + .residual_cost_bits = to_underlying(order) * m_bits_per_sample, + .single_partition_optimal_order {}, + }; + TRY(lpc.residuals.try_ensure_capacity(subframe.size() - to_underlying(order))); + + Vector predicted; + TRY(predicted.try_resize_and_keep_capacity(subframe.size())); + lpc.warm_up_samples.span().copy_trimmed_to(predicted); + + // NOTE: Although we can't interrupt the prediction if the corresponding residuals would become too bad, + // we don't need to branch on the order in every loop during prediction, meaning this shouldn't cost us much. + predict_fixed_lpc(order, subframe, predicted); + + // There isn’t really a way of computing an LPC’s cost without performing most of the calculations, including a Rice parameter search. + // This is nevertheless optimized in multiple ways, so that we always bail out once we are sure no improvements can be made. + auto extra_residual_cost = NumericLimits::max(); + // Keep track of when we want to estimate costs again. We don't do this for every new residual since it's an expensive procedure. 
+ // The likelihood for misprediction is pretty high for large orders; start with a later index for them. + auto next_cost_estimation_index = min(subframe.size() - 1, first_residual_estimation * (to_underlying(order) + 1)); + for (auto i = to_underlying(order); i < subframe.size(); ++i) { + auto residual = subframe[i] - predicted[i]; + if (!AK::is_within_range(residual)) { + dbgln_if(FLAC_ENCODER_DEBUG, "Bailing from Fixed LPC order {} due to residual overflow ({} is outside the 32-bit range)", to_underlying(order), residual); + return Optional {}; + } + lpc.residuals.append(residual); + + if (i >= next_cost_estimation_index) { + // Find best exponential Golomb order. + // Storing this in the LPC data allows us to automatically reuse the computation during LPC encoding. + // FIXME: Use more than one partition to improve compression. + // FIXME: Investigate whether this can be estimated “good enough” to improve performance at the cost of compression strength. + // Especially at larger sample counts, it is unlikely that we will find a different optimal order. + // Therefore, use a zig-zag search around the previous optimal order. + extra_residual_cost = NumericLimits::max(); + auto start_order = lpc.single_partition_optimal_order; + size_t useless_parameters = 0; + size_t steps = 0; + constexpr auto max_rice_parameter = AK::exp2(4) - 1; + for (auto offset = 0; start_order + offset < max_rice_parameter || start_order - offset >= 0; ++offset) { + for (auto factor : { -1, 1 }) { + auto k = start_order + factor * offset; + if (k >= max_rice_parameter || k < 0) + continue; + + auto order_cost = count_exp_golomb_bits_in(k, lpc.residuals); + if (order_cost < extra_residual_cost) { + extra_residual_cost = order_cost; + lpc.single_partition_optimal_order = k; + } else { + useless_parameters++; + } + steps++; + // Don’t do 0 twice. + if (offset == 0) + break; + } + // If we found enough useless parameters, we probably won't find useful ones anymore. 
+ // The only exception is the first ever parameter search, where we search everything. + if (useless_parameters >= useless_parameter_threshold && start_order != 0) + break; + } + + // Min cost exceeded; bail out. + if (lpc.residual_cost_bits + extra_residual_cost > current_min_cost) { + dbgln_if(FLAC_ENCODER_DEBUG, " Bailing from Fixed LPC order {} at sample index {} and cost {} (best {})", to_underlying(order), i, lpc.residual_cost_bits + extra_residual_cost, current_min_cost); + return Optional {}; + } + + // Figure out when to next estimate costs. + auto estimated_bits_per_residual = static_cast(extra_residual_cost) / static_cast(i); + auto estimated_residuals_for_min_cost = static_cast(current_min_cost) / estimated_bits_per_residual; + auto unchecked_next_cost_estimation_index = AK::round_to(estimated_residuals_for_min_cost * (1 - residual_cost_margin)); + // Check either at the estimated residual, or the next residual if that is in the past, or the last residual. + next_cost_estimation_index = min(subframe.size() - 1, max(unchecked_next_cost_estimation_index, i + min_residual_estimation_step)); + dbgln_if(FLAC_ENCODER_DEBUG, " {} {:4} Estimate cost/residual {:.1f} (param {:2} after {:2} steps), will hit at {:6.1f}, jumping to {:4} (sanitized to {:4})", to_underlying(order), i, estimated_bits_per_residual, lpc.single_partition_optimal_order, steps, estimated_residuals_for_min_cost, unchecked_next_cost_estimation_index, next_cost_estimation_index); + } + } + + lpc.residual_cost_bits += extra_residual_cost; + return lpc; +} + +void predict_fixed_lpc(FlacFixedLPC order, ReadonlySpan samples, Span predicted_output) +{ + switch (order) { + case FlacFixedLPC::Zero: + // s_0(t) = 0 + for (auto i = to_underlying(order); i < predicted_output.size(); ++i) + predicted_output[i] += 0; + break; + case FlacFixedLPC::One: + // s_1(t) = s(t-1) + for (auto i = to_underlying(order); i < predicted_output.size(); ++i) + predicted_output[i] += samples[i - 1]; + break; + case 
FlacFixedLPC::Two: + // s_2(t) = 2s(t-1) - s(t-2) + for (auto i = to_underlying(order); i < predicted_output.size(); ++i) + predicted_output[i] += 2 * samples[i - 1] - samples[i - 2]; + break; + case FlacFixedLPC::Three: + // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3) + for (auto i = to_underlying(order); i < predicted_output.size(); ++i) + predicted_output[i] += 3 * samples[i - 1] - 3 * samples[i - 2] + samples[i - 3]; + break; + case FlacFixedLPC::Four: + // s_4(t) = 4s(t-1) - 6s(t-2) + 4s(t-3) - s(t-4) + for (auto i = to_underlying(order); i < predicted_output.size(); ++i) + predicted_output[i] += 4 * samples[i - 1] - 6 * samples[i - 2] + 4 * samples[i - 3] - samples[i - 4]; + break; + default: + VERIFY_NOT_REACHED(); + } +} + +// https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-verbatim-subframe +ErrorOr FlacWriter::write_verbatim_subframe(ReadonlySpan subframe, BigEndianOutputBitStream& bit_stream) +{ + TRY(bit_stream.write_bits(0u, 1)); + TRY(bit_stream.write_bits(to_underlying(FlacSubframeType::Verbatim), 6)); + TRY(bit_stream.write_bits(0u, 1)); + for (auto const& sample : subframe) + TRY(bit_stream.write_bits(bit_cast(sample), m_bits_per_sample)); + + return {}; +} + +// https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-fixed-predictor-subframe +ErrorOr FlacWriter::write_lpc_subframe(FlacLPCEncodedSubframe lpc_subframe, BigEndianOutputBitStream& bit_stream) +{ + // Reserved. + TRY(bit_stream.write_bits(0u, 1)); + // 9.2.1 Subframe header (https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-subframe-header) + u8 encoded_type; + if (lpc_subframe.coefficients.has()) + encoded_type = to_underlying(lpc_subframe.coefficients.get()) + to_underlying(FlacSubframeType::Fixed); + else + encoded_type = lpc_subframe.coefficients.get>().size() - 1 + to_underlying(FlacSubframeType::LPC); + + TRY(bit_stream.write_bits(encoded_type, 6)); + // No wasted bits per sample (unnecessary for the vast majority of data). 
+ TRY(bit_stream.write_bits(0u, 1)); + + for (auto const& warm_up_sample : lpc_subframe.warm_up_samples) + TRY(bit_stream.write_bits(bit_cast(warm_up_sample), m_bits_per_sample)); + + // 4-bit Rice parameters. + TRY(bit_stream.write_bits(0b00u, 2)); + // Only one partition (2^0 = 1). + TRY(bit_stream.write_bits(0b0000u, 4)); + TRY(write_rice_partition(lpc_subframe.single_partition_optimal_order, lpc_subframe.residuals, bit_stream)); + + return {}; +} + +ErrorOr FlacWriter::write_rice_partition(u8 k, ReadonlySpan residuals, BigEndianOutputBitStream& bit_stream) +{ + TRY(bit_stream.write_bits(k, 4)); + + for (auto const& residual : residuals) + TRY(encode_unsigned_exp_golomb(k, static_cast(residual), bit_stream)); + + return {}; +} + +u32 signed_to_rice(i32 x) +{ + // Implements (x < 0 ? -1 : 0) + 2 * abs(x) in about half as many instructions. + // The reference encoder’s implementation is known to be the fastest on -O2/3 clang and gcc: + // x << 1 = multiply by 2. + // For negative numbers, x >> 31 will create an all-ones XOR mask, meaning that the number will be inverted. + // In two's complement this is -value - 1, exactly what we need. + // For positive numbers, x >> 31 == 0. + return static_cast((x << 1) ^ (x >> 31)); +} + +// Adopted from https://github.com/xiph/flac/blob/28e4f0528c76b296c561e922ba67d43751990599/src/libFLAC/bitwriter.c#L727 +ErrorOr encode_unsigned_exp_golomb(u8 k, i32 value, BigEndianOutputBitStream& bit_stream) +{ + auto zigzag_encoded = signed_to_rice(value); + auto msbs = zigzag_encoded >> k; + auto pattern = 1u << k; + pattern |= zigzag_encoded & ((1 << k) - 1); + + TRY(bit_stream.write_bits(0u, msbs)); + TRY(bit_stream.write_bits(pattern, k + 1)); + + return {}; +} + +// Adopted from count_rice_bits_in_partition(): +// https://github.com/xiph/flac/blob/28e4f0528c76b296c561e922ba67d43751990599/src/libFLAC/stream_encoder.c#L4299 +size_t count_exp_golomb_bits_in(u8 k, ReadonlySpan residuals) +{ + // Exponential Golomb order size (4). 
+ // One unary stop bit and the entire exponential Golomb parameter for every residual. + size_t partition_bits = 4 + (1 + k) * residuals.size(); + + // Bit magic to compute the amount of leading unary bits. + for (auto const& residual : residuals) + partition_bits += (static_cast((residual << 1) ^ (residual >> 31)) >> k); + + return partition_bits; +} + +} diff --git a/Userland/Libraries/LibAudio/FlacWriter.h b/Userland/Libraries/LibAudio/FlacWriter.h new file mode 100644 index 0000000000..35f4a57fad --- /dev/null +++ b/Userland/Libraries/LibAudio/FlacWriter.h @@ -0,0 +1,118 @@ +/* + * Copyright (c) 2023, kleines Filmröllchen + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace Audio { + +// Encodes the sign representation method used in Rice coding. +// Numbers alternate between positive and negative: 0, 1, -1, 2, -2, 3, -3, 4, -4, 5, -5, ... +ALWAYS_INLINE u32 signed_to_rice(i32 x); + +// Encode a single number encoded with exponential golomb encoding of the specified order (k). +ALWAYS_INLINE ErrorOr encode_unsigned_exp_golomb(u8 k, i32 value, BigEndianOutputBitStream& bit_stream); + +size_t count_exp_golomb_bits_in(u8 k, ReadonlySpan residuals); + +void predict_fixed_lpc(FlacFixedLPC order, ReadonlySpan samples, Span predicted_output); + +// A simple FLAC encoder that writes FLAC files compatible with the streamable subset. +// The encoder currently has the following simple output properties: +// FIXME: All frames have a fixed sample size, see below. +// FIXME: All frames are encoded with the best fixed LPC predictor. +// FIXME: All residuals are encoded in one Rice partition. +class FlacWriter : public Encoder { + AK_MAKE_NONCOPYABLE(FlacWriter); + AK_MAKE_NONMOVABLE(FlacWriter); + + /// Tunable static parameters. Please try to improve these; only some have already been well-tuned! + + // Constant block size. 
+ static constexpr size_t block_size = 1024; + // Used as a percentage to check residual costs before the estimated "necessary" estimation point. + // We usually over-estimate residual costs, so this prevents us from overshooting the actual bail point. + static constexpr double residual_cost_margin = 0.07; + // At what sample index to first estimate residuals, so that the residual parameter can "stabilize" through more encoded values. + static constexpr size_t first_residual_estimation = 16; + // How many samples to advance at minimum before estimating residuals again. + static constexpr size_t min_residual_estimation_step = 20; + // After how many useless (i.e. worse than current optimal) Rice parameters to abort parameter search. + // Note that due to the zig-zag search, we start with searching the parameters that are most likely to be good. + static constexpr size_t useless_parameter_threshold = 2; + + enum class WriteState { + // Header has not been written at all, audio data cannot be written. + HeaderUnwritten, + // Header was written, i.e. sample format is finalized, + // but audio data has not been finalized and therefore some header information is still missing. + FormatFinalized, + // File is fully finalized, no more sample data can be written. 
+ FullyFinalized, + }; + +public: + static ErrorOr> create(NonnullOwnPtr stream, u32 sample_rate = 44100, u8 num_channels = 2, u16 bits_per_sample = 16); + virtual ~FlacWriter(); + + virtual ErrorOr write_samples(ReadonlySpan samples) override; + + virtual ErrorOr finalize() override; + + u32 sample_rate() const { return m_sample_rate; } + u8 num_channels() const { return m_num_channels; } + PcmSampleFormat sample_format() const { return integer_sample_format_for(m_bits_per_sample).value(); } + Stream const& output_stream() const { return *m_stream; } + + ErrorOr set_num_channels(u8 num_channels); + ErrorOr set_sample_rate(u32 sample_rate); + ErrorOr set_bits_per_sample(u16 bits_per_sample); + ErrorOr finalize_header_format(); + +private: + FlacWriter(NonnullOwnPtr); + ErrorOr write_header(); + + ErrorOr write_frame(); + ErrorOr write_subframe(ReadonlySpan subframe, BigEndianOutputBitStream& bit_stream); + ErrorOr write_lpc_subframe(FlacLPCEncodedSubframe lpc_subframe, BigEndianOutputBitStream& bit_stream); + ErrorOr write_verbatim_subframe(ReadonlySpan subframe, BigEndianOutputBitStream& bit_stream); + // Assumes 4-bit k for now. + ErrorOr write_rice_partition(u8 k, ReadonlySpan residuals, BigEndianOutputBitStream& bit_stream); + + // Aborts encoding once the costs exceed the previous minimum, thereby speeding up the encoder's parameter search. + // In this case, an empty Optional is returned. + ErrorOr> encode_fixed_lpc(FlacFixedLPC order, ReadonlySpan subframe, size_t current_min_cost); + + NonnullOwnPtr m_stream; + WriteState m_state { WriteState::HeaderUnwritten }; + + Vector m_sample_buffer {}; + size_t m_current_frame { 0 }; + + u32 m_sample_rate; + u8 m_num_channels; + u16 m_bits_per_sample; + + // Data updated during encoding; written to the header at the end. + u32 m_max_frame_size { 0 }; + u32 m_min_frame_size { NumericLimits::max() }; + size_t m_sample_count { 0 }; + // Remember where the STREAMINFO block was written in the stream. 
+ size_t m_streaminfo_start_index; +}; + +}