From 625aac23670f4c33d91b2690bd3147c3618732b9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= <filmroellchen@serenityos.org>
Date: Wed, 5 Jul 2023 00:10:35 +0200
Subject: [PATCH] LibAudio: Add a FLAC encoder

This encoder can handle all integer formats and sample rates, though
only two channels well. It uses fixed LPC and performs a
close-to-optimal parameter search on the LPC order and residual Rice
parameter, leading to decent compression already.
---
 AK/Debug.h.in                              |   4 +
 Meta/CMake/all_the_debug_macros.cmake      |   1 +
 Userland/Libraries/LibAudio/CMakeLists.txt |   1 +
 Userland/Libraries/LibAudio/FlacLoader.cpp |   1 +
 Userland/Libraries/LibAudio/FlacTypes.h    |  46 ++
 Userland/Libraries/LibAudio/FlacWriter.cpp | 624 +++++++++++++++++++++
 Userland/Libraries/LibAudio/FlacWriter.h   | 118 ++++
 7 files changed, 795 insertions(+)
 create mode 100644 Userland/Libraries/LibAudio/FlacWriter.cpp
 create mode 100644 Userland/Libraries/LibAudio/FlacWriter.h
diff --git a/AK/Debug.h.in b/AK/Debug.h.in
index 31972ed3f3..d154f601d0 100644
--- a/AK/Debug.h.in
+++ b/AK/Debug.h.in
@@ -142,6 +142,10 @@
 #    cmakedefine01 FILE_WATCHER_DEBUG
 #endif
 
+#ifndef FLAC_ENCODER_DEBUG
+#    cmakedefine01 FLAC_ENCODER_DEBUG
+#endif
+
 #ifndef GEMINI_DEBUG
 #    cmakedefine01 GEMINI_DEBUG
 #endif
diff --git a/Meta/CMake/all_the_debug_macros.cmake b/Meta/CMake/all_the_debug_macros.cmake
index 7ad6d30466..fbf306bbfa 100644
--- a/Meta/CMake/all_the_debug_macros.cmake
+++ b/Meta/CMake/all_the_debug_macros.cmake
@@ -50,6 +50,7 @@ set(FAT_DEBUG ON)
 set(FILE_CONTENT_DEBUG ON)
 set(FILE_WATCHER_DEBUG ON)
 set(FILL_PATH_DEBUG ON)
+set(FLAC_ENCODER_DEBUG ON)
 set(FORK_DEBUG ON)
 set(FUTEX_DEBUG ON)
 set(FUTEXQUEUE_DEBUG ON)
diff --git a/Userland/Libraries/LibAudio/CMakeLists.txt b/Userland/Libraries/LibAudio/CMakeLists.txt
index 16ab17e7ea..40e8fdb4e5 100644
--- a/Userland/Libraries/LibAudio/CMakeLists.txt
+++ b/Userland/Libraries/LibAudio/CMakeLists.txt
@@ -5,6 +5,7 @@ set(SOURCES
     RIFFTypes.cpp
     WavLoader.cpp
     FlacLoader.cpp
+    FlacWriter.cpp
     WavWriter.cpp
     Metadata.cpp
     MP3Loader.cpp
diff --git a/Userland/Libraries/LibAudio/FlacLoader.cpp b/Userland/Libraries/LibAudio/FlacLoader.cpp
index 613ef6683f..afa8b05675 100644
--- a/Userland/Libraries/LibAudio/FlacLoader.cpp
+++ b/Userland/Libraries/LibAudio/FlacLoader.cpp
@@ -852,6 +852,7 @@ ErrorOr<Vector<i64>, LoaderError> FlacLoaderPlugin::decode_fixed_lpc(FlacSubfram
     // http://mi.eng.cam.ac.uk/reports/svr-ftp/auto-pdf/robinson_tr156.pdf page 4
     // The coefficients for order 4 are undocumented in the original FLAC specification(s), but can now be found in
     // https://datatracker.ietf.org/doc/html/draft-ietf-cellar-flac-03#section-10.2.5
+    // FIXME: Share this code with predict_fixed_lpc().
     switch (subframe.order) {
     case 0:
         // s_0(t) = 0
diff --git a/Userland/Libraries/LibAudio/FlacTypes.h b/Userland/Libraries/LibAudio/FlacTypes.h
index e2333a7310..48ab8ba1ee 100644
--- a/Userland/Libraries/LibAudio/FlacTypes.h
+++ b/Userland/Libraries/LibAudio/FlacTypes.h
@@ -25,6 +25,8 @@ namespace Audio {
 #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16 0xfffffffe
 #define FLAC_SAMPLERATE_AT_END_OF_HEADER_16X10 0xfffffffd
 
+constexpr StringView flac_magic = "fLaC"sv;
+
 // 11.22.11. FRAME CRC
 // The polynomial used here is known as CRC-8-CCITT.
 static constexpr u8 flac_polynomial = 0x07;
@@ -84,6 +86,8 @@ struct FlacRawMetadataBlock {
     FlacMetadataBlockType type;
     u32 length; // 24 bits
     ByteBuffer data;
+
+    ErrorOr<void> write_to_stream(Stream&) const;
 };
 
 enum class BlockingStrategy : u8 {
@@ -91,6 +95,29 @@ enum class BlockingStrategy : u8 {
     Variable = 1,
 };
 
+// Block sample count can be stored in one of 5 ways; these are the 4-bit codes used in the frame header.
+enum class BlockSizeCategory : u8 {
+    Reserved = 0b0000,
+    S192 = 0b0001,
+    // The formula for these four is 144 * (2^x), and it appears to be an MP3 compatibility feature.
+    S576 = 0b0010,
+    S1152 = 0b0011,
+    S2304 = 0b0100,
+    S4608 = 0b0101,
+    // Actual size (minus one) is stored later on in the frame header, as an 8-bit or 16-bit value.
+    Uncommon8Bits = 0b0110,
+    Uncommon16Bits = 0b0111,
+    // Formula 2^x.
+    S256 = 0b1000,
+    S512 = 0b1001,
+    S1024 = 0b1010,
+    S2048 = 0b1011,
+    S4096 = 0b1100,
+    S8192 = 0b1101,
+    S16384 = 0b1110,
+    S32768 = 0b1111,
+};
+
 // 11.22. FRAME_HEADER
 struct FlacFrameHeader {
     u32 sample_rate;
@@ -102,6 +129,8 @@ struct FlacFrameHeader {
     FlacFrameChannelType channels;
     u8 bit_depth;
     u8 checksum;
+
+    ErrorOr<void> write_to_stream(Stream&) const;
 };
 
 // 11.25. SUBFRAME_HEADER
@@ -113,4 +142,21 @@ struct FlacSubframeHeader {
     u8 bits_per_sample;
 };
 
+enum class FlacFixedLPC : size_t {
+    Zero = 0,
+    One = 1,
+    Two = 2,
+    Three = 3,
+    Four = 4,
+};
+
+struct FlacLPCEncodedSubframe {
+    Vector<i64> warm_up_samples; // Leading samples that are stored unpredicted; as many as the predictor order.
+    Variant<Vector<i64>, FlacFixedLPC> coefficients; // Either a list of coefficients or one of the fixed predictors.
+    Vector<i64> residuals; // Sample minus prediction, for every sample after the warm-up samples.
+    size_t residual_cost_bits; // Estimated size in bits of the warm-up samples plus the Rice-coded residuals.
+    // If we’re only using one Rice partition, this is the optimal order to use.
+    u8 single_partition_optimal_order;
+};
+
 }
diff --git a/Userland/Libraries/LibAudio/FlacWriter.cpp b/Userland/Libraries/LibAudio/FlacWriter.cpp
new file mode 100644
index 0000000000..59e6959aeb
--- /dev/null
+++ b/Userland/Libraries/LibAudio/FlacWriter.cpp
@@ -0,0 +1,624 @@
+/*
+ * Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#include "FlacWriter.h"
+#include <AK/BitStream.h>
+#include <AK/Endian.h>
+#include <AK/IntegralMath.h>
+#include <AK/MemoryStream.h>
+#include <LibCrypto/Checksum/ChecksummingStream.h>
+
+namespace Audio {
+
+ErrorOr<NonnullOwnPtr<FlacWriter>> FlacWriter::create(NonnullOwnPtr<SeekableStream> stream, u32 sample_rate, u8 num_channels, u16 bits_per_sample)
+{
+    auto writer = TRY(AK::adopt_nonnull_own_or_enomem(new (nothrow) FlacWriter(move(stream))));
+    TRY(writer->set_bits_per_sample(bits_per_sample));
+    TRY(writer->set_sample_rate(sample_rate));
+    TRY(writer->set_num_channels(num_channels));
+    return writer;
+}
+
+FlacWriter::FlacWriter(NonnullOwnPtr<SeekableStream> stream)
+    : m_stream(move(stream))
+{
+}
+
+FlacWriter::~FlacWriter()
+{
+    if (m_state != WriteState::FullyFinalized)
+        (void)finalize(); // Best effort: an error from finalize() is deliberately dropped here.
+}
+
+ErrorOr<void> FlacWriter::finalize()
+{
+    if (m_state == WriteState::FullyFinalized)
+        return Error::from_string_view("File is already finalized"sv);
+
+    // TODO: Write missing sample data instead of discarding it (a partially filled sample buffer is never encoded).
+
+    if (m_state == WriteState::HeaderUnwritten)
+        TRY(finalize_header_format());
+    // Patch the STREAMINFO fields that write_header() could only fill with placeholders.
+    {
+        // Skip to the frame size fields: 1 byte metadata block header + 3 bytes size + 2*2 bytes min/max block size
+        TRY(m_stream->seek(m_streaminfo_start_index + 8, AK::SeekMode::SetPosition));
+        BigEndianOutputBitStream bit_stream { MaybeOwned<Stream> { *m_stream } };
+        TRY(bit_stream.write_bits(m_min_frame_size, 24));
+        TRY(bit_stream.write_bits(m_max_frame_size, 24));
+        TRY(bit_stream.write_bits(m_sample_rate, 20)); // Format fields are rewritten to reach the sample count.
+        TRY(bit_stream.write_bits(m_num_channels - 1u, 3));
+        TRY(bit_stream.write_bits(m_bits_per_sample - 1u, 5));
+        TRY(bit_stream.write_bits(m_sample_count, 36));
+        TRY(bit_stream.align_to_byte_boundary());
+    }
+
+    // TODO: Write the audio data MD5 to the header.
+
+    m_stream->close();
+
+    m_state = WriteState::FullyFinalized;
+    return {};
+}
+
+ErrorOr<void> FlacWriter::finalize_header_format()
+{
+    if (m_state != WriteState::HeaderUnwritten)
+        return Error::from_string_view("Header format is already finalized"sv);
+    TRY(write_header());
+    m_state = WriteState::FormatFinalized;
+    return {};
+}
+
+ErrorOr<void> FlacWriter::set_num_channels(u8 num_channels)
+{
+    if (m_state != WriteState::HeaderUnwritten)
+        return Error::from_string_view("Header format is already finalized"sv);
+    if (num_channels < 1 || num_channels > 8)
+        return Error::from_string_view("FLAC only supports between 1 and 8 channels"sv);
+    // Zero is rejected as well: the headers store (channel count - 1) and every frame needs at least one subframe.
+    m_num_channels = num_channels;
+    return {};
+}
+
+ErrorOr<void> FlacWriter::set_sample_rate(u32 sample_rate)
+{
+    if (m_state != WriteState::HeaderUnwritten)
+        return Error::from_string_view("Header format is already finalized"sv);
+    // NOTE(review): the value is not range-checked here, although STREAMINFO stores it in a 20-bit field.
+    m_sample_rate = sample_rate;
+    return {};
+}
+
+ErrorOr<void> FlacWriter::set_bits_per_sample(u16 bits_per_sample)
+{
+    if (m_state != WriteState::HeaderUnwritten)
+        return Error::from_string_view("Header format is already finalized"sv);
+    if (bits_per_sample < 8 || bits_per_sample > 32)
+        return Error::from_string_view("FLAC only supports bits per sample between 8 and 32"sv);
+
+    m_bits_per_sample = bits_per_sample;
+    return {};
+}
+
+ErrorOr<void> FlacWriter::write_header()
+{
+    TRY(m_stream->write_until_depleted(flac_magic.bytes()));
+    m_streaminfo_start_index = TRY(m_stream->tell()); // finalize() seeks back relative to this position.
+
+    ByteBuffer data;
+    // STREAMINFO is always exactly 34 bytes long.
+    TRY(data.try_resize(34));
+    BigEndianOutputBitStream header_stream { TRY(try_make<FixedMemoryStream>(data.bytes())) };
+
+    // Duplication on purpose; both fields hold the constant block size (in samples):
+    // Minimum block size.
+    TRY(header_stream.write_bits(block_size, 16));
+    // Maximum block size.
+    TRY(header_stream.write_bits(block_size, 16));
+    // Leave the minimum and maximum frame sizes (in bytes) as unknown for now; finalize() fills them in.
+    TRY(header_stream.write_bits(0u, 24));
+    TRY(header_stream.write_bits(0u, 24));
+
+    TRY(header_stream.write_bits(m_sample_rate, 20));
+    TRY(header_stream.write_bits(m_num_channels - 1u, 3));
+    TRY(header_stream.write_bits(m_bits_per_sample - 1u, 5));
+    // Leave the sample count as unknown for now; finalize() fills it in.
+    TRY(header_stream.write_bits(0u, 36));
+
+    // TODO: Calculate the MD5 signature of all of the audio data. Until then, 16 zero bytes are written.
+    auto md5 = TRY(ByteBuffer::create_zeroed(128u / 8u));
+    TRY(header_stream.write_until_depleted(md5));
+
+    FlacRawMetadataBlock streaminfo_block = {
+        .is_last_block = true,
+        .type = FlacMetadataBlockType::STREAMINFO,
+        .length = static_cast<u32>(data.size()),
+        .data = data,
+    };
+
+    TRY(m_stream->write_value(streaminfo_block));
+    return {};
+}
+
+ErrorOr<void> FlacRawMetadataBlock::write_to_stream(Stream& stream) const
+{
+    BigEndianOutputBitStream bit_stream { MaybeOwned<Stream> { stream } };
+    TRY(bit_stream.write_bits(static_cast<u8>(is_last_block), 1));
+    TRY(bit_stream.write_bits(to_underlying(type), 7));
+    TRY(bit_stream.write_bits(length, 24));
+    // The 24-bit length field written above has to match the payload that follows exactly.
+    VERIFY(data.size() == length);
+    TRY(bit_stream.write_until_depleted(data));
+    return {};
+}
+
+// Maps a sample count to its 4-bit frame header code. If the given sample count is uncommon, one of the uncommon marker
+// block sizes is returned instead; the caller has to write the actual size into the frame header later on.
+static BlockSizeCategory to_common_block_size(u16 sample_count)
+{
+    switch (sample_count) {
+    case 192:
+        return BlockSizeCategory::S192;
+    case 576:
+        return BlockSizeCategory::S576;
+    case 1152:
+        return BlockSizeCategory::S1152;
+    case 2304:
+        return BlockSizeCategory::S2304;
+    case 4608:
+        return BlockSizeCategory::S4608;
+    case 256:
+        return BlockSizeCategory::S256;
+    case 512:
+        return BlockSizeCategory::S512;
+    case 1024:
+        return BlockSizeCategory::S1024;
+    case 2048:
+        return BlockSizeCategory::S2048;
+    case 4096:
+        return BlockSizeCategory::S4096;
+    case 8192:
+        return BlockSizeCategory::S8192;
+    case 16384:
+        return BlockSizeCategory::S16384;
+    case 32768:
+        return BlockSizeCategory::S32768;
+    }
+    if (sample_count - 1 <= 0xff) // The stored value is (sample count - 1), so that is what has to fit into 8 bits.
+        return BlockSizeCategory::Uncommon8Bits;
+    // Data type guarantees that 16-bit storage is possible.
+    return BlockSizeCategory::Uncommon16Bits;
+}
+
+static ByteBuffer to_utf8(u64 value)
+{
+    ByteBuffer buffer; // Receives the extended UTF-8 style encoding of `value`: one to seven bytes.
+    if (value <= 0x7f) { // All upper bounds are inclusive, so that every value gets its shortest encoding.
+        buffer.append(static_cast<u8>(value));
+    } else if (value <= 0x7ff) { // 11 bits
+        buffer.append(static_cast<u8>(0b110'00000 | (value >> 6)));
+        buffer.append(static_cast<u8>(0b10'000000 | (value & 0b111111)));
+    } else if (value <= 0xffff) { // 16 bits
+        buffer.append(static_cast<u8>(0b1110'0000 | (value >> 12)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 6) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 0) & 0b111111)));
+    } else if (value <= 0x1f'ffff) { // 21 bits
+        buffer.append(static_cast<u8>(0b11110'000 | (value >> 18)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 12) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 6) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 0) & 0b111111)));
+    } else if (value <= 0x3ff'ffff) { // 26 bits
+        buffer.append(static_cast<u8>(0b111110'00 | (value >> 24)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 18) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 12) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 6) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 0) & 0b111111)));
+    } else if (value <= 0x7fff'ffff) { // 31 bits
+        buffer.append(static_cast<u8>(0b1111110'0 | (value >> 30)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 24) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 18) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 12) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 6) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 0) & 0b111111)));
+    } else if (value <= 0xf'ffff'ffff) { // 36 bits
+        buffer.append(static_cast<u8>(0b11111110));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 30) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 24) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 18) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 12) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 6) & 0b111111)));
+        buffer.append(static_cast<u8>(0b10'000000 | ((value >> 0) & 0b111111)));
+    } else {
+        // Anything larger is illegal even in expanded UTF-8, but FLAC only passes 32-bit values anyways.
+        VERIFY_NOT_REACHED();
+    }
+    return buffer;
+}
+
+ErrorOr<void> FlacFrameHeader::write_to_stream(Stream& stream) const
+{
+    Crypto::Checksum::ChecksummingStream<FlacFrameHeaderCRC> checksumming_stream { MaybeOwned<Stream> { stream } };
+    BigEndianOutputBitStream bit_stream { MaybeOwned<Stream> { checksumming_stream } };
+    TRY(bit_stream.write_bits(0b11111111111110u, 14)); // Frame sync code.
+    TRY(bit_stream.write_bits(0u, 1)); // Reserved zero bit.
+    TRY(bit_stream.write_bits(to_underlying(blocking_strategy), 1));
+
+    auto common_block_size = to_common_block_size(sample_count);
+    TRY(bit_stream.write_bits(to_underlying(common_block_size), 4));
+
+    // We always store sample rate in the file header.
+    TRY(bit_stream.write_bits(0u, 4));
+    TRY(bit_stream.write_bits(to_underlying(channels), 4));
+    // We always store bit depth in the file header.
+    TRY(bit_stream.write_bits(0u, 3));
+    // Reserved zero bit.
+    TRY(bit_stream.write_bits(0u, 1));
+    // The sample or frame index is stored as a variable-length number in extended UTF-8 form.
+    auto coded_number = to_utf8(sample_or_frame_index);
+    TRY(bit_stream.write_until_depleted(coded_number));
+    // Block sizes without a dedicated code are appended explicitly, as (sample count - 1).
+    if (common_block_size == BlockSizeCategory::Uncommon8Bits)
+        TRY(bit_stream.write_value(static_cast<u8>(sample_count - 1)));
+    if (common_block_size == BlockSizeCategory::Uncommon16Bits)
+        TRY(bit_stream.write_value(BigEndian<u16>(static_cast<u16>(sample_count - 1))));
+
+    // Ensure that the checksum is calculated correctly: the digest is taken after all header fields were written.
+    TRY(bit_stream.align_to_byte_boundary());
+    auto checksum = checksumming_stream.digest();
+    TRY(bit_stream.write_value(checksum));
+
+    return {};
+}
+
+ErrorOr<void> FlacWriter::write_samples(ReadonlySpan<Sample> samples)
+{
+    if (m_state == WriteState::FullyFinalized)
+        return Error::from_string_view("File is already finalized"sv);
+    // NOTE(review): nothing here writes the header; presumably callers finalize the header format beforehand.
+    auto remaining_samples = samples;
+    while (remaining_samples.size() > 0) { // Stage samples in the buffer; every full block becomes one frame.
+        if (m_sample_buffer.size() == block_size) {
+            TRY(write_frame());
+            m_sample_buffer.clear();
+        }
+        auto amount_to_copy = min(remaining_samples.size(), block_size - m_sample_buffer.size()); // Bounded by the block size, not by the buffer's capacity.
+        auto current_buffer_size = m_sample_buffer.size();
+        TRY(m_sample_buffer.try_resize_and_keep_capacity(current_buffer_size + amount_to_copy));
+        remaining_samples.copy_trimmed_to(m_sample_buffer.span().slice(current_buffer_size));
+        remaining_samples = remaining_samples.slice(amount_to_copy);
+    }
+
+    // Ensure that the buffer is flushed if possible. A partially filled buffer stays around for the next call.
+    if (m_sample_buffer.size() == block_size) {
+        TRY(write_frame());
+        m_sample_buffer.clear();
+    }
+
+    return {};
+}
+
+ErrorOr<void> FlacWriter::write_frame()
+{
+    auto frame_samples = move(m_sample_buffer); // Everything buffered so far is encoded as a single frame.
+    // De-interleave and integer-quantize subframes; 1u keeps the shift valid at 32 bits per sample (1 << 31 overflows int).
+    float sample_rescale = static_cast<float>(1u << (m_bits_per_sample - 1));
+    auto subframe_samples = Vector<Vector<i64, block_size>>();
+    TRY(subframe_samples.try_resize_and_keep_capacity(m_num_channels));
+    for (auto const& sample : frame_samples) {
+        TRY(subframe_samples[0].try_append(static_cast<i64>(sample.left * sample_rescale)));
+        // FIXME: We don't have proper data for any channels past 2.
+        for (auto i = 1; i < m_num_channels; ++i)
+            TRY(subframe_samples[i].try_append(static_cast<i64>(sample.right * sample_rescale)));
+    }
+    // Frames use fixed blocking, so the header carries the index of the frame.
+    FlacFrameHeader header {
+        .sample_rate = m_sample_rate,
+        .sample_count = static_cast<u16>(frame_samples.size()),
+        .sample_or_frame_index = static_cast<u32>(m_current_frame),
+        .blocking_strategy = BlockingStrategy::Fixed,
+        // FIXME: We should brute-force channel coupling for stereo.
+        .channels = static_cast<FlacFrameChannelType>(m_num_channels - 1),
+        .bit_depth = static_cast<u8>(m_bits_per_sample),
+        // Calculated for us during header write.
+        .checksum = 0,
+    };
+    // Everything written through frame_stream (header and subframes) is covered by the frame checksum.
+    auto frame_stream = Crypto::Checksum::ChecksummingStream<IBMCRC> { MaybeOwned<Stream> { *m_stream } };
+
+    auto frame_start_offset = TRY(m_stream->tell());
+    TRY(frame_stream.write_value(header));
+
+    BigEndianOutputBitStream bit_stream { MaybeOwned<Stream> { frame_stream } };
+    for (auto const& subframe : subframe_samples)
+        TRY(write_subframe(subframe.span(), bit_stream));
+
+    TRY(bit_stream.align_to_byte_boundary());
+    auto frame_crc = frame_stream.digest();
+    dbgln_if(FLAC_ENCODER_DEBUG, "Frame {:4} CRC: {:04x}", m_current_frame, frame_crc);
+    TRY(frame_stream.write_value<AK::BigEndian<u16>>(frame_crc));
+    // Track the smallest and largest frame in bytes; finalize() writes both into STREAMINFO.
+    auto frame_end_offset = TRY(m_stream->tell());
+    auto frame_size = frame_end_offset - frame_start_offset;
+    m_max_frame_size = max(m_max_frame_size, frame_size);
+    m_min_frame_size = min(m_min_frame_size, frame_size);
+
+    m_current_frame++;
+    m_sample_count += frame_samples.size();
+
+    return {};
+}
+
+ErrorOr<void> FlacWriter::write_subframe(ReadonlySpan<i64> subframe, BigEndianOutputBitStream& bit_stream)
+{
+    // The current subframe encoding strategy is as follows:
+    // - Check if the subframe is constant; use constant encoding in this case.
+    // - Try all fixed predictors and record the resulting residuals.
+    // - Estimate their encoding cost by taking the sum of all absolute logarithmic residuals,
+    //   which is an accurate estimate of the final encoded size of the residuals.
+    // - Accurately estimate the encoding cost of a verbatim subframe.
+    // - Select the encoding strategy with the lowest cost out of this selection.
+
+    auto constant_value = subframe[0];
+    auto is_constant = true;
+    for (auto const sample : subframe) {
+        if (sample != constant_value) {
+            is_constant = false;
+            break;
+        }
+    }
+    // The subframe header is 8 bits wide: a zero bit, the 6-bit subframe type, and the wasted-bits flag.
+    if (is_constant) {
+        dbgln_if(FLAC_ENCODER_DEBUG, "Encoding constant frame with value {}", constant_value);
+        TRY(bit_stream.write_bits(0u, 1)); // Reserved zero bit; write_bits() takes (value, bit count).
+        TRY(bit_stream.write_bits(to_underlying(FlacSubframeType::Constant), 6));
+        TRY(bit_stream.write_bits(0u, 1)); // No wasted bits per sample.
+        TRY(bit_stream.write_bits(bit_cast<u64>(constant_value), m_bits_per_sample));
+        return {};
+    }
+
+    auto verbatim_cost_bits = subframe.size() * m_bits_per_sample;
+
+    Optional<FlacLPCEncodedSubframe> best_lpc_subframe;
+    auto current_min_cost = verbatim_cost_bits;
+    for (auto order : { FlacFixedLPC::Zero, FlacFixedLPC::One, FlacFixedLPC::Two, FlacFixedLPC::Three, FlacFixedLPC::Four }) {
+        // Too many warm-up samples would be required; the lower-level encoding procedures assume that this was checked.
+        if (to_underlying(order) > subframe.size())
+            continue;
+
+        auto encode_result = TRY(encode_fixed_lpc(order, subframe, current_min_cost));
+        if (encode_result.has_value() && encode_result.value().residual_cost_bits < current_min_cost) {
+            current_min_cost = encode_result.value().residual_cost_bits;
+            best_lpc_subframe = encode_result.release_value();
+        }
+    }
+
+    // No LPC encoding was better than verbatim.
+    if (!best_lpc_subframe.has_value()) {
+        dbgln_if(FLAC_ENCODER_DEBUG, "Best subframe type was Verbatim; encoding {} samples at {} bps = {} bits", subframe.size(), m_bits_per_sample, verbatim_cost_bits);
+        TRY(write_verbatim_subframe(subframe, bit_stream));
+    } else {
+        dbgln_if(FLAC_ENCODER_DEBUG, "Best subframe type was Fixed LPC order {} (estimated cost {} bits); encoding {} samples", to_underlying(best_lpc_subframe->coefficients.get<FlacFixedLPC>()), best_lpc_subframe->residual_cost_bits, subframe.size());
+        TRY(write_lpc_subframe(best_lpc_subframe.release_value(), bit_stream));
+    }
+
+    return {};
+}
+
+ErrorOr<Optional<FlacLPCEncodedSubframe>> FlacWriter::encode_fixed_lpc(FlacFixedLPC order, ReadonlySpan<i64> subframe, size_t current_min_cost)
+{
+    FlacLPCEncodedSubframe lpc {
+        .warm_up_samples = Vector<i64> { subframe.trim(to_underlying(order)) },
+        .coefficients = order,
+        .residuals {},
+        // Warm-up sample cost.
+        .residual_cost_bits = to_underlying(order) * m_bits_per_sample,
+        .single_partition_optimal_order {},
+    };
+    TRY(lpc.residuals.try_ensure_capacity(subframe.size() - to_underlying(order)));
+    // Prediction buffer: the warm-up samples are copied to its start, the rest comes from predict_fixed_lpc().
+    Vector<i64> predicted;
+    TRY(predicted.try_resize_and_keep_capacity(subframe.size()));
+    lpc.warm_up_samples.span().copy_trimmed_to(predicted);
+    // An empty Optional is returned whenever this order can't beat current_min_cost or a residual exceeds 32 bits.
+    // NOTE: Although we can't interrupt the prediction if the corresponding residuals would become too bad,
+    //       we don't need to branch on the order in every loop during prediction, meaning this shouldn't cost us much.
+    predict_fixed_lpc(order, subframe, predicted);
+
+    // There isn’t really a way of computing an LPC’s cost without performing most of the calculations, including a Rice parameter search.
+    // This is nevertheless optimized in multiple ways, so that we always bail out once we are sure no improvements can be made.
+    auto extra_residual_cost = NumericLimits<size_t>::max();
+    // Keep track of when we want to estimate costs again. We don't do this for every new residual since it's an expensive procedure.
+    // The likelihood for misprediction is pretty high for large orders; start with a later index for them.
+    auto next_cost_estimation_index = min(subframe.size() - 1, first_residual_estimation * (to_underlying(order) + 1));
+    for (auto i = to_underlying(order); i < subframe.size(); ++i) {
+        auto residual = subframe[i] - predicted[i];
+        if (!AK::is_within_range<i32>(residual)) {
+            dbgln_if(FLAC_ENCODER_DEBUG, "Bailing from Fixed LPC order {} due to residual overflow ({} is outside the 32-bit range)", to_underlying(order), residual);
+            return Optional<FlacLPCEncodedSubframe> {};
+        }
+        lpc.residuals.append(residual);
+
+        if (i >= next_cost_estimation_index) {
+            // Find best exponential Golomb order.
+            // Storing this in the LPC data allows us to automatically reuse the computation during LPC encoding.
+            // FIXME: Use more than one partition to improve compression.
+            // FIXME: Investigate whether this can be estimated “good enough” to improve performance at the cost of compression strength.
+            // Especially at larger sample counts, it is unlikely that we will find a different optimal order.
+            // Therefore, use a zig-zag search around the previous optimal order.
+            extra_residual_cost = NumericLimits<size_t>::max();
+            auto start_order = lpc.single_partition_optimal_order;
+            size_t useless_parameters = 0;
+            size_t steps = 0;
+            constexpr auto max_rice_parameter = AK::exp2(4) - 1; // Exclusive bound: the all-ones 4-bit value is never selected as a parameter.
+            for (auto offset = 0; start_order + offset < max_rice_parameter || start_order - offset >= 0; ++offset) {
+                for (auto factor : { -1, 1 }) {
+                    auto k = start_order + factor * offset;
+                    if (k >= max_rice_parameter || k < 0)
+                        continue;
+
+                    auto order_cost = count_exp_golomb_bits_in(k, lpc.residuals);
+                    if (order_cost < extra_residual_cost) {
+                        extra_residual_cost = order_cost;
+                        lpc.single_partition_optimal_order = k;
+                    } else {
+                        useless_parameters++;
+                    }
+                    steps++;
+                    // Don’t do 0 twice.
+                    if (offset == 0)
+                        break;
+                }
+                // If we found enough useless parameters, we probably won't find useful ones anymore.
+                // The only exception is the first ever parameter search, where we search everything.
+                if (useless_parameters >= useless_parameter_threshold && start_order != 0)
+                    break;
+            }
+
+            // Min cost exceeded; bail out.
+            if (lpc.residual_cost_bits + extra_residual_cost > current_min_cost) {
+                dbgln_if(FLAC_ENCODER_DEBUG, "  Bailing from Fixed LPC order {} at sample index {} and cost {} (best {})", to_underlying(order), i, lpc.residual_cost_bits + extra_residual_cost, current_min_cost);
+                return Optional<FlacLPCEncodedSubframe> {};
+            }
+
+            // Figure out when to next estimate costs: extrapolate at which residual the current cost rate would hit the minimum.
+            auto estimated_bits_per_residual = static_cast<double>(extra_residual_cost) / static_cast<double>(i);
+            auto estimated_residuals_for_min_cost = static_cast<double>(current_min_cost) / estimated_bits_per_residual;
+            auto unchecked_next_cost_estimation_index = AK::round_to<size_t>(estimated_residuals_for_min_cost * (1 - residual_cost_margin));
+            // Check either at the estimated residual, or the next residual if that is in the past, or the last residual.
+            next_cost_estimation_index = min(subframe.size() - 1, max(unchecked_next_cost_estimation_index, i + min_residual_estimation_step));
+            dbgln_if(FLAC_ENCODER_DEBUG, "    {} {:4} Estimate cost/residual {:.1f} (param {:2} after {:2} steps), will hit at {:6.1f}, jumping to {:4} (sanitized to {:4})", to_underlying(order), i, estimated_bits_per_residual, lpc.single_partition_optimal_order, steps, estimated_residuals_for_min_cost, unchecked_next_cost_estimation_index, next_cost_estimation_index);
+        }
+    }
+    // The last loop iteration always re-estimates, so this is the Rice cost of the complete residual list.
+    lpc.residual_cost_bits += extra_residual_cost;
+    return lpc;
+}
+
+void predict_fixed_lpc(FlacFixedLPC order, ReadonlySpan<i64> samples, Span<i64> predicted_output)
+{
+    switch (order) { // Every case adds the prediction for index i onto predicted_output[i], starting at i = order.
+    case FlacFixedLPC::Zero:
+        // s_0(t) = 0
+        for (auto i = to_underlying(order); i < predicted_output.size(); ++i)
+            predicted_output[i] += 0; // Adding zero leaves the output untouched.
+        break;
+    case FlacFixedLPC::One:
+        // s_1(t) = s(t-1)
+        for (auto i = to_underlying(order); i < predicted_output.size(); ++i)
+            predicted_output[i] += samples[i - 1];
+        break;
+    case FlacFixedLPC::Two:
+        // s_2(t) = 2s(t-1) - s(t-2)
+        for (auto i = to_underlying(order); i < predicted_output.size(); ++i)
+            predicted_output[i] += 2 * samples[i - 1] - samples[i - 2];
+        break;
+    case FlacFixedLPC::Three:
+        // s_3(t) = 3s(t-1) - 3s(t-2) + s(t-3)
+        for (auto i = to_underlying(order); i < predicted_output.size(); ++i)
+            predicted_output[i] += 3 * samples[i - 1] - 3 * samples[i - 2] + samples[i - 3];
+        break;
+    case FlacFixedLPC::Four:
+        // s_4(t) = 4s(t-1) - 6s(t-2) + 4s(t-3) - s(t-4)
+        for (auto i = to_underlying(order); i < predicted_output.size(); ++i)
+            predicted_output[i] += 4 * samples[i - 1] - 6 * samples[i - 2] + 4 * samples[i - 3] - samples[i - 4];
+        break;
+    default:
+        VERIFY_NOT_REACHED();
+    }
+}
+
+// https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-verbatim-subframe
+ErrorOr<void> FlacWriter::write_verbatim_subframe(ReadonlySpan<i64> subframe, BigEndianOutputBitStream& bit_stream)
+{
+    TRY(bit_stream.write_bits(0u, 1)); // Reserved.
+    TRY(bit_stream.write_bits(to_underlying(FlacSubframeType::Verbatim), 6));
+    TRY(bit_stream.write_bits(0u, 1)); // No wasted bits per sample.
+    for (auto const& sample : subframe) // Every sample is stored as-is, using m_bits_per_sample bits.
+        TRY(bit_stream.write_bits(bit_cast<u64>(sample), m_bits_per_sample));
+
+    return {};
+}
+
+// https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-fixed-predictor-subframe
+ErrorOr<void> FlacWriter::write_lpc_subframe(FlacLPCEncodedSubframe lpc_subframe, BigEndianOutputBitStream& bit_stream)
+{
+    // Reserved.
+    TRY(bit_stream.write_bits(0u, 1));
+    // 9.2.1 Subframe header (https://www.ietf.org/archive/id/draft-ietf-cellar-flac-08.html#name-subframe-header)
+    u8 encoded_type;
+    if (lpc_subframe.coefficients.has<FlacFixedLPC>())
+        encoded_type = to_underlying(lpc_subframe.coefficients.get<FlacFixedLPC>()) + to_underlying(FlacSubframeType::Fixed);
+    else
+        encoded_type = lpc_subframe.coefficients.get<Vector<i64>>().size() - 1 + to_underlying(FlacSubframeType::LPC);
+    // NOTE(review): in the non-fixed case, only the type is derived here; no coefficient values are written below.
+    TRY(bit_stream.write_bits(encoded_type, 6));
+    // No wasted bits per sample (unnecessary for the vast majority of data).
+    TRY(bit_stream.write_bits(0u, 1));
+
+    for (auto const& warm_up_sample : lpc_subframe.warm_up_samples)
+        TRY(bit_stream.write_bits(bit_cast<u64>(warm_up_sample), m_bits_per_sample));
+
+    // 4-bit Rice parameters.
+    TRY(bit_stream.write_bits(0b00u, 2));
+    // Only one partition (2^0 = 1).
+    TRY(bit_stream.write_bits(0b0000u, 4));
+    TRY(write_rice_partition(lpc_subframe.single_partition_optimal_order, lpc_subframe.residuals, bit_stream));
+
+    return {};
+}
+
+ErrorOr<void> FlacWriter::write_rice_partition(u8 k, ReadonlySpan<i64> residuals, BigEndianOutputBitStream& bit_stream)
+{
+    TRY(bit_stream.write_bits(k, 4)); // The partition starts with its 4-bit parameter.
+    // Every residual is narrowed to i32 here; encode_fixed_lpc() rejects residuals outside of that range.
+    for (auto const& residual : residuals)
+        TRY(encode_unsigned_exp_golomb(k, static_cast<i32>(residual), bit_stream));
+
+    return {};
+}
+
+u32 signed_to_rice(i32 x)
+{
+    // Implements (x < 0 ? -1 : 0) + 2 * abs(x) in about half as many instructions.
+    // The reference encoder’s implementation is known to be the fastest on -O2/3 clang and gcc:
+    // x << 1 = multiply by 2.
+    // For negative numbers, x >> 31 will create an all-ones XOR mask, meaning that the number will be inverted.
+    // In two's complement this is -value - 1, exactly what we need.
+    // For positive numbers, x >> 31 == 0.
+    return static_cast<u32>((x << 1) ^ (x >> 31));
+}
+
+// Adapted from https://github.com/xiph/flac/blob/28e4f0528c76b296c561e922ba67d43751990599/src/libFLAC/bitwriter.c#L727
+ErrorOr<void> encode_unsigned_exp_golomb(u8 k, i32 value, BigEndianOutputBitStream& bit_stream)
+{
+    auto zigzag_encoded = signed_to_rice(value);
+    auto msbs = zigzag_encoded >> k; // Everything above the k low bits is written in unary.
+    auto pattern = 1u << k; // The stop bit, followed by the k low bits of the value.
+    pattern |= zigzag_encoded & ((1 << k) - 1);
+    // Unary part first (msbs zero bits), then the (k + 1)-bit pattern.
+    TRY(bit_stream.write_bits(0u, msbs));
+    TRY(bit_stream.write_bits(pattern, k + 1));
+
+    return {};
+}
+
+// Adapted from count_rice_bits_in_partition():
+// https://github.com/xiph/flac/blob/28e4f0528c76b296c561e922ba67d43751990599/src/libFLAC/stream_encoder.c#L4299
+size_t count_exp_golomb_bits_in(u8 k, ReadonlySpan<i64> residuals)
+{
+    // Exponential Golomb order size (4): the 4 bits taken up by the parameter k itself.
+    // One unary stop bit and the entire exponential Golomb parameter for every residual.
+    size_t partition_bits = 4 + (1 + k) * residuals.size();
+
+    // Bit magic to compute the amount of leading unary bits; same mapping as signed_to_rice(), assumes 32-bit range values.
+    for (auto const& residual : residuals)
+        partition_bits += (static_cast<u32>((residual << 1) ^ (residual >> 31)) >> k);
+
+    return partition_bits;
+}
+
+}
diff --git a/Userland/Libraries/LibAudio/FlacWriter.h b/Userland/Libraries/LibAudio/FlacWriter.h
new file mode 100644
index 0000000000..35f4a57fad
--- /dev/null
+++ b/Userland/Libraries/LibAudio/FlacWriter.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/MaybeOwned.h>
+#include <AK/Noncopyable.h>
+#include <AK/RefPtr.h>
+#include <AK/Stream.h>
+#include <AK/StringView.h>
+#include <LibAudio/Encoder.h>
+#include <LibAudio/FlacTypes.h>
+#include <LibAudio/Sample.h>
+#include <LibAudio/SampleFormats.h>
+#include <LibCore/Forward.h>
+
+namespace Audio {
+
+// Encodes the sign representation method used in Rice coding.
+// Values alternate between negative and positive: 0, -1, 1, -2, 2, -3, 3, ... are mapped to 0, 1, 2, 3, 4, 5, 6, ...
+ALWAYS_INLINE u32 signed_to_rice(i32 x);
+
+// Writes a single value, folded to unsigned via signed_to_rice(), as an exponential Golomb code of the specified order (k).
+ALWAYS_INLINE ErrorOr<void> encode_unsigned_exp_golomb(u8 k, i32 value, BigEndianOutputBitStream& bit_stream);
+
+size_t count_exp_golomb_bits_in(u8 k, ReadonlySpan<i64> residuals);
+
+void predict_fixed_lpc(FlacFixedLPC order, ReadonlySpan<i64> samples, Span<i64> predicted_output);
+
+// A simple FLAC encoder that writes FLAC files compatible with the streamable subset.
+// The encoder currently has the following simple output properties:
+// FIXME: All frames have a fixed size in samples, see block_size below.
+// FIXME: All frames are encoded with the best fixed LPC predictor.
+// FIXME: All residuals are encoded in one Rice partition.
+class FlacWriter : public Encoder {
+    AK_MAKE_NONCOPYABLE(FlacWriter);
+    AK_MAKE_NONMOVABLE(FlacWriter);
+
+    /// Tunable static parameters. Please try to improve these; only some have already been well-tuned!
+
+    // Constant block size: every frame holds this many samples (see the FIXME above the class).
+    static constexpr size_t block_size = 1024;
+    // Relative margin used to check residual costs before the estimated "necessary" estimation point.
+    // We usually over-estimate residual costs, so this prevents us from overshooting the actual bail point.
+    static constexpr double residual_cost_margin = 0.07; // NOTE(review): presumably a fraction, i.e. 7% — confirm at the point of use.
+    // At what sample index to first estimate residuals, so that the residual parameter can "stabilize" through more encoded values.
+    static constexpr size_t first_residual_estimation = 16;
+    // How many samples to advance at minimum before estimating residuals again.
+    static constexpr size_t min_residual_estimation_step = 20;
+    // After how many useless (i.e. worse than current optimal) Rice parameters to abort parameter search.
+    // Note that due to the zig-zag search, we start with searching the parameters that are most likely to be good.
+    static constexpr size_t useless_parameter_threshold = 2;
+
+    enum class WriteState {
+        // Header has not been written at all, audio data cannot be written.
+        HeaderUnwritten,
+        // Header was written, i.e. sample format is finalized,
+        // but audio data has not been finalized and therefore some header information is still missing.
+        FormatFinalized,
+        // File is fully finalized, no more sample data can be written.
+        FullyFinalized,
+    };
+
+public:
+    static ErrorOr<NonnullOwnPtr<FlacWriter>> create(NonnullOwnPtr<SeekableStream> stream, u32 sample_rate = 44100, u8 num_channels = 2, u16 bits_per_sample = 16);
+    virtual ~FlacWriter();
+
+    virtual ErrorOr<void> write_samples(ReadonlySpan<Sample> samples) override;
+
+    virtual ErrorOr<void> finalize() override;
+
+    u32 sample_rate() const { return m_sample_rate; }
+    u8 num_channels() const { return m_num_channels; }
+    PcmSampleFormat sample_format() const { return integer_sample_format_for(m_bits_per_sample).value(); }
+    Stream const& output_stream() const { return *m_stream; }
+
+    ErrorOr<void> set_num_channels(u8 num_channels);
+    ErrorOr<void> set_sample_rate(u32 sample_rate);
+    ErrorOr<void> set_bits_per_sample(u16 bits_per_sample);
+    ErrorOr<void> finalize_header_format();
+
+private:
+    FlacWriter(NonnullOwnPtr<SeekableStream>);
+    ErrorOr<void> write_header();
+
+    ErrorOr<void> write_frame();
+    ErrorOr<void> write_subframe(ReadonlySpan<i64> subframe, BigEndianOutputBitStream& bit_stream);
+    ErrorOr<void> write_lpc_subframe(FlacLPCEncodedSubframe lpc_subframe, BigEndianOutputBitStream& bit_stream);
+    ErrorOr<void> write_verbatim_subframe(ReadonlySpan<i64> subframe, BigEndianOutputBitStream& bit_stream);
+    // Assumes 4-bit k for now.
+    ErrorOr<void> write_rice_partition(u8 k, ReadonlySpan<i64> residuals, BigEndianOutputBitStream& bit_stream);
+
+    // Aborts encoding once the costs exceed the previous minimum, thereby speeding up the encoder's parameter search.
+    // In this case, an empty Optional is returned.
+    ErrorOr<Optional<FlacLPCEncodedSubframe>> encode_fixed_lpc(FlacFixedLPC order, ReadonlySpan<i64> subframe, size_t current_min_cost);
+
+    NonnullOwnPtr<SeekableStream> m_stream;
+    WriteState m_state { WriteState::HeaderUnwritten };
+
+    Vector<Sample, block_size> m_sample_buffer {};
+    size_t m_current_frame { 0 };
+
+    u32 m_sample_rate; // NOTE(review): the three format fields have no default initializer; presumably assigned through the set_* methods before use — confirm.
+    u8 m_num_channels;
+    u16 m_bits_per_sample;
+
+    // Data updated during encoding; written to the header at the end.
+    u32 m_max_frame_size { 0 };
+    u32 m_min_frame_size { NumericLimits<u32>::max() };
+    size_t m_sample_count { 0 };
+    // Remember where the STREAMINFO block was written in the stream. No default initializer; NOTE(review): presumably assigned when the header is written — confirm.
+    size_t m_streaminfo_start_index;
+};
+
+}