LibAudio: Mix multi-channel files more properly

We downmix multi-channel files to stereo for now, which at least makes the other channels audible. The new multi-channel downmix helper is intended to be reused for other formats with the same or a similar channel arrangement, such as QOA.
2025-07-27 12:07:45 +00:00 · 2023-06-27 23:28:51 +02:00 · 2023-06-27 23:28:51 +02:00 · 8258618caf
commit 8258618caf
parent fc70d88367
2 changed files with 120 additions and 24 deletions
--- a/Userland/Libraries/LibAudio/FlacLoader.cpp
+++ b/Userland/Libraries/LibAudio/FlacLoader.cpp
@ -441,14 +441,20 @@ LoaderSamples FlacLoaderPlugin::next_frame()
    };

    u8 subframe_count = frame_channel_type_to_channel_count(channel_type);
-    Vector<Vector<i64>> current_subframes;
+    Vector<FixedArray<float>> current_subframes;
    current_subframes.ensure_capacity(subframe_count);

+    float sample_rescale = 1 / static_cast<float>(1 << (m_current_frame->bit_depth - 1));
+    dbgln_if(AFLACLOADER_DEBUG, "Samples will be rescaled from {} bits: factor {:.8f}", m_current_frame->bit_depth, sample_rescale);
+
    for (u8 i = 0; i < subframe_count; ++i) {
        FlacSubframeHeader new_subframe = TRY(next_subframe_header(bit_stream, i));
        Vector<i64> subframe_samples = TRY(parse_subframe(new_subframe, bit_stream));
        VERIFY(subframe_samples.size() == m_current_frame->sample_count);
-        current_subframes.unchecked_append(move(subframe_samples));
+        FixedArray<float> scaled_samples = TRY(FixedArray<float>::create(m_current_frame->sample_count));
+        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
+            scaled_samples[i] = static_cast<float>(subframe_samples[i]) * sample_rescale;
+        current_subframes.unchecked_append(move(scaled_samples));
    }

    // 11.2. Overview ("The audio data is composed of...")
@ -459,55 +465,58 @@ LoaderSamples FlacLoaderPlugin::next_frame()
    [[maybe_unused]] u16 footer_checksum = TRY(bit_stream.read_bits<u16>(16));
    dbgln_if(AFLACLOADER_DEBUG, "Subframe footer checksum: {}", footer_checksum);

-    float sample_rescale = 1 / static_cast<float>(1 << (m_current_frame->bit_depth - 1));
-    dbgln_if(AFLACLOADER_DEBUG, "Sample rescaled from {} bits: factor {:.8f}", m_current_frame->bit_depth, sample_rescale);
-
-    FixedArray<Sample> samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+    FixedArray<Sample> samples;

    switch (channel_type) {
    case FlacFrameChannelType::Mono:
-        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
-            samples[i] = Sample { static_cast<float>(current_subframes[0][i]) * sample_rescale };
-        break;
    case FlacFrameChannelType::Stereo:
-    // TODO mix together surround channels on each side?
    case FlacFrameChannelType::StereoCenter:
    case FlacFrameChannelType::Surround4p0:
    case FlacFrameChannelType::Surround5p0:
    case FlacFrameChannelType::Surround5p1:
    case FlacFrameChannelType::Surround6p1:
-    case FlacFrameChannelType::Surround7p1:
-        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
-            samples[i] = { static_cast<float>(current_subframes[0][i]) * sample_rescale, static_cast<float>(current_subframes[1][i]) * sample_rescale };
+    case FlacFrameChannelType::Surround7p1: {
+        auto new_samples = TRY(downmix_surround_to_stereo<FixedArray<float>>(move(current_subframes)));
+        samples.swap(new_samples);
        break;
-    case FlacFrameChannelType::LeftSideStereo:
+    }
+    case FlacFrameChannelType::LeftSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
        // channels are left (0) and side (1)
        for (size_t i = 0; i < m_current_frame->sample_count; ++i) {
            // right = left - side
-            samples[i] = { static_cast<float>(current_subframes[0][i]) * sample_rescale,
-                static_cast<float>(current_subframes[0][i] - current_subframes[1][i]) * sample_rescale };
+            samples[i] = { current_subframes[0][i],
+                current_subframes[0][i] - current_subframes[1][i] };
        }
        break;
-    case FlacFrameChannelType::RightSideStereo:
+    }
+    case FlacFrameChannelType::RightSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
        // channels are side (0) and right (1)
        for (size_t i = 0; i < m_current_frame->sample_count; ++i) {
            // left = right + side
-            samples[i] = { static_cast<float>(current_subframes[1][i] + current_subframes[0][i]) * sample_rescale,
-                static_cast<float>(current_subframes[1][i]) * sample_rescale };
+            samples[i] = { current_subframes[1][i] + current_subframes[0][i],
+                current_subframes[1][i] };
        }
        break;
-    case FlacFrameChannelType::MidSideStereo:
+    }
+    case FlacFrameChannelType::MidSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
        // channels are mid (0) and side (1)
        for (size_t i = 0; i < current_subframes[0].size(); ++i) {
-            i64 mid = current_subframes[0][i];
-            i64 side = current_subframes[1][i];
+            float mid = current_subframes[0][i];
+            float side = current_subframes[1][i];
            mid *= 2;
            // prevent integer division errors
-            samples[i] = { (static_cast<float>(mid + side) * .5f) * sample_rescale,
-                (static_cast<float>(mid - side) * .5f) * sample_rescale };
+            samples[i] = { (mid + side) * .5f,
+                (mid - side) * .5f };
        }
        break;
    }
+    }

    return samples;
 #undef FLAC_VERIFY
--- a/Userland/Libraries/LibAudio/MultiChannel.h
+++ b/Userland/Libraries/LibAudio/MultiChannel.h
@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Concepts.h>
+#include <AK/FixedArray.h>
+#include <LibAudio/Sample.h>
+
+namespace Audio {
+
+// Downmixes 1 to 8 channels to stereo, under the assumption that standard channel layout is followed:
+// 1 channel = mono
+// 2 channels = stereo (left, right)
+// 3 channels = left, right, center
+// 4 channels = front left/right, back left/right
+// 5 channels = front left/right, center, back left/right
+// 6 channels = front left/right, center, LFE, back left/right
+// 7 channels = front left/right, center, LFE, back center, side left/right
+// 8 channels = front left/right, center, LFE, back left/right, side left/right
+//
+// `input` holds one array of samples per channel; all channels must contain the same number of samples.
+// Channels are summed as-is, no gain normalization is applied to the mixed result.
+// Returns one Sample per input sample position, or an error if there are 0 channels, more than 8 channels,
+// channels of differing length, or if allocating the output fails.
+template<ArrayLike<float> ChannelType, ArrayLike<ChannelType> InputType>
+ErrorOr<FixedArray<Sample>> downmix_surround_to_stereo(InputType input)
+{
+    auto channel_count = input.size();
+    if (channel_count == 0)
+        return Error::from_string_view("Cannot downmix from 0 channels"sv);
+    // Reject unsupported layouts before allocating anything.
+    if (channel_count > 8)
+        return Error::from_string_view("Invalid number of channels greater than 8"sv);
+
+    // Every case below indexes each channel up to sample_count, so all channel lengths have to agree.
+    auto sample_count = input[0].size();
+    for (size_t channel = 1; channel < channel_count; ++channel) {
+        if (input[channel].size() != sample_count)
+            return Error::from_string_view("Channels have differing sample counts"sv);
+    }
+
+    FixedArray<Sample> output = TRY(FixedArray<Sample>::create(sample_count));
+
+    // FIXME: We could figure out a better way to mix the channels, possibly spatially, but for now:
+    //        - Center and LFE channels are added to both left and right.
+    //        - All left channels are added together on the left, all right channels are added together on the right.
+    // The channel count was validated above, so the cases below are exhaustive.
+    switch (channel_count) {
+    case 1:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] };
+        break;
+    case 2:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i], input[1][i] };
+        break;
+    case 3:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[2][i],
+                input[1][i] + input[2][i] };
+        break;
+    case 4:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[2][i],
+                input[1][i] + input[3][i] };
+        break;
+    case 5:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[3][i] + input[2][i],
+                input[1][i] + input[4][i] + input[2][i] };
+        break;
+    case 6:
+        for (size_t i = 0; i < sample_count; ++i) {
+            output[i] = Sample { input[0][i] + input[4][i] + input[2][i] + input[3][i],
+                input[1][i] + input[5][i] + input[2][i] + input[3][i] };
+        }
+        break;
+    case 7:
+        for (size_t i = 0; i < sample_count; ++i) {
+            output[i] = Sample { input[0][i] + input[5][i] + input[2][i] + input[3][i] + input[4][i],
+                input[1][i] + input[6][i] + input[2][i] + input[3][i] + input[4][i] };
+        }
+        break;
+    case 8:
+        for (size_t i = 0; i < sample_count; ++i) {
+            output[i] = Sample { input[0][i] + input[4][i] + input[6][i] + input[2][i] + input[3][i],
+                input[1][i] + input[5][i] + input[7][i] + input[2][i] + input[3][i] };
+        }
+        break;
+    }
+
+    return output;
+}
+
+}