From 8258618caf83fb02f2dad1ad53de520667be6db6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?kleines=20Filmr=C3=B6llchen?= <filmroellchen@serenityos.org>
Date: Tue, 27 Jun 2023 23:28:51 +0200
Subject: [PATCH] LibAudio: Mix multi-channel files more properly

We downmix multi-channel files into stereo for now, which at least
makes the other channels audible. The new multi-channel downmix
helper is intended to be used for other formats with the same or similar
channel arrangement, such as QOA.
---
 Userland/Libraries/LibAudio/FlacLoader.cpp | 57 ++++++++------
 Userland/Libraries/LibAudio/MultiChannel.h | 87 ++++++++++++++++++++++
 2 files changed, 120 insertions(+), 24 deletions(-)
 create mode 100644 Userland/Libraries/LibAudio/MultiChannel.h
diff --git a/Userland/Libraries/LibAudio/FlacLoader.cpp b/Userland/Libraries/LibAudio/FlacLoader.cpp
index 44cc133b84..b3ae3ec090 100644
--- a/Userland/Libraries/LibAudio/FlacLoader.cpp
+++ b/Userland/Libraries/LibAudio/FlacLoader.cpp
@@ -441,14 +441,20 @@ LoaderSamples FlacLoaderPlugin::next_frame()
     };
 
     u8 subframe_count = frame_channel_type_to_channel_count(channel_type);
-    Vector<Vector<i64>> current_subframes;
+    Vector<FixedArray<float>> current_subframes;
     current_subframes.ensure_capacity(subframe_count);
 
+    float sample_rescale = 1 / static_cast<float>(1 << (m_current_frame->bit_depth - 1));
+    dbgln_if(AFLACLOADER_DEBUG, "Samples will be rescaled from {} bits: factor {:.8f}", m_current_frame->bit_depth, sample_rescale);
+
     for (u8 i = 0; i < subframe_count; ++i) {
         FlacSubframeHeader new_subframe = TRY(next_subframe_header(bit_stream, i));
         Vector<i64> subframe_samples = TRY(parse_subframe(new_subframe, bit_stream));
         VERIFY(subframe_samples.size() == m_current_frame->sample_count);
-        current_subframes.unchecked_append(move(subframe_samples));
+        FixedArray<float> scaled_samples = TRY(FixedArray<float>::create(m_current_frame->sample_count));
+        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
+            scaled_samples[i] = static_cast<float>(subframe_samples[i]) * sample_rescale;
+        current_subframes.unchecked_append(move(scaled_samples));
     }
 
     // 11.2. Overview ("The audio data is composed of...")
@@ -459,55 +465,58 @@ LoaderSamples FlacLoaderPlugin::next_frame()
     [[maybe_unused]] u16 footer_checksum = TRY(bit_stream.read_bits<u16>(16));
     dbgln_if(AFLACLOADER_DEBUG, "Subframe footer checksum: {}", footer_checksum);
 
-    float sample_rescale = 1 / static_cast<float>(1 << (m_current_frame->bit_depth - 1));
-    dbgln_if(AFLACLOADER_DEBUG, "Sample rescaled from {} bits: factor {:.8f}", m_current_frame->bit_depth, sample_rescale);
-
-    FixedArray<Sample> samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+    FixedArray<Sample> samples;
 
     switch (channel_type) {
     case FlacFrameChannelType::Mono:
-        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
-            samples[i] = Sample { static_cast<float>(current_subframes[0][i]) * sample_rescale };
-        break;
     case FlacFrameChannelType::Stereo:
-    // TODO mix together surround channels on each side?
     case FlacFrameChannelType::StereoCenter:
     case FlacFrameChannelType::Surround4p0:
     case FlacFrameChannelType::Surround5p0:
     case FlacFrameChannelType::Surround5p1:
     case FlacFrameChannelType::Surround6p1:
-    case FlacFrameChannelType::Surround7p1:
-        for (size_t i = 0; i < m_current_frame->sample_count; ++i)
-            samples[i] = { static_cast<float>(current_subframes[0][i]) * sample_rescale, static_cast<float>(current_subframes[1][i]) * sample_rescale };
+    case FlacFrameChannelType::Surround7p1: {
+        auto new_samples = TRY(downmix_surround_to_stereo<FixedArray<float>>(move(current_subframes)));
+        samples.swap(new_samples);
         break;
-    case FlacFrameChannelType::LeftSideStereo:
+    }
+    case FlacFrameChannelType::LeftSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
         // channels are left (0) and side (1)
         for (size_t i = 0; i < m_current_frame->sample_count; ++i) {
             // right = left - side
-            samples[i] = { static_cast<float>(current_subframes[0][i]) * sample_rescale,
-                static_cast<float>(current_subframes[0][i] - current_subframes[1][i]) * sample_rescale };
+            samples[i] = { current_subframes[0][i],
+                current_subframes[0][i] - current_subframes[1][i] };
         }
         break;
-    case FlacFrameChannelType::RightSideStereo:
+    }
+    case FlacFrameChannelType::RightSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
         // channels are side (0) and right (1)
         for (size_t i = 0; i < m_current_frame->sample_count; ++i) {
             // left = right + side
-            samples[i] = { static_cast<float>(current_subframes[1][i] + current_subframes[0][i]) * sample_rescale,
-                static_cast<float>(current_subframes[1][i]) * sample_rescale };
+            samples[i] = { current_subframes[1][i] + current_subframes[0][i],
+                current_subframes[1][i] };
         }
         break;
-    case FlacFrameChannelType::MidSideStereo:
+    }
+    case FlacFrameChannelType::MidSideStereo: {
+        auto new_samples = TRY(FixedArray<Sample>::create(m_current_frame->sample_count));
+        samples.swap(new_samples);
         // channels are mid (0) and side (1)
         for (size_t i = 0; i < current_subframes[0].size(); ++i) {
-            i64 mid = current_subframes[0][i];
-            i64 side = current_subframes[1][i];
+            float mid = current_subframes[0][i];
+            float side = current_subframes[1][i];
             mid *= 2;
             // prevent integer division errors
-            samples[i] = { (static_cast<float>(mid + side) * .5f) * sample_rescale,
-                (static_cast<float>(mid - side) * .5f) * sample_rescale };
+            samples[i] = { (mid + side) * .5f,
+                (mid - side) * .5f };
         }
         break;
     }
+    }
 
     return samples;
 #undef FLAC_VERIFY
diff --git a/Userland/Libraries/LibAudio/MultiChannel.h b/Userland/Libraries/LibAudio/MultiChannel.h
new file mode 100644
index 0000000000..f9debeb4d0
--- /dev/null
+++ b/Userland/Libraries/LibAudio/MultiChannel.h
@@ -0,0 +1,87 @@
+/*
+ * Copyright (c) 2023, kleines Filmröllchen <filmroellchen@serenityos.org>
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Concepts.h>
+#include <AK/FixedArray.h>
+#include <LibAudio/Sample.h>
+
+namespace Audio {
+
+// Downmixes any number of channels to stereo, under the assumption that standard channel layout is followed:
+// 1 channel = mono
+// 2 channels = stereo (left, right)
+// 3 channels = left, right, center
+// 4 channels = front left/right, back left/right
+// 5 channels = front left/right, center, back left/right
+// 6 channels = front left/right, center, LFE, back left/right
+// 7 channels = front left/right, center, LFE, back center, side left/right
+// 8 channels = front left/right, center, LFE, back left/right, side left/right
+// Returns one Sample per input sample, or an error for 0 channels, more than 8 channels, or channels of differing lengths.
+template<ArrayLike<float> ChannelType, ArrayLike<ChannelType> InputType>
+ErrorOr<FixedArray<Sample>> downmix_surround_to_stereo(InputType input)
+{
+    auto channel_count = input.size();
+    if (channel_count == 0)
+        return Error::from_string_view("Cannot downmix from 0 channels"sv);
+
+    // Every case below indexes all channels with the first channel's length, so reject mismatched channels up front.
+    auto sample_count = input[0].size();
+    for (size_t channel = 1; channel < channel_count; ++channel) {
+        if (input[channel].size() != sample_count)
+            return Error::from_string_view("Cannot downmix channels of differing lengths"sv);
+    }
+
+    FixedArray<Sample> output = TRY(FixedArray<Sample>::create(sample_count));
+
+    // FIXME: We could figure out a better way to mix the channels, possibly spatially and with attenuation (the plain sums below can exceed a single channel's range), but for now:
+    //        - Center and LFE channels are added to both left and right.
+    //        - All left channels are added together on the left, all right channels are added together on the right.
+    switch (channel_count) {
+    case 1:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] };
+        break;
+    case 2:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i], input[1][i] };
+        break;
+    case 3:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[2][i], input[1][i] + input[2][i] };
+        break;
+    case 4:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[2][i], input[1][i] + input[3][i] };
+        break;
+    case 5:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[3][i] + input[2][i], input[1][i] + input[4][i] + input[2][i] };
+        break;
+    case 6:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[4][i] + input[2][i] + input[3][i],
+                input[1][i] + input[5][i] + input[2][i] + input[3][i] };
+        break;
+    case 7:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[5][i] + input[2][i] + input[3][i] + input[4][i],
+                input[1][i] + input[6][i] + input[2][i] + input[3][i] + input[4][i] };
+        break;
+    case 8:
+        for (size_t i = 0; i < sample_count; ++i)
+            output[i] = Sample { input[0][i] + input[4][i] + input[6][i] + input[2][i] + input[3][i],
+                input[1][i] + input[5][i] + input[7][i] + input[2][i] + input[3][i] };
+        break;
+    default:
+        return Error::from_string_view("Invalid number of channels greater than 8"sv);
+    }
+
+    return output;
+}
+
+}