From d03a6cc6c6e5e2791588f5601403cd94a872bf8c Mon Sep 17 00:00:00 2001
From: Hendiadyoin1 <leon.a@serenityos.org>
Date: Fri, 1 Apr 2022 13:49:02 +0200
Subject: [PATCH] LibGfx: Use AK's rsqrt and cast to floats earlier

---
 Userland/Libraries/LibGfx/Gamma.h       |  9 +++++----
 Userland/Libraries/LibGfx/JPGLoader.cpp | 24 ++++++++++++------------
 2 files changed, 17 insertions(+), 16 deletions(-)
diff --git a/Userland/Libraries/LibGfx/Gamma.h b/Userland/Libraries/LibGfx/Gamma.h
index dbb645aa18..face37f303 100644
--- a/Userland/Libraries/LibGfx/Gamma.h
+++ b/Userland/Libraries/LibGfx/Gamma.h
@@ -14,6 +14,7 @@
 #endif
 
 #include <AK/SIMD.h>
+#include <AK/SIMDMath.h>
 
 #define GAMMA 2.2
 
@@ -59,8 +60,8 @@ inline f32x4 linear_to_gamma4(f32x4 x)
     // Source for approximation: https://mimosa-pudica.net/fast-gamma/
     constexpr float a = 0.00279491f;
     constexpr float b = 1.15907984f;
-    float c = (b / AK::sqrt(1.0f + a)) - 1;
-    return ((b * __builtin_ia32_rsqrtps(x + a)) - c) * x;
+    float c = (b * AK::rsqrt(1.0f + a)) - 1;
+    return ((b * AK::SIMD::rsqrt(x + a)) - c) * x;
 }
 
 // Linearize v1 and v2, lerp them by mix factor, then convert back.
@@ -86,8 +87,8 @@ inline float linear_to_gamma(float x)
     // Source for approximation: https://mimosa-pudica.net/fast-gamma/
     constexpr float a = 0.00279491;
     constexpr float b = 1.15907984;
-    float c = (b / AK::sqrt(1 + a)) - 1;
-    return ((b / AK::sqrt(x + a)) - c) * x;
+    float c = (b * AK::rsqrt(1 + a)) - 1;
+    return ((b * AK::rsqrt(x + a)) - c) * x;
 }
 
 // Linearize v1 and v2, lerp them by mix factor, then convert back.
diff --git a/Userland/Libraries/LibGfx/JPGLoader.cpp b/Userland/Libraries/LibGfx/JPGLoader.cpp
index 63c5a7395b..07fb88fb47 100644
--- a/Userland/Libraries/LibGfx/JPGLoader.cpp
+++ b/Userland/Libraries/LibGfx/JPGLoader.cpp
@@ -864,20 +864,20 @@ static void dequantize(JPGLoadingContext& context, Vector<Macroblock>& macrobloc
 
 static void inverse_dct(JPGLoadingContext const& context, Vector<Macroblock>& macroblocks)
 {
-    static float const m0 = 2.0 * AK::cos(1.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m1 = 2.0 * AK::cos(2.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m3 = 2.0 * AK::cos(2.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m5 = 2.0 * AK::cos(3.0 / 16.0 * 2.0 * AK::Pi<double>);
+    static float const m0 = 2.0f * AK::cos(1.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m1 = 2.0f * AK::cos(2.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m3 = 2.0f * AK::cos(2.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m5 = 2.0f * AK::cos(3.0f / 16.0f * 2.0f * AK::Pi<float>);
     static float const m2 = m0 - m5;
     static float const m4 = m0 + m5;
-    static float const s0 = AK::cos(0.0 / 16.0 * AK::Pi<double>) / sqrt(8);
-    static float const s1 = AK::cos(1.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s2 = AK::cos(2.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s3 = AK::cos(3.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s4 = AK::cos(4.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s5 = AK::cos(5.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s6 = AK::cos(6.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s7 = AK::cos(7.0 / 16.0 * AK::Pi<double>) / 2.0;
+    static float const s0 = AK::cos(0.0f / 16.0f * AK::Pi<float>) * AK::rsqrt(8.0f);
+    static float const s1 = AK::cos(1.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s2 = AK::cos(2.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s3 = AK::cos(3.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s4 = AK::cos(4.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s5 = AK::cos(5.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s6 = AK::cos(6.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s7 = AK::cos(7.0f / 16.0f * AK::Pi<float>) / 2.0f;
 
     for (u32 vcursor = 0; vcursor < context.mblock_meta.vcount; vcursor += context.vsample_factor) {
         for (u32 hcursor = 0; hcursor < context.mblock_meta.hcount; hcursor += context.hsample_factor) {