LibGfx: Use AK's rsqrt and cast to floats earlier

2026-01-12 23:31:01 +00:00 · 2022-04-01 13:49:02 +02:00 · 2022-04-01 13:49:02 +02:00 · d03a6cc6c6
commit d03a6cc6c6
parent 5ba5a6615d
2 changed files with 17 additions and 16 deletions
--- a/Userland/Libraries/LibGfx/Gamma.h
+++ b/Userland/Libraries/LibGfx/Gamma.h
@@ -14,6 +14,7 @@
 #endif

 #include <AK/SIMD.h>
+#include <AK/SIMDMath.h>

 #define GAMMA 2.2

@@ -59,8 +60,8 @@ inline f32x4 linear_to_gamma4(f32x4 x)
    // Source for approximation: https://mimosa-pudica.net/fast-gamma/
    constexpr float a = 0.00279491f;
    constexpr float b = 1.15907984f;
-    float c = (b / AK::sqrt(1.0f + a)) - 1;
-    return ((b * __builtin_ia32_rsqrtps(x + a)) - c) * x;
+    float c = (b * AK::rsqrt(1.0f + a)) - 1;
+    return ((b * AK::SIMD::rsqrt(x + a)) - c) * x;
 }

 // Linearize v1 and v2, lerp them by mix factor, then convert back.
@@ -86,8 +87,8 @@ inline float linear_to_gamma(float x)
    // Source for approximation: https://mimosa-pudica.net/fast-gamma/
    constexpr float a = 0.00279491;
    constexpr float b = 1.15907984;
-    float c = (b / AK::sqrt(1 + a)) - 1;
-    return ((b / AK::sqrt(x + a)) - c) * x;
+    float c = (b * AK::rsqrt(1 + a)) - 1;
+    return ((b * AK::rsqrt(x + a)) - c) * x;
 }

 // Linearize v1 and v2, lerp them by mix factor, then convert back.
--- a/Userland/Libraries/LibGfx/JPGLoader.cpp
+++ b/Userland/Libraries/LibGfx/JPGLoader.cpp
@@ -864,20 +864,20 @@ static void dequantize(JPGLoadingContext& context, Vector<Macroblock>& macrobloc

 static void inverse_dct(JPGLoadingContext const& context, Vector<Macroblock>& macroblocks)
 {
-    static float const m0 = 2.0 * AK::cos(1.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m1 = 2.0 * AK::cos(2.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m3 = 2.0 * AK::cos(2.0 / 16.0 * 2.0 * AK::Pi<double>);
-    static float const m5 = 2.0 * AK::cos(3.0 / 16.0 * 2.0 * AK::Pi<double>);
+    static float const m0 = 2.0f * AK::cos(1.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m1 = 2.0f * AK::cos(2.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m3 = 2.0f * AK::cos(2.0f / 16.0f * 2.0f * AK::Pi<float>);
+    static float const m5 = 2.0f * AK::cos(3.0f / 16.0f * 2.0f * AK::Pi<float>);
    static float const m2 = m0 - m5;
    static float const m4 = m0 + m5;
-    static float const s0 = AK::cos(0.0 / 16.0 * AK::Pi<double>) / sqrt(8);
-    static float const s1 = AK::cos(1.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s2 = AK::cos(2.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s3 = AK::cos(3.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s4 = AK::cos(4.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s5 = AK::cos(5.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s6 = AK::cos(6.0 / 16.0 * AK::Pi<double>) / 2.0;
-    static float const s7 = AK::cos(7.0 / 16.0 * AK::Pi<double>) / 2.0;
+    static float const s0 = AK::cos(0.0f / 16.0f * AK::Pi<float>) * AK::rsqrt(8.0f);
+    static float const s1 = AK::cos(1.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s2 = AK::cos(2.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s3 = AK::cos(3.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s4 = AK::cos(4.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s5 = AK::cos(5.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s6 = AK::cos(6.0f / 16.0f * AK::Pi<float>) / 2.0f;
+    static float const s7 = AK::cos(7.0f / 16.0f * AK::Pi<float>) / 2.0f;

    for (u32 vcursor = 0; vcursor < context.mblock_meta.vcount; vcursor += context.vsample_factor) {
        for (u32 hcursor = 0; hcursor < context.mblock_meta.hcount; hcursor += context.hsample_factor) {