From f562c470e21a2e81df8bd6d630a6b661af155324 Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Fri, 2 Feb 2024 12:05:12 -0500 Subject: [PATCH] LibGfx+LibPDF: Simpler and faster N-D linear sampling Previously, if we wanted to e.g. do linear interpolation in 2-D, we'd get a sample point like (1.3, 4.4), then get 4 samples around it at (1, 4), (2, 4), (1, 5), (2, 5), then reduce the 4 samples to 2 samples by computing the combined samples `0.3 * f(1, 4) + 0.7 * f(2, 4)` and `0.3 * f(1, 5) + 0.7 * f(2, 5)`, and then 1-D linearly blending between these two samples with the factor 0.4. In the end we'd multiply the first value by 0.3 * 0.4, the second by 0.7 * 0.4, the third by 0.3 * 0.6, and the fourth by 0.7 * 0.6, and then sum them all up. This requires computing and storing 2**N samples, followed by another 2**N iterations to combine the 2**N samples to a single value. (N is in practice either 4 or 3, so 2**N isn't super huge.) Instead, for every sample we can directly compute the product of weights and sum them up directly. This lets us omit the second loop and storing 2**N values, in exchange for doing additional O(N) work per sample to compute the product. Takes Build/lagom/bin/image --no-output --invert-cmyk \ --assign-color-profile \ Build/lagom/Root/res/icc/Adobe/CMYK/USWebCoatedSWOP.icc \ --convert-to-color-profile serenity-sRGB.icc \ cmyk.jpg from 3.42s to 3.08s on my machine, almost 10% faster (and less code). Here cmyk.jpg is a 2253x3080 cmyk jpeg, and USWebCoatedSWOP.icc is an mft2 profile with input tables with 256 samples and a 9x9x9x9 CLUT. The LibPDF change is covered by TEST_CASE(sampled) in LibPDF.cpp, and the LibGfx change is basically the same change as the one in LibPDF (where the test results don't change) and the output subjectively looks identical. 
So hopefully this causes indeed no behavior change :^) --- Userland/Libraries/LibGfx/ICC/TagTypes.h | 17 +++++++---------- Userland/Libraries/LibPDF/Function.cpp | 19 +++++++++---------- 2 files changed, 16 insertions(+), 20 deletions(-) diff --git a/Userland/Libraries/LibGfx/ICC/TagTypes.h b/Userland/Libraries/LibGfx/ICC/TagTypes.h index eaaaf4579c..7fa00d4b2c 100644 --- a/Userland/Libraries/LibGfx/ICC/TagTypes.h +++ b/Userland/Libraries/LibGfx/ICC/TagTypes.h @@ -46,23 +46,20 @@ inline FloatVector3 lerp_nd(Function size, Function samples; - samples.resize(1u << x.size()); + FloatVector3 sample_output {}; // The i'th bit of mask indicates if the i'th coordinate is rounded up or down. Vector coordinates; coordinates.resize(x.size()); for (size_t mask = 0; mask < (1u << x.size()); ++mask) { - for (size_t i = 0; i < x.size(); ++i) + float sample_weight = 1.0f; + for (size_t i = 0; i < x.size(); ++i) { coordinates[i] = left_index[i] + ((mask >> i) & 1u); - samples[mask] = sample(coordinates); + sample_weight *= ((mask >> i) & 1u) ? factor[i] : 1.0f - factor[i]; + } + sample_output += sample(coordinates) * sample_weight; } - for (int i = static_cast(x.size() - 1); i >= 0; --i) { - for (size_t mask = 0; mask < (1u << i); ++mask) - samples[mask] = mix(samples[mask], samples[mask | (1u << i)], factor[i]); - } - - return samples[0]; + return sample_output; } using S15Fixed16 = FixedPoint<16, i32>; diff --git a/Userland/Libraries/LibPDF/Function.cpp b/Userland/Libraries/LibPDF/Function.cpp index fd9c05b350..36834499d4 100644 --- a/Userland/Libraries/LibPDF/Function.cpp +++ b/Userland/Libraries/LibPDF/Function.cpp @@ -206,23 +206,22 @@ PDFErrorOr> SampledFunction::evaluate(ReadonlySpan xs // then 2 by interpolating along y, then 1 by interpolating along x. // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half // by interpolating along that coordinate. 
- Vector samples; - samples.resize(1 << m_domain.size()); + // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner, + // and sum up the weighted samples. + float sample_output = 0; // The i'th bit of mask indicates if the i'th coordinate is rounded up or down. Vector coordinates; coordinates.resize(m_domain.size()); for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) { - for (size_t i = 0; i < m_domain.size(); ++i) + float sample_weight = 1.0f; + for (size_t i = 0; i < m_domain.size(); ++i) { coordinates[i] = m_left_index[i] + ((mask >> i) & 1u); - samples[mask] = sample(coordinates, r); + sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]); + } + sample_output += sample(coordinates, r) * sample_weight; } - for (int i = static_cast(m_domain.size() - 1); i >= 0; --i) { - for (size_t mask = 0; mask < (1u << i); ++mask) - samples[mask] = mix(samples[mask], samples[mask | (1 << i)], m_inputs[i]); - } - - float result = interpolate(samples[0], 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); + float result = interpolate(sample_output, 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); m_outputs[r] = clamp(result, m_range[r].lower, m_range[r].upper); }