From 9875ce0c78262c94db256bc78f475e0983d8723b Mon Sep 17 00:00:00 2001 From: Nico Weber Date: Tue, 13 Feb 2024 09:08:52 -0500 Subject: [PATCH] LibPDF: Reorder loops in SampledFunction::evaluate() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously, we'd loop over the index of the output coordinate, for example for a CMYK->RGB function, we'd loop over RGB. For every output index, we'd then sample the function at the CMYK input point. Now, we sample at CMYK once and return a span for all outputs, since they're stored in contiguous memory. And we then loop over the outputs only to do weighting and mapping to the target range at the end. Reduces the runtime of (cd Tests/LibPDF; \ ../../Build/lagom/bin/BenchmarkPDF --benchmark_repetitions 5) from 235.6±2.3ms to 103.2±3.3ms on my system, and makes SampledFunction::evaluate() more similar to lerp_nd() in TagTypes.h. --- Userland/Libraries/LibPDF/Function.cpp | 51 ++++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/Userland/Libraries/LibPDF/Function.cpp b/Userland/Libraries/LibPDF/Function.cpp index 1c3867c750..6971b3c1ba 100644 --- a/Userland/Libraries/LibPDF/Function.cpp +++ b/Userland/Libraries/LibPDF/Function.cpp @@ -28,7 +28,7 @@ public: private: SampledFunction(NonnullRefPtr); - float sample(ReadonlySpan const& coordinates, size_t r) const + ReadonlySpan sample(ReadonlySpan const& coordinates) const { // "For a function with multidimensional input (more than one input variable), // the sample values in the first dimension vary fastest, @@ -46,7 +46,7 @@ private: offset += coordinates[i] * stride; stride *= m_sizes[i]; } - return m_sample_data[offset * m_range.size() + r]; + return m_sample_data.slice(offset * m_range.size(), m_range.size()); } Vector m_domain; @@ -198,30 +198,33 @@ PDFErrorOr> SampledFunction::evaluate(ReadonlySpan xs m_inputs[i] = ec - m_left_index[i]; } - for (size_t r = 0; r < m_range.size(); ++r) { - // For 1-D input data, we need to sample 2 points, one to the left and one to the right, and then interpolate between them. - // For 2-D input data, we need to sample 4 points (top-left, top-right, bottom-left, bottom-right), - // then reduce them to 2 points by interpolating along y, and then to 1 by interpolating along x. - // For 3-D input data, it's 8 points in a cube around the point, then reduce to 4 points by interpolating along z, - // then 2 by interpolating along y, then 1 by interpolating along x. - // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half - // by interpolating along that coordinate. - // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner, - // and sum up the weighted samples. - float sample_output = 0; - // The i'th bit of mask indicates if the i'th coordinate is rounded up or down. - Vector coordinates; - coordinates.resize(m_domain.size()); - for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) { - float sample_weight = 1.0f; - for (size_t i = 0; i < m_domain.size(); ++i) { - coordinates[i] = m_left_index[i] + ((mask >> i) & 1u); - sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]); - } - sample_output += sample(coordinates, r) * sample_weight; + // For 1-D input data, we need to sample 2 points, one to the left and one to the right, and then interpolate between them. + // For 2-D input data, we need to sample 4 points (top-left, top-right, bottom-left, bottom-right), + // then reduce them to 2 points by interpolating along y, and then to 1 by interpolating along x. + // For 3-D input data, it's 8 points in a cube around the point, then reduce to 4 points by interpolating along z, + // then 2 by interpolating along y, then 1 by interpolating along x. + // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half + // by interpolating along that coordinate. + // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner, + // and sum up the weighted samples. + Vector sample_outputs; + sample_outputs.resize(m_range.size()); + // The i'th bit of mask indicates if the i'th coordinate is rounded up or down. + Vector coordinates; + coordinates.resize(m_domain.size()); + for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) { + float sample_weight = 1.0f; + for (size_t i = 0; i < m_domain.size(); ++i) { + coordinates[i] = m_left_index[i] + ((mask >> i) & 1u); + sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]); } + ReadonlyBytes samples = sample(coordinates); + for (size_t r = 0; r < m_range.size(); ++r) + sample_outputs[r] += samples[r] * sample_weight; + } - float result = interpolate(sample_output, 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); + for (size_t r = 0; r < m_range.size(); ++r) { + float result = interpolate(sample_outputs[r], 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); m_outputs[r] = clamp(result, m_range[r].lower, m_range[r].upper); }