mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 19:22:45 +00:00 
			
		
		
		
	LibPDF: Reorder loops in SampledFunction::evaluate()
Previously, we'd loop over the index of the output coordinate;
for example, for a CMYK->RGB function, we'd loop over RGB. For
every output index, we'd then sample the function at the CMYK
input point.
Now, we sample at CMYK once and return a span for all outputs,
since they're stored in contiguous memory. And we then loop
over the outputs only to do weighting and mapping to the target
range at the end.
Reduces the runtime of
      (cd Tests/LibPDF; \
          ../../Build/lagom/bin/BenchmarkPDF --benchmark_repetitions 5)
from 235.6±2.3ms to 103.2±3.3ms on my system, and makes
SampledFunction::evaluate() more similar to lerp_nd() in TagTypes.h.
			
			
This commit is contained in:
		
							parent
							
								
									b27aca9300
								
							
						
					
					
						commit
						9875ce0c78
					
				
					 1 changed files with 27 additions and 24 deletions
				
			
		|  | @ -28,7 +28,7 @@ public: | |||
| private: | ||||
|     SampledFunction(NonnullRefPtr<StreamObject>); | ||||
| 
 | ||||
|     float sample(ReadonlySpan<int> const& coordinates, size_t r) const | ||||
|     ReadonlySpan<u8> sample(ReadonlySpan<int> const& coordinates) const | ||||
|     { | ||||
|         // "For a function with multidimensional input (more than one input variable),
 | ||||
|         //  the sample values in the first dimension vary fastest,
 | ||||
|  | @ -46,7 +46,7 @@ private: | |||
|             offset += coordinates[i] * stride; | ||||
|             stride *= m_sizes[i]; | ||||
|         } | ||||
|         return m_sample_data[offset * m_range.size() + r]; | ||||
|         return m_sample_data.slice(offset * m_range.size(), m_range.size()); | ||||
|     } | ||||
| 
 | ||||
|     Vector<Bound> m_domain; | ||||
|  | @ -198,30 +198,33 @@ PDFErrorOr<ReadonlySpan<float>> SampledFunction::evaluate(ReadonlySpan<float> xs | |||
|         m_inputs[i] = ec - m_left_index[i]; | ||||
|     } | ||||
| 
 | ||||
|     for (size_t r = 0; r < m_range.size(); ++r) { | ||||
|         // For 1-D input data, we need to sample 2 points, one to the left and one to the right, and then interpolate between them.
 | ||||
|         // For 2-D input data, we need to sample 4 points (top-left, top-right, bottom-left, bottom-right),
 | ||||
|         // then reduce them to 2 points by interpolating along y, and then to 1 by interpolating along x.
 | ||||
|         // For 3-D input data, it's 8 points in a cube around the point, then reduce to 4 points by interpolating along z,
 | ||||
|         // then 2 by interpolating along y, then 1 by interpolating along x.
 | ||||
|         // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half
 | ||||
|         // by interpolating along that coordinate.
 | ||||
|         // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner,
 | ||||
|         // and sum up the weighted samples.
 | ||||
|         float sample_output = 0; | ||||
|         // The i'th bit of mask indicates if the i'th coordinate is rounded up or down.
 | ||||
|         Vector<int> coordinates; | ||||
|         coordinates.resize(m_domain.size()); | ||||
|         for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) { | ||||
|             float sample_weight = 1.0f; | ||||
|             for (size_t i = 0; i < m_domain.size(); ++i) { | ||||
|                 coordinates[i] = m_left_index[i] + ((mask >> i) & 1u); | ||||
|                 sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]); | ||||
|             } | ||||
|             sample_output += sample(coordinates, r) * sample_weight; | ||||
|     // For 1-D input data, we need to sample 2 points, one to the left and one to the right, and then interpolate between them.
 | ||||
|     // For 2-D input data, we need to sample 4 points (top-left, top-right, bottom-left, bottom-right),
 | ||||
|     // then reduce them to 2 points by interpolating along y, and then to 1 by interpolating along x.
 | ||||
|     // For 3-D input data, it's 8 points in a cube around the point, then reduce to 4 points by interpolating along z,
 | ||||
|     // then 2 by interpolating along y, then 1 by interpolating along x.
 | ||||
|     // So for the general case, we create 2**N samples, and then for each coordinate, we cut the number of samples in half
 | ||||
|     // by interpolating along that coordinate.
 | ||||
|     // Instead of storing all the 2**N samples, we can calculate the product of weights for each corner,
 | ||||
|     // and sum up the weighted samples.
 | ||||
|     Vector<float, 4> sample_outputs; | ||||
|     sample_outputs.resize(m_range.size()); | ||||
|     // The i'th bit of mask indicates if the i'th coordinate is rounded up or down.
 | ||||
|     Vector<int> coordinates; | ||||
|     coordinates.resize(m_domain.size()); | ||||
|     for (size_t mask = 0; mask < (1u << m_domain.size()); ++mask) { | ||||
|         float sample_weight = 1.0f; | ||||
|         for (size_t i = 0; i < m_domain.size(); ++i) { | ||||
|             coordinates[i] = m_left_index[i] + ((mask >> i) & 1u); | ||||
|             sample_weight *= ((mask >> i) & 1u) ? m_inputs[i] : (1.0f - m_inputs[i]); | ||||
|         } | ||||
|         ReadonlyBytes samples = sample(coordinates); | ||||
|         for (size_t r = 0; r < m_range.size(); ++r) | ||||
|             sample_outputs[r] += samples[r] * sample_weight; | ||||
|     } | ||||
| 
 | ||||
|         float result = interpolate(sample_output, 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); | ||||
|     for (size_t r = 0; r < m_range.size(); ++r) { | ||||
|         float result = interpolate(sample_outputs[r], 0.0f, 255.0f, m_decode[r].lower, m_decode[r].upper); | ||||
|         m_outputs[r] = clamp(result, m_range[r].lower, m_range[r].upper); | ||||
|     } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Nico Weber
						Nico Weber