1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 18:37:35 +00:00

LibSoftGPU: Return a const& texel in Image to prevent copying

On every texel access, some floating point instructions involved in
copying 4 floats popped up. Let `Image::texel() const` return a
`FloatVector4 const&` to prevent these operations.

This results in a ~7% FPS increase in GLQuake on my machine.
This commit is contained in:
Jelle Raaijmakers 2022-09-14 16:19:39 +02:00 committed by Andreas Kling
parent e9d2f9a95e
commit 8ff7c52cf4
3 changed files with 10 additions and 10 deletions

View file

@ -73,10 +73,10 @@ static f32x4 wrap(f32x4 value, GPU::TextureWrapMode mode, f32x4 num_texels)
ALWAYS_INLINE static Vector4<f32x4> texel4(Image const& image, u32x4 level, u32x4 x, u32x4 y, u32x4 z)
{
auto t0 = image.texel(level[0], x[0], y[0], z[0]);
auto t1 = image.texel(level[1], x[1], y[1], z[1]);
auto t2 = image.texel(level[2], x[2], y[2], z[2]);
auto t3 = image.texel(level[3], x[3], y[3], z[3]);
auto const& t0 = image.texel(level[0], x[0], y[0], z[0]);
auto const& t1 = image.texel(level[1], x[1], y[1], z[1]);
auto const& t2 = image.texel(level[2], x[2], y[2], z[2]);
auto const& t3 = image.texel(level[3], x[3], y[3], z[3]);
return Vector4<f32x4> {
f32x4 { t0.x(), t1.x(), t2.x(), t3.x() },
@ -90,10 +90,10 @@ ALWAYS_INLINE static Vector4<f32x4> texel4border(Image const& image, u32x4 level
{
auto border_mask = maskbits(x < 0 || x >= w || y < 0 || y >= h);
auto t0 = border_mask & 1 ? border : image.texel(level[0], x[0], y[0], z[0]);
auto t1 = border_mask & 2 ? border : image.texel(level[1], x[1], y[1], z[1]);
auto t2 = border_mask & 4 ? border : image.texel(level[2], x[2], y[2], z[2]);
auto t3 = border_mask & 8 ? border : image.texel(level[3], x[3], y[3], z[3]);
auto const& t0 = (border_mask & 1) > 0 ? border : image.texel(level[0], x[0], y[0], z[0]);
auto const& t1 = (border_mask & 2) > 0 ? border : image.texel(level[1], x[1], y[1], z[1]);
auto const& t2 = (border_mask & 4) > 0 ? border : image.texel(level[2], x[2], y[2], z[2]);
auto const& t3 = (border_mask & 8) > 0 ? border : image.texel(level[3], x[3], y[3], z[3]);
return Vector4<f32x4> {
f32x4 { t0.x(), t1.x(), t2.x(), t3.x() },