diff --git a/Userland/Libraries/LibSoftGPU/Config.h b/Userland/Libraries/LibSoftGPU/Config.h index a87f6e299c..a8b6d1fca3 100644 --- a/Userland/Libraries/LibSoftGPU/Config.h +++ b/Userland/Libraries/LibSoftGPU/Config.h @@ -24,6 +24,14 @@ static constexpr int MAX_TEXTURE_SIZE = 2048; static constexpr float MAX_TEXTURE_LOD_BIAS = 2.f; static constexpr int SUBPIXEL_BITS = 4; +static constexpr int NUM_SHADER_INPUTS = 64; + +// Verify that we have enough inputs to hold vertex color and texture coordinates for all fixed function texture units +static_assert(NUM_SHADER_INPUTS >= 4 + GPU::NUM_TEXTURE_UNITS * 4); + +static constexpr int SHADER_INPUT_VERTEX_COLOR = 0; +static constexpr int SHADER_INPUT_FIRST_TEXCOORD = 4; + // See: https://www.khronos.org/opengl/wiki/Common_Mistakes#Texture_edge_color_problem // FIXME: make this dynamically configurable through ConfigServer static constexpr bool CLAMP_DEPRECATED_BEHAVIOR = false; diff --git a/Userland/Libraries/LibSoftGPU/Device.cpp b/Userland/Libraries/LibSoftGPU/Device.cpp index 87108c05a7..98b5ef26e5 100644 --- a/Userland/Libraries/LibSoftGPU/Device.cpp +++ b/Userland/Libraries/LibSoftGPU/Device.cpp @@ -578,9 +578,10 @@ void Device::rasterize_line_antialiased(GPU::Vertex& from, GPU::Vertex& to) [&from_color4, &from, &from_fog_depth4](auto& quad) { // FIXME: interpolate color, tex coords and fog depth along the distance of the line // in clip space (i.e. 
NOT distance_from_line) - quad.vertex_color = from_color4; + quad.set_input(SHADER_INPUT_VERTEX_COLOR, from_color4); for (size_t i = 0; i < GPU::NUM_TEXTURE_UNITS; ++i) - quad.texture_coordinates[i] = expand4(from.tex_coords[i]); + quad.set_input(SHADER_INPUT_FIRST_TEXCOORD + i * 4, expand4(from.tex_coords[i])); + quad.fog_depth = from_fog_depth4; }); } @@ -625,9 +626,10 @@ void Device::rasterize_point_aliased(GPU::Vertex& point) quad.depth = expand4(point.window_coordinates.z()); }, [&point](auto& quad) { - quad.vertex_color = expand4(point.color); + quad.set_input(SHADER_INPUT_VERTEX_COLOR, expand4(point.color)); for (size_t i = 0; i < GPU::NUM_TEXTURE_UNITS; ++i) - quad.texture_coordinates[i] = expand4(point.tex_coords[i]); + quad.set_input(SHADER_INPUT_FIRST_TEXCOORD + i * 4, expand4(point.tex_coords[i])); + quad.fog_depth = expand4(abs(point.eye_coordinates.z())); }); } @@ -660,9 +662,10 @@ void Device::rasterize_point_antialiased(GPU::Vertex& point) quad.depth = expand4(point.window_coordinates.z()); }, [&point](auto& quad) { - quad.vertex_color = expand4(point.color); + quad.set_input(SHADER_INPUT_VERTEX_COLOR, expand4(point.color)); for (size_t i = 0; i < GPU::NUM_TEXTURE_UNITS; ++i) - quad.texture_coordinates[i] = expand4(point.tex_coords[i]); + quad.set_input(SHADER_INPUT_FIRST_TEXCOORD + i * 4, expand4(point.tex_coords[i])); + quad.fog_depth = expand4(abs(point.eye_coordinates.z())); }); } @@ -810,12 +813,12 @@ void Device::rasterize_triangle(Triangle& triangle) // FIXME: make this more generic. 
We want to interpolate more than just color and uv if (m_options.shade_smooth) - quad.vertex_color = interpolate(expand4(vertex0.color), expand4(vertex1.color), expand4(vertex2.color), quad.barycentrics); + quad.set_input(SHADER_INPUT_VERTEX_COLOR, interpolate(expand4(vertex0.color), expand4(vertex1.color), expand4(vertex2.color), quad.barycentrics)); else - quad.vertex_color = expand4(vertex0.color); + quad.set_input(SHADER_INPUT_VERTEX_COLOR, expand4(vertex0.color)); for (GPU::TextureUnitIndex i = 0; i < GPU::NUM_TEXTURE_UNITS; ++i) - quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics); + quad.set_input(SHADER_INPUT_FIRST_TEXCOORD + i * 4, interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics)); if (m_options.fog_enabled) quad.fog_depth = fog_depth.dot(quad.barycentrics); @@ -1208,14 +1211,16 @@ ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad) { Array, GPU::NUM_TEXTURE_UNITS> texture_stage_texel; - auto current_color = quad.vertex_color; + auto current_color = quad.get_input_vector4(SHADER_INPUT_VERTEX_COLOR); + for (GPU::TextureUnitIndex i = 0; i < GPU::NUM_TEXTURE_UNITS; ++i) { if (!m_texture_unit_configuration[i].enabled) continue; auto const& sampler = m_samplers[i]; // OpenGL 2.0 ¶ 3.5.1 states (in a roundabout way) that texture coordinates must be divided by Q - auto texel = sampler.sample_2d(quad.texture_coordinates[i].xy() / quad.texture_coordinates[i].w()); + auto homogeneous_texture_coordinate = quad.get_input_vector4(SHADER_INPUT_FIRST_TEXCOORD + i * 4); + auto texel = sampler.sample_2d(homogeneous_texture_coordinate.xy() / homogeneous_texture_coordinate.w()); texture_stage_texel[i] = texel; INCREASE_STATISTICS_COUNTER(g_num_sampler_calls, 1); @@ -1244,7 +1249,7 @@ ALWAYS_INLINE void Device::shade_fragments(PixelQuad& quad) case GPU::TextureSource::Previous: return 
current_color; case GPU::TextureSource::PrimaryColor: - return quad.vertex_color; + return quad.get_input_vector4(SHADER_INPUT_VERTEX_COLOR); case GPU::TextureSource::Texture: return texel; case GPU::TextureSource::TextureStage: diff --git a/Userland/Libraries/LibSoftGPU/PixelQuad.h b/Userland/Libraries/LibSoftGPU/PixelQuad.h index 009ff33c03..643ea4a07e 100644 --- a/Userland/Libraries/LibSoftGPU/PixelQuad.h +++ b/Userland/Libraries/LibSoftGPU/PixelQuad.h @@ -21,11 +21,29 @@ using AK::SIMD::f32x4; using AK::SIMD::i32x4; struct PixelQuad final { + void set_input(int index, f32x4 value) { inputs[index] = value; } + f32x4 get_input_float(int index) const { return inputs[index]; } + + void set_input(int index, Vector4 const& value) + { + inputs[index] = value.x(); + inputs[index + 1] = value.y(); + inputs[index + 2] = value.z(); + inputs[index + 3] = value.w(); + } + Vector4 get_input_vector4(int index) const + { + return Vector4( + inputs[index], + inputs[index + 1], + inputs[index + 2], + inputs[index + 3]); + } + Vector2 screen_coordinates; Vector3 barycentrics; f32x4 depth; - Vector4 vertex_color; - Array, GPU::NUM_TEXTURE_UNITS> texture_coordinates; + Array inputs; Vector4 out_color; f32x4 fog_depth; i32x4 mask;