mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 07:07:34 +00:00
LibSoftGPU: Use float
instead of int
for triangle screen coords
This replaces the fixed point subpixel precision logic. GLQuake now effectively renders artifact-free. Previously white/gray pixels would sometimes be visible at triangle edges, caused by slightly misaligned triangle edges as a result of converting the vertex window coordinates to `int`. These artifacts were reduced by the introduction of subpixel precision, but not completely eliminated. Some interesting changes in this commit: * Applying the top-left rule for our counter-clockwise vertices is now done with simpler conditions: every vertex that has a Y coordinate lower than or equal to the previous vertex' Y coordinate is counted as a top or left edge. A float epsilon is used to emulate a switch between `> 0` and `>= 0` comparisons. * Fog depth calculation into a `f32x4` is now done once per triangle instead of once per fragment, and only if fog is enabled. * The `one_over_area` value was previously calculated as `1.0f / area`, where `area` was an `int`. This resulted in a lower quality reciprocal value whereas we can now retain floating point precision. The effect of this can be seen in Tux Racer, where the ice reflection is noticeably smoother.
This commit is contained in:
parent
6133acb8c0
commit
37dd10fbbe
3 changed files with 58 additions and 68 deletions
|
@ -17,7 +17,6 @@ namespace SoftGPU {
|
||||||
static constexpr bool ENABLE_STATISTICS_OVERLAY = false;
|
static constexpr bool ENABLE_STATISTICS_OVERLAY = false;
|
||||||
static constexpr int NUM_SAMPLERS = 2;
|
static constexpr int NUM_SAMPLERS = 2;
|
||||||
static constexpr int MILLISECONDS_PER_STATISTICS_PERIOD = 500;
|
static constexpr int MILLISECONDS_PER_STATISTICS_PERIOD = 500;
|
||||||
static constexpr int SUBPIXEL_BITS = 5;
|
|
||||||
static constexpr int NUM_LIGHTS = 8;
|
static constexpr int NUM_LIGHTS = 8;
|
||||||
|
|
||||||
// See: https://www.khronos.org/opengl/wiki/Common_Mistakes#Texture_edge_color_problem
|
// See: https://www.khronos.org/opengl/wiki/Common_Mistakes#Texture_edge_color_problem
|
||||||
|
|
|
@ -47,14 +47,14 @@ using AK::SIMD::to_f32x4;
|
||||||
using AK::SIMD::to_u32x4;
|
using AK::SIMD::to_u32x4;
|
||||||
using AK::SIMD::u32x4;
|
using AK::SIMD::u32x4;
|
||||||
|
|
||||||
constexpr static int edge_function(const IntVector2& a, const IntVector2& b, const IntVector2& c)
|
constexpr static float edge_function(const FloatVector2& a, const FloatVector2& b, const FloatVector2& c)
|
||||||
{
|
{
|
||||||
return ((c.x() - a.x()) * (b.y() - a.y()) - (c.y() - a.y()) * (b.x() - a.x()));
|
return (c.x() - a.x()) * (b.y() - a.y()) - (c.y() - a.y()) * (b.x() - a.x());
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr static i32x4 edge_function4(const IntVector2& a, const IntVector2& b, const Vector2<i32x4>& c)
|
constexpr static f32x4 edge_function4(const FloatVector2& a, const FloatVector2& b, const Vector2<f32x4>& c)
|
||||||
{
|
{
|
||||||
return ((c.x() - a.x()) * (b.y() - a.y()) - (c.y() - a.y()) * (b.x() - a.x()));
|
return (c.x() - a.x()) * (b.y() - a.y()) - (c.y() - a.y()) * (b.x() - a.x());
|
||||||
}
|
}
|
||||||
|
|
||||||
template<typename T, typename U>
|
template<typename T, typename U>
|
||||||
|
@ -191,38 +191,20 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
Vertex const vertex1 = triangle.vertices[1];
|
Vertex const vertex1 = triangle.vertices[1];
|
||||||
Vertex const vertex2 = triangle.vertices[2];
|
Vertex const vertex2 = triangle.vertices[2];
|
||||||
|
|
||||||
constexpr int subpixel_factor = 1 << SUBPIXEL_BITS;
|
|
||||||
|
|
||||||
// Calculate area of the triangle for later tests
|
// Calculate area of the triangle for later tests
|
||||||
IntVector2 const v0 { static_cast<int>(vertex0.window_coordinates.x() * subpixel_factor), static_cast<int>(vertex0.window_coordinates.y() * subpixel_factor) };
|
FloatVector2 const v0 { vertex0.window_coordinates.x(), vertex0.window_coordinates.y() };
|
||||||
IntVector2 const v1 { static_cast<int>(vertex1.window_coordinates.x() * subpixel_factor), static_cast<int>(vertex1.window_coordinates.y() * subpixel_factor) };
|
FloatVector2 const v1 { vertex1.window_coordinates.x(), vertex1.window_coordinates.y() };
|
||||||
IntVector2 const v2 { static_cast<int>(vertex2.window_coordinates.x() * subpixel_factor), static_cast<int>(vertex2.window_coordinates.y() * subpixel_factor) };
|
FloatVector2 const v2 { vertex2.window_coordinates.x(), vertex2.window_coordinates.y() };
|
||||||
|
|
||||||
int area = edge_function(v0, v1, v2);
|
|
||||||
if (area == 0)
|
|
||||||
return;
|
|
||||||
|
|
||||||
|
auto const area = edge_function(v0, v1, v2);
|
||||||
auto const one_over_area = 1.0f / area;
|
auto const one_over_area = 1.0f / area;
|
||||||
|
|
||||||
auto render_bounds = m_frame_buffer->rect();
|
auto render_bounds = m_frame_buffer->rect();
|
||||||
if (m_options.scissor_enabled)
|
if (m_options.scissor_enabled)
|
||||||
render_bounds.intersect(m_options.scissor_box);
|
render_bounds.intersect(m_options.scissor_box);
|
||||||
|
|
||||||
// Obey top-left rule:
|
|
||||||
// This sets up "zero" for later pixel coverage tests.
|
|
||||||
// Depending on where on the triangle the edge is located
|
|
||||||
// it is either tested against 0 or 1, effectively
|
|
||||||
// turning "< 0" into "<= 0"
|
|
||||||
IntVector3 zero { 1, 1, 1 };
|
|
||||||
if (v1.y() > v0.y() || (v1.y() == v0.y() && v1.x() < v0.x()))
|
|
||||||
zero.set_z(0);
|
|
||||||
if (v2.y() > v1.y() || (v2.y() == v1.y() && v2.x() < v1.x()))
|
|
||||||
zero.set_x(0);
|
|
||||||
if (v0.y() > v2.y() || (v0.y() == v2.y() && v0.x() < v2.x()))
|
|
||||||
zero.set_y(0);
|
|
||||||
|
|
||||||
// This function calculates the 3 edge values for the pixel relative to the triangle.
|
// This function calculates the 3 edge values for the pixel relative to the triangle.
|
||||||
auto calculate_edge_values4 = [v0, v1, v2](Vector2<i32x4> const& p) -> Vector3<i32x4> {
|
auto calculate_edge_values4 = [v0, v1, v2](Vector2<f32x4> const& p) -> Vector3<f32x4> {
|
||||||
return {
|
return {
|
||||||
edge_function4(v1, v2, p),
|
edge_function4(v1, v2, p),
|
||||||
edge_function4(v2, v0, p),
|
edge_function4(v2, v0, p),
|
||||||
|
@ -230,8 +212,22 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
};
|
};
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// Zero is used in testing against edge values below, applying the "top-left rule". If a pixel
|
||||||
|
// lies exactly on an edge shared by two triangles, we only render that pixel if the edge in
|
||||||
|
// question is a "top" or "left" edge. We can detect those easily by testing for Y2 <= Y1,
|
||||||
|
// since we know our vertices are in CCW order. By changing a float epsilon to 0, we
|
||||||
|
// effectively change the comparisons against the edge values below from "> 0" into ">= 0".
|
||||||
|
constexpr auto epsilon = NumericLimits<float>::epsilon();
|
||||||
|
FloatVector3 zero { epsilon, epsilon, epsilon };
|
||||||
|
if (v2.y() <= v1.y())
|
||||||
|
zero.set_x(0.f);
|
||||||
|
if (v0.y() <= v2.y())
|
||||||
|
zero.set_y(0.f);
|
||||||
|
if (v1.y() <= v0.y())
|
||||||
|
zero.set_z(0.f);
|
||||||
|
|
||||||
// This function tests whether a point as identified by its 3 edge values lies within the triangle
|
// This function tests whether a point as identified by its 3 edge values lies within the triangle
|
||||||
auto test_point4 = [zero](Vector3<i32x4> const& edges) -> i32x4 {
|
auto test_point4 = [zero](Vector3<f32x4> const& edges) -> i32x4 {
|
||||||
return edges.x() >= zero.x()
|
return edges.x() >= zero.x()
|
||||||
&& edges.y() >= zero.y()
|
&& edges.y() >= zero.y()
|
||||||
&& edges.z() >= zero.z();
|
&& edges.z() >= zero.z();
|
||||||
|
@ -239,26 +235,30 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
|
|
||||||
// Calculate block-based bounds
|
// Calculate block-based bounds
|
||||||
// clang-format off
|
// clang-format off
|
||||||
int const bx0 = max(render_bounds.left(), min(min(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1;
|
int const bx0 = max(render_bounds.left(), min(min(v0.x(), v1.x()), v2.x())) & ~1;
|
||||||
int const bx1 = (min(render_bounds.right(), max(max(v0.x(), v1.x()), v2.x()) / subpixel_factor) & ~1) + 2;
|
int const bx1 = (min(render_bounds.right(), max(max(v0.x(), v1.x()), v2.x())) & ~1) + 2;
|
||||||
int const by0 = max(render_bounds.top(), min(min(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1;
|
int const by0 = max(render_bounds.top(), min(min(v0.y(), v1.y()), v2.y())) & ~1;
|
||||||
int const by1 = (min(render_bounds.bottom(), max(max(v0.y(), v1.y()), v2.y()) / subpixel_factor) & ~1) + 2;
|
int const by1 = (min(render_bounds.bottom(), max(max(v0.y(), v1.y()), v2.y())) & ~1) + 2;
|
||||||
// clang-format on
|
// clang-format on
|
||||||
|
|
||||||
// Fog depths
|
// Calculate depth of fragment for fog;
|
||||||
float const vertex0_eye_absz = fabsf(vertex0.eye_coordinates.z());
|
// OpenGL 1.5 spec chapter 3.10: "An implementation may choose to approximate the
|
||||||
float const vertex1_eye_absz = fabsf(vertex1.eye_coordinates.z());
|
// eye-coordinate distance from the eye to each fragment center by |Ze|."
|
||||||
float const vertex2_eye_absz = fabsf(vertex2.eye_coordinates.z());
|
f32x4 vertex0_fog_depth;
|
||||||
|
f32x4 vertex1_fog_depth;
|
||||||
|
f32x4 vertex2_fog_depth;
|
||||||
|
if (m_options.fog_enabled) {
|
||||||
|
vertex0_fog_depth = expand4(fabsf(vertex0.eye_coordinates.z()));
|
||||||
|
vertex1_fog_depth = expand4(fabsf(vertex1.eye_coordinates.z()));
|
||||||
|
vertex2_fog_depth = expand4(fabsf(vertex2.eye_coordinates.z()));
|
||||||
|
}
|
||||||
|
|
||||||
int const render_bounds_left = render_bounds.x();
|
float const render_bounds_left = render_bounds.left();
|
||||||
int const render_bounds_right = render_bounds.x() + render_bounds.width();
|
float const render_bounds_right = render_bounds.right();
|
||||||
int const render_bounds_top = render_bounds.y();
|
float const render_bounds_top = render_bounds.top();
|
||||||
int const render_bounds_bottom = render_bounds.y() + render_bounds.height();
|
float const render_bounds_bottom = render_bounds.bottom();
|
||||||
|
|
||||||
auto const half_pixel_offset = Vector2<i32x4> {
|
auto const half_pixel_offset = Vector2<f32x4> { expand4(.5f), expand4(.5f) };
|
||||||
expand4(subpixel_factor / 2),
|
|
||||||
expand4(subpixel_factor / 2),
|
|
||||||
};
|
|
||||||
|
|
||||||
auto color_buffer = m_frame_buffer->color_buffer();
|
auto color_buffer = m_frame_buffer->color_buffer();
|
||||||
auto depth_buffer = m_frame_buffer->depth_buffer();
|
auto depth_buffer = m_frame_buffer->depth_buffer();
|
||||||
|
@ -304,24 +304,26 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
|
|
||||||
// Iterate over all blocks within the bounds of the triangle
|
// Iterate over all blocks within the bounds of the triangle
|
||||||
for (int by = by0; by < by1; by += 2) {
|
for (int by = by0; by < by1; by += 2) {
|
||||||
|
auto const f_by = static_cast<float>(by);
|
||||||
for (int bx = bx0; bx < bx1; bx += 2) {
|
for (int bx = bx0; bx < bx1; bx += 2) {
|
||||||
PixelQuad quad;
|
PixelQuad quad;
|
||||||
|
|
||||||
|
auto const f_bx = static_cast<float>(bx);
|
||||||
quad.screen_coordinates = {
|
quad.screen_coordinates = {
|
||||||
i32x4 { bx, bx + 1, bx, bx + 1 },
|
f32x4 { f_bx, f_bx + 1, f_bx, f_bx + 1 },
|
||||||
i32x4 { by, by, by + 1, by + 1 },
|
f32x4 { f_by, f_by, f_by + 1, f_by + 1 },
|
||||||
};
|
};
|
||||||
|
|
||||||
auto edge_values = calculate_edge_values4(quad.screen_coordinates * subpixel_factor + half_pixel_offset);
|
auto edge_values = calculate_edge_values4(quad.screen_coordinates + half_pixel_offset);
|
||||||
|
|
||||||
// Generate triangle coverage mask
|
// Generate triangle coverage mask
|
||||||
quad.mask = test_point4(edge_values);
|
quad.mask = test_point4(edge_values);
|
||||||
|
|
||||||
// Test quad against intersection of render target size and scissor rect
|
// Test quad against intersection of render target size and scissor rect
|
||||||
quad.mask &= quad.screen_coordinates.x() >= render_bounds_left
|
quad.mask &= quad.screen_coordinates.x() >= render_bounds_left
|
||||||
&& quad.screen_coordinates.x() < render_bounds_right
|
&& quad.screen_coordinates.x() <= render_bounds_right
|
||||||
&& quad.screen_coordinates.y() >= render_bounds_top
|
&& quad.screen_coordinates.y() >= render_bounds_top
|
||||||
&& quad.screen_coordinates.y() < render_bounds_bottom;
|
&& quad.screen_coordinates.y() <= render_bounds_bottom;
|
||||||
|
|
||||||
if (none(quad.mask))
|
if (none(quad.mask))
|
||||||
continue;
|
continue;
|
||||||
|
@ -329,13 +331,6 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
INCREASE_STATISTICS_COUNTER(g_num_quads, 1);
|
INCREASE_STATISTICS_COUNTER(g_num_quads, 1);
|
||||||
INCREASE_STATISTICS_COUNTER(g_num_pixels, maskcount(quad.mask));
|
INCREASE_STATISTICS_COUNTER(g_num_pixels, maskcount(quad.mask));
|
||||||
|
|
||||||
// Calculate barycentric coordinates from previously calculated edge values
|
|
||||||
quad.barycentrics = Vector3<f32x4> {
|
|
||||||
to_f32x4(edge_values.x()),
|
|
||||||
to_f32x4(edge_values.y()),
|
|
||||||
to_f32x4(edge_values.z()),
|
|
||||||
} * one_over_area;
|
|
||||||
|
|
||||||
int coverage_bits = maskbits(quad.mask);
|
int coverage_bits = maskbits(quad.mask);
|
||||||
|
|
||||||
// Stencil testing
|
// Stencil testing
|
||||||
|
@ -395,6 +390,9 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate barycentric coordinates from previously calculated edge values
|
||||||
|
quad.barycentrics = edge_values * one_over_area;
|
||||||
|
|
||||||
// Depth testing
|
// Depth testing
|
||||||
DepthType* depth_ptrs[4] = {
|
DepthType* depth_ptrs[4] = {
|
||||||
coverage_bits & 1 ? &depth_buffer->scanline(by)[bx] : nullptr,
|
coverage_bits & 1 ? &depth_buffer->scanline(by)[bx] : nullptr,
|
||||||
|
@ -507,8 +505,7 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
};
|
};
|
||||||
|
|
||||||
auto const interpolated_reciprocal_w = interpolate(w_coordinates.x(), w_coordinates.y(), w_coordinates.z(), quad.barycentrics);
|
auto const interpolated_reciprocal_w = interpolate(w_coordinates.x(), w_coordinates.y(), w_coordinates.z(), quad.barycentrics);
|
||||||
auto const interpolated_w = 1.0f / interpolated_reciprocal_w;
|
quad.barycentrics = quad.barycentrics * w_coordinates / interpolated_reciprocal_w;
|
||||||
quad.barycentrics = quad.barycentrics * w_coordinates * interpolated_w;
|
|
||||||
|
|
||||||
// FIXME: make this more generic. We want to interpolate more than just color and uv
|
// FIXME: make this more generic. We want to interpolate more than just color and uv
|
||||||
if (m_options.shade_smooth)
|
if (m_options.shade_smooth)
|
||||||
|
@ -519,14 +516,8 @@ void Device::rasterize_triangle(const Triangle& triangle)
|
||||||
for (size_t i = 0; i < NUM_SAMPLERS; ++i)
|
for (size_t i = 0; i < NUM_SAMPLERS; ++i)
|
||||||
quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics);
|
quad.texture_coordinates[i] = interpolate(expand4(vertex0.tex_coords[i]), expand4(vertex1.tex_coords[i]), expand4(vertex2.tex_coords[i]), quad.barycentrics);
|
||||||
|
|
||||||
if (m_options.fog_enabled) {
|
if (m_options.fog_enabled)
|
||||||
// Calculate depth of fragment for fog
|
quad.fog_depth = interpolate(vertex0_fog_depth, vertex1_fog_depth, vertex2_fog_depth, quad.barycentrics);
|
||||||
//
|
|
||||||
// OpenGL 1.5 spec chapter 3.10: "An implementation may choose to approximate the
|
|
||||||
// eye-coordinate distance from the eye to each fragment center by |Ze|."
|
|
||||||
|
|
||||||
quad.fog_depth = interpolate(expand4(vertex0_eye_absz), expand4(vertex1_eye_absz), expand4(vertex2_eye_absz), quad.barycentrics);
|
|
||||||
}
|
|
||||||
|
|
||||||
shade_fragments(quad);
|
shade_fragments(quad);
|
||||||
|
|
||||||
|
|
|
@ -15,7 +15,7 @@
|
||||||
namespace SoftGPU {
|
namespace SoftGPU {
|
||||||
|
|
||||||
struct PixelQuad final {
|
struct PixelQuad final {
|
||||||
Vector2<AK::SIMD::i32x4> screen_coordinates;
|
Vector2<AK::SIMD::f32x4> screen_coordinates;
|
||||||
Vector3<AK::SIMD::f32x4> barycentrics;
|
Vector3<AK::SIMD::f32x4> barycentrics;
|
||||||
AK::SIMD::f32x4 depth;
|
AK::SIMD::f32x4 depth;
|
||||||
Vector4<AK::SIMD::f32x4> vertex_color;
|
Vector4<AK::SIMD::f32x4> vertex_color;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue