From 4c1d8a7785c90069ee0719318c4d8d1f3c593be3 Mon Sep 17 00:00:00 2001
From: Jelle Raaijmakers
Date: Sun, 10 Apr 2022 02:50:26 +0200
Subject: [PATCH] LibSoftGPU: Optimize clipping code

Three optimizations are applied:

1. If the list of vertices to clip is empty, return immediately after
   clearing the output list.

2. Remember the previous vertex instead of recalculating whether it is
   within the clip plane.

3. Instead of copying and swapping lists around, operate on the input
   and output lists directly. This prevents a lot of `malloc`/`free`
   traffic as a result of vector assignments.

This takes the clipping code CPU load from 3.9% down to 1.8% for
Quake 3 on my machine.
---
 Userland/Libraries/LibSoftGPU/Clipper.cpp | 50 +++++++++++------------
 Userland/Libraries/LibSoftGPU/Clipper.h   |  3 +-
 2 files changed, 25 insertions(+), 28 deletions(-)

diff --git a/Userland/Libraries/LibSoftGPU/Clipper.cpp b/Userland/Libraries/LibSoftGPU/Clipper.cpp
index 0636dfc3d1..a6ba387db5 100644
--- a/Userland/Libraries/LibSoftGPU/Clipper.cpp
+++ b/Userland/Libraries/LibSoftGPU/Clipper.cpp
@@ -65,43 +65,41 @@ static constexpr GPU::Vertex clip_intersection_point(GPU::Vertex const& p1, GPU:
 }
 
 template
-FLATTEN static constexpr void clip_plane(Vector& read_list, Vector& write_list)
+FLATTEN static constexpr void clip_plane(Vector& input_list, Vector& output_list)
 {
-    auto read_from = &read_list;
-    auto write_to = &write_list;
+    output_list.clear_with_capacity();
 
-    write_to->clear_with_capacity();
-    for (size_t i = 0; i < read_from->size(); i++) {
-        auto const& curr_vec = read_from->at((i + 1) % read_from->size());
-        auto const& prev_vec = read_from->at(i);
+    auto input_list_size = input_list.size();
+    if (input_list_size == 0)
+        return;
 
-        bool const is_curr_point_within_clip_plane = point_within_clip_plane(curr_vec.clip_coordinates);
-        bool const is_prev_point_within_clip_plane = point_within_clip_plane(prev_vec.clip_coordinates);
-        if (is_curr_point_within_clip_plane != is_prev_point_within_clip_plane) {
-            auto const intersect = clip_intersection_point(prev_vec, curr_vec);
-            write_to->append(intersect);
-        }
+    auto const* prev_vec = &input_list.data()[0];
+    auto is_prev_point_within_clip_plane = point_within_clip_plane(prev_vec->clip_coordinates);
+
+    for (size_t i = 1; i <= input_list_size; i++) {
+        auto const& curr_vec = input_list[i % input_list_size];
+        auto const is_curr_point_within_clip_plane = point_within_clip_plane(curr_vec.clip_coordinates);
+
+        if (is_curr_point_within_clip_plane != is_prev_point_within_clip_plane)
+            output_list.append(clip_intersection_point(*prev_vec, curr_vec));
 
         if (is_curr_point_within_clip_plane)
-            write_to->append(curr_vec);
+            output_list.append(curr_vec);
+
+        prev_vec = &curr_vec;
+        is_prev_point_within_clip_plane = is_curr_point_within_clip_plane;
     }
-    swap(write_list, read_list);
 }
 
 void Clipper::clip_triangle_against_frustum(Vector& input_verts)
 {
-    list_a = input_verts;
-    list_b.clear_with_capacity();
-
     // FIXME C++23. Static reflection will provide looping over all enum values.
-    clip_plane(list_a, list_b);
-    clip_plane(list_a, list_b);
-    clip_plane(list_a, list_b);
-    clip_plane(list_a, list_b);
-    clip_plane(list_a, list_b);
-    clip_plane(list_a, list_b);
-
-    input_verts = list_a;
+    clip_plane(input_verts, m_vertex_buffer);
+    clip_plane(m_vertex_buffer, input_verts);
+    clip_plane(input_verts, m_vertex_buffer);
+    clip_plane(m_vertex_buffer, input_verts);
+    clip_plane(input_verts, m_vertex_buffer);
+    clip_plane(m_vertex_buffer, input_verts);
 }
 
 }
diff --git a/Userland/Libraries/LibSoftGPU/Clipper.h b/Userland/Libraries/LibSoftGPU/Clipper.h
index 108626462c..f944088331 100644
--- a/Userland/Libraries/LibSoftGPU/Clipper.h
+++ b/Userland/Libraries/LibSoftGPU/Clipper.h
@@ -29,8 +29,7 @@ public:
     void clip_triangle_against_frustum(Vector& input_vecs);
 
 private:
-    Vector list_a;
-    Vector list_b;
+    Vector m_vertex_buffer;
 };
 
 }