1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 07:17:35 +00:00

LibGfx: Increase cache-friendliness in FastBoxBlurFilter

Change from 4 vectors of u8 to a single vector of a struct of 4
u8. Since all the accesses are on the same indices into each vector,
the cache pressure is decreased by co-locating the data.

When scrolling up and down in the `box-shadow` example page, the
profile changes from about 75% of the function calls being in
`apply_single_pass()` to only about 70% on my machine.
This commit is contained in:
Lenny Maiorani 2022-06-23 13:11:05 -06:00 committed by Linus Groh
parent b652546a16
commit ef46100fd3

View file

@ -43,15 +43,8 @@ static void do_single_pass(int width, int height, size_t radius_x, size_t radius
int div_x = 2 * radius_x + 1;
int div_y = 2 * radius_y + 1;
Vector<u8, 1024> intermediate_red;
Vector<u8, 1024> intermediate_green;
Vector<u8, 1024> intermediate_blue;
Vector<u8, 1024> intermediate_alpha;
intermediate_red.resize(width * height);
intermediate_green.resize(width * height);
intermediate_blue.resize(width * height);
intermediate_alpha.resize(width * height);
Vector<Color, 1024> intermediate;
intermediate.resize(width * height);
// First pass: vertical
for (int y = 0; y < height; ++y) {
@ -70,10 +63,12 @@ static void do_single_pass(int width, int height, size_t radius_x, size_t radius
}
// Slide horizontally
for (int x = 0; x < width; ++x) {
intermediate_red[y * width + x] = (sum_red / div_x);
intermediate_green[y * width + x] = (sum_green / div_x);
intermediate_blue[y * width + x] = (sum_blue / div_x);
intermediate_alpha[y * width + x] = (sum_alpha / div_x);
auto const index = y * width + x;
auto& current_intermediate = intermediate[index];
current_intermediate.set_red(sum_red / div_x);
current_intermediate.set_green(sum_green / div_x);
current_intermediate.set_blue(sum_blue / div_x);
current_intermediate.set_alpha(sum_alpha / div_x);
auto leftmost_x_coord = max(x - (int)radius_x, 0);
auto rightmost_x_coord = min(x + (int)radius_x + 1, width - 1);
@ -102,10 +97,11 @@ static void do_single_pass(int width, int height, size_t radius_x, size_t radius
// Setup sliding window
for (int i = -(int)radius_y; i <= (int)radius_y; ++i) {
int offset = clamp(i, 0, height - 1) * width + x;
sum_red += intermediate_red[offset];
sum_green += intermediate_green[offset];
sum_blue += intermediate_blue[offset];
sum_alpha += intermediate_alpha[offset];
auto& current_intermediate = intermediate[offset];
sum_red += current_intermediate.red();
sum_green += current_intermediate.green();
sum_blue += current_intermediate.blue();
sum_alpha += current_intermediate.alpha();
}
for (int y = 0; y < height; ++y) {
@ -117,17 +113,21 @@ static void do_single_pass(int width, int height, size_t radius_x, size_t radius
set_pixel_function(x, y, color);
auto topmost_y_coord = max(y - (int)radius_y, 0);
auto bottommost_y_coord = min(y + (int)radius_y + 1, height - 1);
auto const bottommost_y_coord = min(y + (int)radius_y + 1, height - 1);
auto const bottom_index = x + bottommost_y_coord * width;
auto& bottom_intermediate = intermediate[bottom_index];
sum_red += bottom_intermediate.red();
sum_green += bottom_intermediate.green();
sum_blue += bottom_intermediate.blue();
sum_alpha += bottom_intermediate.alpha();
sum_red += intermediate_red[x + bottommost_y_coord * width];
sum_red -= intermediate_red[x + topmost_y_coord * width];
sum_green += intermediate_green[x + bottommost_y_coord * width];
sum_green -= intermediate_green[x + topmost_y_coord * width];
sum_blue += intermediate_blue[x + bottommost_y_coord * width];
sum_blue -= intermediate_blue[x + topmost_y_coord * width];
sum_alpha += intermediate_alpha[x + bottommost_y_coord * width];
sum_alpha -= intermediate_alpha[x + topmost_y_coord * width];
auto const topmost_y_coord = max(y - (int)radius_y, 0);
auto const top_index = x + topmost_y_coord * width;
auto& top_intermediate = intermediate[top_index];
sum_red -= top_intermediate.red();
sum_green -= top_intermediate.green();
sum_blue -= top_intermediate.blue();
sum_alpha -= top_intermediate.alpha();
}
}
}