1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-14 06:14:58 +00:00

LibJS: Make line-and-column resolution fast for large minified JS

Instead of caching start-of-line offsets, we now cache byte offsets
at regular intervals. This fixes an issue where we had terrible
performance on large minified JS, since that often means one very,
VERY long line (with no line endings to cache).

My machine was spending ~35ms per stack frame when throwing errors
on some heavy minified websites, and after this patch, we now spend
<1ms per stack frame.
This commit is contained in:
Andreas Kling 2023-09-12 13:03:56 +02:00
parent ff02de4ad0
commit 44b2735b9e
4 changed files with 67 additions and 35 deletions

View file

@ -0,0 +1,17 @@
/*
* Copyright (c) 2023, Andreas Kling <kling@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
namespace JS {
struct Position {
size_t line { 0 };
size_t column { 0 };
size_t offset { 0 };
};
}

View file

@ -33,22 +33,38 @@ String const& SourceCode::code() const
return m_code;
}
void SourceCode::compute_line_break_offsets() const
void SourceCode::fill_position_cache() const
{
m_line_break_offsets = Vector<size_t> {};
constexpr size_t minimum_distance_between_cached_positions = 10000;
if (m_code.is_empty())
return;
bool previous_code_point_was_carriage_return = false;
Utf8View view(m_code);
size_t line = 1;
size_t column = 1;
size_t offset_of_last_starting_point = 0;
m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions);
m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
Utf8View const view(m_code);
for (auto it = view.begin(); it != view.end(); ++it) {
u32 code_point = *it;
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point_was_carriage_return = code_point == '\r';
if (is_line_terminator)
m_line_break_offsets->append(view.byte_offset_of(it));
auto byte_offset = view.byte_offset_of(it);
if ((byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) {
m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
offset_of_last_starting_point = byte_offset;
}
if (is_line_terminator) {
line += 1;
column = 1;
} else {
column += 1;
}
}
}
@ -58,34 +74,35 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
if (m_code.is_empty())
return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
if (!m_line_break_offsets.has_value())
compute_line_break_offsets();
if (m_cached_positions.is_empty())
fill_position_cache();
size_t line = 1;
size_t nearest_line_break_index = 0;
size_t nearest_preceding_line_break_offset = 0;
Position current { .line = 1, .column = 1, .offset = 0 };
if (!m_line_break_offsets->is_empty()) {
binary_search(*m_line_break_offsets, start_offset, &nearest_line_break_index);
line = 1 + nearest_line_break_index;
nearest_preceding_line_break_offset = (*m_line_break_offsets)[nearest_line_break_index];
if (!m_cached_positions.is_empty()) {
Position const dummy;
size_t nearest_index = 0;
binary_search(m_cached_positions, dummy, &nearest_index,
[&](auto&, auto& starting_point) {
return start_offset - starting_point.offset;
});
current = m_cached_positions[nearest_index];
}
Optional<Position> start;
Optional<Position> end;
size_t column = 1;
bool previous_code_point_was_carriage_return = false;
Utf8View view(m_code);
for (auto it = view.iterator_at_byte_offset_without_validation(nearest_preceding_line_break_offset); it != view.end(); ++it) {
Utf8View const view(m_code);
for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
// If we're on or after the start offset, this is the start position.
if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
start = Position {
.line = line,
.column = column,
.line = current.line,
.column = current.column,
.offset = start_offset,
};
}
@ -93,8 +110,8 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
// If we're on or after the end offset, this is the end position.
if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
end = Position {
.line = line,
.column = column,
.line = current.line,
.column = current.column,
.offset = end_offset,
};
break;
@ -102,15 +119,15 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
u32 code_point = *it;
bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
previous_code_point_was_carriage_return = code_point == '\r';
if (is_line_terminator) {
++line;
column = 1;
current.line += 1;
current.column = 1;
continue;
}
++column;
current.column += 1;
}
// If we didn't find both a start and end position, just return 1,1-1,1.

View file

@ -9,6 +9,7 @@
#include <AK/String.h>
#include <AK/Vector.h>
#include <LibJS/Forward.h>
#include <LibJS/Position.h>
namespace JS {
@ -24,12 +25,14 @@ public:
private:
SourceCode(String filename, String code);
void compute_line_break_offsets() const;
String m_filename;
String m_code;
Optional<Vector<size_t>> mutable m_line_break_offsets;
// For fast mapping of offsets to line/column numbers, we build a list of
// starting points (with byte offsets into the source string) and which
// line:column they map to. This can then be binary-searched.
void fill_position_cache() const;
Vector<Position> mutable m_cached_positions;
};
}

View file

@ -10,16 +10,11 @@
#include <AK/NonnullRefPtr.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <LibJS/Position.h>
#include <LibJS/SourceCode.h>
namespace JS {
struct Position {
size_t line { 0 };
size_t column { 0 };
size_t offset { 0 };
};
struct SourceRange {
[[nodiscard]] bool contains(Position const& position) const { return position.offset <= end.offset && position.offset >= start.offset; }