diff --git a/Userland/Libraries/LibJS/Position.h b/Userland/Libraries/LibJS/Position.h
new file mode 100644
index 0000000000..2491e6baac
--- /dev/null
+++ b/Userland/Libraries/LibJS/Position.h
@@ -0,0 +1,21 @@
+/*
+ * Copyright (c) 2023, Andreas Kling
+ *
+ * SPDX-License-Identifier: BSD-2-Clause
+ */
+
+#pragma once
+
+#include <AK/Types.h>
+
+namespace JS {
+
+// A location in a piece of source text. SourceCode produces positions whose line
+// and column start at 1 (columns count code points); offset is a byte offset.
+struct Position {
+    size_t line { 0 };
+    size_t column { 0 };
+    size_t offset { 0 };
+};
+
+}
diff --git a/Userland/Libraries/LibJS/SourceCode.cpp b/Userland/Libraries/LibJS/SourceCode.cpp
index 060623ca50..2fc2bd1242 100644
--- a/Userland/Libraries/LibJS/SourceCode.cpp
+++ b/Userland/Libraries/LibJS/SourceCode.cpp
@@ -33,22 +33,45 @@ String const& SourceCode::code() const
     return m_code;
 }
 
-void SourceCode::compute_line_break_offsets() const
+// Fills m_cached_positions with a starting point (line, column, byte offset) at
+// offset 0 and then at the first code point that lies at least
+// minimum_distance_between_cached_positions bytes past the previous one.
+void SourceCode::fill_position_cache() const
 {
-    m_line_break_offsets = Vector {};
+    constexpr size_t minimum_distance_between_cached_positions = 10000;
 
     if (m_code.is_empty())
         return;
 
     bool previous_code_point_was_carriage_return = false;
-    Utf8View view(m_code);
+    size_t line = 1;
+    size_t column = 1;
+    size_t offset_of_last_starting_point = 0;
+    m_cached_positions.ensure_capacity(m_code.bytes().size() / minimum_distance_between_cached_positions + 1);
+    m_cached_positions.append({ .line = 1, .column = 1, .offset = 0 });
+
+    Utf8View const view(m_code);
     for (auto it = view.begin(); it != view.end(); ++it) {
         u32 code_point = *it;
+        // A cached position carries no "previous code point was '\r'" state, and
+        // range_from_offsets() resumes with that flag cleared. Never start from the
+        // '\n' of a "\r\n" pair, or that '\n' would be counted as a second line break.
+        bool const is_line_feed_after_carriage_return = code_point == '\n' && previous_code_point_was_carriage_return;
         bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
         previous_code_point_was_carriage_return = code_point == '\r';
 
-        if (is_line_terminator)
-            m_line_break_offsets->append(view.byte_offset_of(it));
+        auto byte_offset = view.byte_offset_of(it);
+        if (!is_line_feed_after_carriage_return && (byte_offset - offset_of_last_starting_point) >= minimum_distance_between_cached_positions) {
+            m_cached_positions.append({ .line = line, .column = column, .offset = byte_offset });
+            offset_of_last_starting_point = byte_offset;
+        }
+
+        if (is_line_terminator) {
+            line += 1;
+            column = 1;
+        } else {
+            column += 1;
+        }
     }
 }
 
@@ -58,34 +81,41 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
     if (m_code.is_empty())
         return { *this, { .line = 1, .column = 1, .offset = 0 }, { .line = 1, .column = 1, .offset = 0 } };
 
-    if (!m_line_break_offsets.has_value())
-        compute_line_break_offsets();
+    if (m_cached_positions.is_empty())
+        fill_position_cache();
 
-    size_t line = 1;
-    size_t nearest_line_break_index = 0;
-    size_t nearest_preceding_line_break_offset = 0;
+    Position current { .line = 1, .column = 1, .offset = 0 };
 
-    if (!m_line_break_offsets->is_empty()) {
-        binary_search(*m_line_break_offsets, start_offset, &nearest_line_break_index);
-        line = 1 + nearest_line_break_index;
-        nearest_preceding_line_break_offset = (*m_line_break_offsets)[nearest_line_break_index];
+    // Binary-search the cached starting points for the one nearest to start_offset
+    // and continue counting lines and columns from there.
+    if (!m_cached_positions.is_empty()) {
+        Position const dummy;
+        size_t nearest_index = 0;
+        binary_search(m_cached_positions, dummy, &nearest_index,
+            [&](auto&, auto& starting_point) {
+                // Explicit three-way comparison: subtracting the unsigned offsets
+                // would wrap around instead of producing a negative result.
+                if (start_offset < starting_point.offset)
+                    return -1;
+                return start_offset > starting_point.offset ? 1 : 0;
+            });
+
+        current = m_cached_positions[nearest_index];
     }
 
     Optional start;
     Optional end;
 
-    size_t column = 1;
-
     bool previous_code_point_was_carriage_return = false;
 
-    Utf8View view(m_code);
-    for (auto it = view.iterator_at_byte_offset_without_validation(nearest_preceding_line_break_offset); it != view.end(); ++it) {
+    Utf8View const view(m_code);
+    for (auto it = view.iterator_at_byte_offset_without_validation(current.offset); it != view.end(); ++it) {
 
         // If we're on or after the start offset, this is the start position.
         if (!start.has_value() && view.byte_offset_of(it) >= start_offset) {
             start = Position {
-                .line = line,
-                .column = column,
+                .line = current.line,
+                .column = current.column,
                 .offset = start_offset,
             };
         }
@@ -93,8 +123,8 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
         // If we're on or after the end offset, this is the end position.
         if (!end.has_value() && view.byte_offset_of(it) >= end_offset) {
             end = Position {
-                .line = line,
-                .column = column,
+                .line = current.line,
+                .column = current.column,
                 .offset = end_offset,
             };
             break;
@@ -102,15 +132,15 @@ SourceRange SourceCode::range_from_offsets(u32 start_offset, u32 end_offset) con
 
         u32 code_point = *it;
 
-        bool is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
+        bool const is_line_terminator = code_point == '\r' || (code_point == '\n' && !previous_code_point_was_carriage_return) || code_point == LINE_SEPARATOR || code_point == PARAGRAPH_SEPARATOR;
         previous_code_point_was_carriage_return = code_point == '\r';
 
         if (is_line_terminator) {
-            ++line;
-            column = 1;
+            current.line += 1;
+            current.column = 1;
             continue;
         }
-        ++column;
+        current.column += 1;
     }
 
     // If we didn't find both a start and end position, just return 1,1-1,1.
diff --git a/Userland/Libraries/LibJS/SourceCode.h b/Userland/Libraries/LibJS/SourceCode.h index f48e3bdda9..71173f0923 100644 --- a/Userland/Libraries/LibJS/SourceCode.h +++ b/Userland/Libraries/LibJS/SourceCode.h @@ -9,6 +9,7 @@ #include #include #include +#include namespace JS { @@ -24,12 +25,14 @@ public: private: SourceCode(String filename, String code); - void compute_line_break_offsets() const; - String m_filename; String m_code; - Optional> mutable m_line_break_offsets; + // For fast mapping of offsets to line/column numbers, we build a list of + // starting points (with byte offsets into the source string) and which + // line:column they map to. This can then be binary-searched. + void fill_position_cache() const; + Vector mutable m_cached_positions; }; } diff --git a/Userland/Libraries/LibJS/SourceRange.h b/Userland/Libraries/LibJS/SourceRange.h index 05b1435e24..eec5e3c7df 100644 --- a/Userland/Libraries/LibJS/SourceRange.h +++ b/Userland/Libraries/LibJS/SourceRange.h @@ -10,16 +10,11 @@ #include #include #include +#include #include namespace JS { -struct Position { - size_t line { 0 }; - size_t column { 0 }; - size_t offset { 0 }; -}; - struct SourceRange { [[nodiscard]] bool contains(Position const& position) const { return position.offset <= end.offset && position.offset >= start.offset; }