1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 16:48:12 +00:00

LibWeb: Refactor TextNode::ChunkIterator

This commit refactors the text chunking algorithm used in
TextNode::ChunkIterator. The m_start_of_chunk member parameter has been
replaced with a local variable that's anchored to the current iterator
at the start of every next() call, and the algorithm is made a little
more clear by explicitly separating what can and cannot peek into the
next character during iteration.
This commit is contained in:
sin-ack 2021-08-28 00:45:28 +00:00 committed by Andreas Kling
parent 48a1a3c0ce
commit 0342fe4e0c
2 changed files with 48 additions and 28 deletions

View file

@ -322,7 +322,6 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
, m_wrap_lines(wrap_lines)
, m_wrap_breaks(wrap_breaks)
, m_utf8_view(text)
, m_start_of_chunk(m_utf8_view.begin())
, m_iterator(m_utf8_view.begin())
{
m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
@ -330,66 +329,88 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
{
if (m_iterator == m_utf8_view.end())
return {};
auto start_of_chunk = m_iterator;
while (m_iterator != m_utf8_view.end()) {
auto guard = ScopeGuard([&] { ++m_iterator; });
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
return result.release_value();
}
++m_iterator;
if (m_last_was_newline) {
m_last_was_newline = false;
if (auto result = try_commit_chunk(m_iterator, true); result.has_value())
// NOTE: This expression looks out for the case where we have
// multiple newlines in a row. Because every output next()
// that's a newline newline must be prepared for in advance by
// the previous next() call, we need to check whether the next
// character is a newline here as well. Otherwise, the newline
// becomes part of the next expression and causes rendering
// issues.
m_last_was_newline = m_iterator != m_utf8_view.end() && *m_iterator == '\n';
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, true); result.has_value())
return result.release_value();
}
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value();
}
}
// NOTE: The checks after this need to look at the current iterator
// position, which depends on not being at the end.
if (m_iterator == m_utf8_view.end())
break;
// NOTE: When we're supposed to stop on linebreaks, we're actually
// supposed to output two chunks: "content" and "\n". Since we
// can't output two chunks at once, we store this information as a
// flag to output the newline immediately at the earliest
// opportunity.
if (m_wrap_breaks && *m_iterator == '\n') {
m_last_was_newline = true;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value();
}
}
if (m_wrap_lines) {
bool is_space = is_ascii_space(*m_iterator);
if (is_space != m_last_was_space) {
m_last_was_space = is_space;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value();
}
}
}
}
if (m_last_was_newline) {
m_last_was_newline = false;
if (auto result = try_commit_chunk(m_utf8_view.end(), true); result.has_value())
return result.release_value();
}
if (m_start_of_chunk != m_utf8_view.end()) {
if (auto result = try_commit_chunk(m_utf8_view.end(), false, true); result.has_value())
if (start_of_chunk != m_utf8_view.end()) {
// Try to output whatever's left at the end of the text node.
if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, true); result.has_value())
return result.release_value();
}
return {};
}
Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& it, bool has_breaking_newline, bool must_commit)
Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit)
{
if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit)
return {};
auto start = m_utf8_view.byte_offset_of(m_start_of_chunk);
auto length = m_utf8_view.byte_offset_of(it) - m_utf8_view.byte_offset_of(m_start_of_chunk);
auto byte_offset = m_utf8_view.byte_offset_of(start);
auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;
if (has_breaking_newline || length > 0) {
auto chunk_view = m_utf8_view.substring_view(start, length);
m_start_of_chunk = it;
if (byte_length > 0) {
auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
return Chunk {
.view = chunk_view,
.start = start,
.length = length,
.start = byte_offset,
.length = byte_length,
.has_breaking_newline = has_breaking_newline,
.is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
};
}
m_start_of_chunk = it;
return {};
}