1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 15:37:46 +00:00

LibWeb: Refactor TextNode::ChunkIterator

This commit refactors the text chunking algorithm used in
TextNode::ChunkIterator. The m_start_of_chunk member parameter has been
replaced with a local variable that's anchored to the current iterator
at the start of every next() call, and the algorithm is made a little
more clear by explicitly separating what can and cannot peek into the
next character during iteration.
This commit is contained in:
sin-ack 2021-08-28 00:45:28 +00:00 committed by Andreas Kling
parent 48a1a3c0ce
commit 0342fe4e0c
2 changed files with 48 additions and 28 deletions

View file

@ -322,7 +322,6 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
, m_wrap_lines(wrap_lines) , m_wrap_lines(wrap_lines)
, m_wrap_breaks(wrap_breaks) , m_wrap_breaks(wrap_breaks)
, m_utf8_view(text) , m_utf8_view(text)
, m_start_of_chunk(m_utf8_view.begin())
, m_iterator(m_utf8_view.begin()) , m_iterator(m_utf8_view.begin())
{ {
m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin()); m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
@ -330,66 +329,88 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
Optional<TextNode::Chunk> TextNode::ChunkIterator::next() Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
{ {
if (m_iterator == m_utf8_view.end())
return {};
auto start_of_chunk = m_iterator;
while (m_iterator != m_utf8_view.end()) { while (m_iterator != m_utf8_view.end()) {
auto guard = ScopeGuard([&] { ++m_iterator; }); ++m_iterator;
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
return result.release_value();
}
if (m_last_was_newline) { if (m_last_was_newline) {
m_last_was_newline = false; // NOTE: This expression looks out for the case where we have
if (auto result = try_commit_chunk(m_iterator, true); result.has_value()) // multiple newlines in a row. Because every output next()
// that's a newline newline must be prepared for in advance by
// the previous next() call, we need to check whether the next
// character is a newline here as well. Otherwise, the newline
// becomes part of the next expression and causes rendering
// issues.
m_last_was_newline = m_iterator != m_utf8_view.end() && *m_iterator == '\n';
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, true); result.has_value())
return result.release_value(); return result.release_value();
} }
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value();
}
}
// NOTE: The checks after this need to look at the current iterator
// position, which depends on not being at the end.
if (m_iterator == m_utf8_view.end())
break;
// NOTE: When we're supposed to stop on linebreaks, we're actually
// supposed to output two chunks: "content" and "\n". Since we
// can't output two chunks at once, we store this information as a
// flag to output the newline immediately at the earliest
// opportunity.
if (m_wrap_breaks && *m_iterator == '\n') { if (m_wrap_breaks && *m_iterator == '\n') {
m_last_was_newline = true; m_last_was_newline = true;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value()) if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value(); return result.release_value();
}
} }
if (m_wrap_lines) { if (m_wrap_lines) {
bool is_space = is_ascii_space(*m_iterator); bool is_space = is_ascii_space(*m_iterator);
if (is_space != m_last_was_space) { if (is_space != m_last_was_space) {
m_last_was_space = is_space; m_last_was_space = is_space;
if (auto result = try_commit_chunk(m_iterator, false); result.has_value()) if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
return result.release_value(); return result.release_value();
}
} }
} }
} }
if (m_last_was_newline) { if (start_of_chunk != m_utf8_view.end()) {
m_last_was_newline = false; // Try to output whatever's left at the end of the text node.
if (auto result = try_commit_chunk(m_utf8_view.end(), true); result.has_value()) if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, true); result.has_value())
return result.release_value();
}
if (m_start_of_chunk != m_utf8_view.end()) {
if (auto result = try_commit_chunk(m_utf8_view.end(), false, true); result.has_value())
return result.release_value(); return result.release_value();
} }
return {}; return {};
} }
Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& it, bool has_breaking_newline, bool must_commit) Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit)
{ {
if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit) if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit)
return {}; return {};
auto start = m_utf8_view.byte_offset_of(m_start_of_chunk); auto byte_offset = m_utf8_view.byte_offset_of(start);
auto length = m_utf8_view.byte_offset_of(it) - m_utf8_view.byte_offset_of(m_start_of_chunk); auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;
if (has_breaking_newline || length > 0) { if (byte_length > 0) {
auto chunk_view = m_utf8_view.substring_view(start, length); auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
m_start_of_chunk = it;
return Chunk { return Chunk {
.view = chunk_view, .view = chunk_view,
.start = start, .start = byte_offset,
.length = length, .length = byte_length,
.has_breaking_newline = has_breaking_newline, .has_breaking_newline = has_breaking_newline,
.is_all_whitespace = is_all_whitespace(chunk_view.as_string()), .is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
}; };
} }
m_start_of_chunk = it;
return {}; return {};
} }

View file

@ -41,7 +41,7 @@ public:
Optional<Chunk> next(); Optional<Chunk> next();
private: private:
Optional<Chunk> try_commit_chunk(Utf8View::Iterator const&, bool has_breaking_newline, bool must_commit = false); Optional<Chunk> try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit = false);
const LayoutMode m_layout_mode; const LayoutMode m_layout_mode;
const bool m_wrap_lines; const bool m_wrap_lines;
@ -49,7 +49,6 @@ public:
bool m_last_was_space { false }; bool m_last_was_space { false };
bool m_last_was_newline { false }; bool m_last_was_newline { false };
Utf8View m_utf8_view; Utf8View m_utf8_view;
Utf8View::Iterator m_start_of_chunk;
Utf8View::Iterator m_iterator; Utf8View::Iterator m_iterator;
}; };