mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 16:48:12 +00:00
LibWeb: Refactor TextNode::ChunkIterator
This commit refactors the text chunking algorithm used in TextNode::ChunkIterator. The m_start_of_chunk member parameter has been replaced with a local variable that's anchored to the current iterator at the start of every next() call, and the algorithm is made a little more clear by explicitly separating what can and cannot peek into the next character during iteration.
This commit is contained in:
parent
48a1a3c0ce
commit
0342fe4e0c
2 changed files with 48 additions and 28 deletions
|
@ -322,7 +322,6 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
|
|||
, m_wrap_lines(wrap_lines)
|
||||
, m_wrap_breaks(wrap_breaks)
|
||||
, m_utf8_view(text)
|
||||
, m_start_of_chunk(m_utf8_view.begin())
|
||||
, m_iterator(m_utf8_view.begin())
|
||||
{
|
||||
m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
|
||||
|
@ -330,66 +329,88 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
|
|||
|
||||
Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
|
||||
{
|
||||
if (m_iterator == m_utf8_view.end())
|
||||
return {};
|
||||
|
||||
auto start_of_chunk = m_iterator;
|
||||
while (m_iterator != m_utf8_view.end()) {
|
||||
auto guard = ScopeGuard([&] { ++m_iterator; });
|
||||
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
|
||||
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
|
||||
return result.release_value();
|
||||
}
|
||||
++m_iterator;
|
||||
|
||||
if (m_last_was_newline) {
|
||||
m_last_was_newline = false;
|
||||
if (auto result = try_commit_chunk(m_iterator, true); result.has_value())
|
||||
// NOTE: This expression looks out for the case where we have
|
||||
// multiple newlines in a row. Because every output next()
|
||||
// that's a newline newline must be prepared for in advance by
|
||||
// the previous next() call, we need to check whether the next
|
||||
// character is a newline here as well. Otherwise, the newline
|
||||
// becomes part of the next expression and causes rendering
|
||||
// issues.
|
||||
m_last_was_newline = m_iterator != m_utf8_view.end() && *m_iterator == '\n';
|
||||
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, true); result.has_value())
|
||||
return result.release_value();
|
||||
}
|
||||
|
||||
if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
|
||||
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
|
||||
return result.release_value();
|
||||
}
|
||||
}
|
||||
|
||||
// NOTE: The checks after this need to look at the current iterator
|
||||
// position, which depends on not being at the end.
|
||||
if (m_iterator == m_utf8_view.end())
|
||||
break;
|
||||
|
||||
// NOTE: When we're supposed to stop on linebreaks, we're actually
|
||||
// supposed to output two chunks: "content" and "\n". Since we
|
||||
// can't output two chunks at once, we store this information as a
|
||||
// flag to output the newline immediately at the earliest
|
||||
// opportunity.
|
||||
if (m_wrap_breaks && *m_iterator == '\n') {
|
||||
m_last_was_newline = true;
|
||||
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
|
||||
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
|
||||
return result.release_value();
|
||||
}
|
||||
}
|
||||
|
||||
if (m_wrap_lines) {
|
||||
bool is_space = is_ascii_space(*m_iterator);
|
||||
if (is_space != m_last_was_space) {
|
||||
m_last_was_space = is_space;
|
||||
if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
|
||||
if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
|
||||
return result.release_value();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (m_last_was_newline) {
|
||||
m_last_was_newline = false;
|
||||
if (auto result = try_commit_chunk(m_utf8_view.end(), true); result.has_value())
|
||||
return result.release_value();
|
||||
}
|
||||
if (m_start_of_chunk != m_utf8_view.end()) {
|
||||
if (auto result = try_commit_chunk(m_utf8_view.end(), false, true); result.has_value())
|
||||
if (start_of_chunk != m_utf8_view.end()) {
|
||||
// Try to output whatever's left at the end of the text node.
|
||||
if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, true); result.has_value())
|
||||
return result.release_value();
|
||||
}
|
||||
|
||||
return {};
|
||||
}
|
||||
|
||||
Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& it, bool has_breaking_newline, bool must_commit)
|
||||
Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit)
|
||||
{
|
||||
if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit)
|
||||
return {};
|
||||
|
||||
auto start = m_utf8_view.byte_offset_of(m_start_of_chunk);
|
||||
auto length = m_utf8_view.byte_offset_of(it) - m_utf8_view.byte_offset_of(m_start_of_chunk);
|
||||
auto byte_offset = m_utf8_view.byte_offset_of(start);
|
||||
auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;
|
||||
|
||||
if (has_breaking_newline || length > 0) {
|
||||
auto chunk_view = m_utf8_view.substring_view(start, length);
|
||||
m_start_of_chunk = it;
|
||||
if (byte_length > 0) {
|
||||
auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
|
||||
return Chunk {
|
||||
.view = chunk_view,
|
||||
.start = start,
|
||||
.length = length,
|
||||
.start = byte_offset,
|
||||
.length = byte_length,
|
||||
.has_breaking_newline = has_breaking_newline,
|
||||
.is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
|
||||
};
|
||||
}
|
||||
|
||||
m_start_of_chunk = it;
|
||||
return {};
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue