LibWeb: Refactor TextNode::ChunkIterator

This commit refactors the text chunking algorithm used in TextNode::ChunkIterator. The m_start_of_chunk member parameter has been replaced with a local variable that's anchored to the current iterator at the start of every next() call, and the algorithm is made a little more clear by explicitly separating what can and cannot peek into the next character during iteration.
2025-07-24 22:17:42 +00:00 · 2021-08-28 00:45:28 +00:00 · 2021-08-28 00:45:28 +00:00 · 0342fe4e0c
commit 0342fe4e0c
parent 48a1a3c0ce
2 changed files with 48 additions and 28 deletions
--- a/Userland/Libraries/LibWeb/Layout/TextNode.cpp
+++ b/Userland/Libraries/LibWeb/Layout/TextNode.cpp
@ -322,7 +322,6 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout
    , m_wrap_lines(wrap_lines)
    , m_wrap_breaks(wrap_breaks)
    , m_utf8_view(text)
-    , m_start_of_chunk(m_utf8_view.begin())
    , m_iterator(m_utf8_view.begin())
 {
    m_last_was_space = !text.is_empty() && is_ascii_space(*m_utf8_view.begin());
@ -330,66 +329,88 @@ TextNode::ChunkIterator::ChunkIterator(StringView const& text, LayoutMode layout

 Optional<TextNode::Chunk> TextNode::ChunkIterator::next()
 {
+    if (m_iterator == m_utf8_view.end())
+        return {};
+
+    auto start_of_chunk = m_iterator;
    while (m_iterator != m_utf8_view.end()) {
-        auto guard = ScopeGuard([&] { ++m_iterator; });
-        if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
-            if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
-                return result.release_value();
-        }
+        ++m_iterator;
+
        if (m_last_was_newline) {
-            m_last_was_newline = false;
-            if (auto result = try_commit_chunk(m_iterator, true); result.has_value())
+            // NOTE: This expression looks out for the case where we have
+            //       multiple newlines in a row. Because every output next()
+            //       that's a newline newline must be prepared for in advance by
+            //       the previous next() call, we need to check whether the next
+            //       character is a newline here as well. Otherwise, the newline
+            //       becomes part of the next expression and causes rendering
+            //       issues.
+            m_last_was_newline = m_iterator != m_utf8_view.end() && *m_iterator == '\n';
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, true); result.has_value())
                return result.release_value();
        }
+
+        if (m_layout_mode == LayoutMode::AllPossibleLineBreaks) {
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
+                return result.release_value();
+            }
+        }
+
+        // NOTE: The checks after this need to look at the current iterator
+        //       position, which depends on not being at the end.
+        if (m_iterator == m_utf8_view.end())
+            break;
+
+        // NOTE: When we're supposed to stop on linebreaks, we're actually
+        //       supposed to output two chunks: "content" and "\n". Since we
+        //       can't output two chunks at once, we store this information as a
+        //       flag to output the newline immediately at the earliest
+        //       opportunity.
        if (m_wrap_breaks && *m_iterator == '\n') {
            m_last_was_newline = true;
-            if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
+            if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
                return result.release_value();
+            }
        }
+
        if (m_wrap_lines) {
            bool is_space = is_ascii_space(*m_iterator);
            if (is_space != m_last_was_space) {
                m_last_was_space = is_space;
-                if (auto result = try_commit_chunk(m_iterator, false); result.has_value())
+                if (auto result = try_commit_chunk(start_of_chunk, m_iterator, false); result.has_value()) {
                    return result.release_value();
+                }
            }
        }
    }

-    if (m_last_was_newline) {
-        m_last_was_newline = false;
-        if (auto result = try_commit_chunk(m_utf8_view.end(), true); result.has_value())
-            return result.release_value();
-    }
-    if (m_start_of_chunk != m_utf8_view.end()) {
-        if (auto result = try_commit_chunk(m_utf8_view.end(), false, true); result.has_value())
+    if (start_of_chunk != m_utf8_view.end()) {
+        // Try to output whatever's left at the end of the text node.
+        if (auto result = try_commit_chunk(start_of_chunk, m_utf8_view.end(), false, true); result.has_value())
            return result.release_value();
    }

    return {};
 }

-Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& it, bool has_breaking_newline, bool must_commit)
+Optional<TextNode::Chunk> TextNode::ChunkIterator::try_commit_chunk(Utf8View::Iterator const& start, Utf8View::Iterator const& end, bool has_breaking_newline, bool must_commit)
 {
    if (m_layout_mode == LayoutMode::OnlyRequiredLineBreaks && !must_commit)
        return {};

-    auto start = m_utf8_view.byte_offset_of(m_start_of_chunk);
-    auto length = m_utf8_view.byte_offset_of(it) - m_utf8_view.byte_offset_of(m_start_of_chunk);
+    auto byte_offset = m_utf8_view.byte_offset_of(start);
+    auto byte_length = m_utf8_view.byte_offset_of(end) - byte_offset;

-    if (has_breaking_newline || length > 0) {
-        auto chunk_view = m_utf8_view.substring_view(start, length);
-        m_start_of_chunk = it;
+    if (byte_length > 0) {
+        auto chunk_view = m_utf8_view.substring_view(byte_offset, byte_length);
        return Chunk {
            .view = chunk_view,
-            .start = start,
-            .length = length,
+            .start = byte_offset,
+            .length = byte_length,
            .has_breaking_newline = has_breaking_newline,
            .is_all_whitespace = is_all_whitespace(chunk_view.as_string()),
        };
    }

-    m_start_of_chunk = it;
    return {};
 }