From e9dfa615888d2623435449c009c07f1525962b14 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Thu, 4 Jan 2024 10:27:25 +1300 Subject: [PATCH] LibWeb: Use UTF-16 code unit offsets in Range::to_string MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to another problem we had in CharacterData, we were assuming that the offsets were raw UTF-8 byte offsets into the data, instead of UTF-16 code units. Fix this by using the substring helpers in CharacterData to get the text data from the Range. There are more instances of this issue around the place that we will need to track down and add tests for, but this fixes one of them :^) For the test included in this commit, we were previously returning: llo💨😮 Instead of the expected: llo💨😮 Wo --- .../Text/expected/DOM/Range-to-string.txt | 2 ++ .../LibWeb/Text/input/DOM/Range-to-string.html | 14 ++++++++++++++ Userland/Libraries/LibWeb/DOM/Range.cpp | 18 ++++++++++++------ 3 files changed, 28 insertions(+), 6 deletions(-) create mode 100644 Tests/LibWeb/Text/expected/DOM/Range-to-string.txt create mode 100644 Tests/LibWeb/Text/input/DOM/Range-to-string.html diff --git a/Tests/LibWeb/Text/expected/DOM/Range-to-string.txt b/Tests/LibWeb/Text/expected/DOM/Range-to-string.txt new file mode 100644 index 0000000000..eb90804337 --- /dev/null +++ b/Tests/LibWeb/Text/expected/DOM/Range-to-string.txt @@ -0,0 +1,2 @@ +Hello💨😮 World +llo💨😮 Wo diff --git a/Tests/LibWeb/Text/input/DOM/Range-to-string.html b/Tests/LibWeb/Text/input/DOM/Range-to-string.html new file mode 100644 index 0000000000..5583df7c10 --- /dev/null +++ b/Tests/LibWeb/Text/input/DOM/Range-to-string.html @@ -0,0 +1,14 @@ +

Hello💨😮 World

+ + diff --git a/Userland/Libraries/LibWeb/DOM/Range.cpp b/Userland/Libraries/LibWeb/DOM/Range.cpp index ff1778e36f..191cfa6f03 100644 --- a/Userland/Libraries/LibWeb/DOM/Range.cpp +++ b/Userland/Libraries/LibWeb/DOM/Range.cpp @@ -560,12 +560,16 @@ String Range::to_string() const // 2. If this’s start node is this’s end node and it is a Text node, // then return the substring of that Text node’s data beginning at this’s start offset and ending at this’s end offset. - if (start_container() == end_container() && is<Text>(*start_container())) - return MUST(static_cast<Text const&>(*start_container()).data().substring_from_byte_offset(start_offset(), end_offset() - start_offset())); + if (start_container() == end_container() && is<Text>(*start_container())) { + auto const& text = static_cast<Text const&>(*start_container()); + return MUST(text.substring_data(start_offset(), end_offset() - start_offset())); + } // 3. If this’s start node is a Text node, then append the substring of that node’s data from this’s start offset until the end to s. - if (is<Text>(*start_container())) - builder.append(static_cast<Text const&>(*start_container()).data().bytes_as_string_view().substring_view(start_offset())); + if (is<Text>(*start_container())) { + auto const& text = static_cast<Text const&>(*start_container()); + builder.append(MUST(text.substring_data(start_offset(), text.length_in_utf16_code_units() - start_offset()))); + } // 4. Append the concatenation of the data of all Text nodes that are contained in this, in tree order, to s. for (Node const* node = start_container(); node != end_container()->next_sibling(); node = node->next_in_pre_order()) { @@ -574,8 +578,10 @@ String Range::to_string() const } // 5. If this’s end node is a Text node, then append the substring of that node’s data from its start until this’s end offset to s. 
- if (is<Text>(*end_container())) - builder.append(static_cast<Text const&>(*end_container()).data().bytes_as_string_view().substring_view(0, end_offset())); + if (is<Text>(*end_container())) { + auto const& text = static_cast<Text const&>(*end_container()); + builder.append(MUST(text.substring_data(0, end_offset()))); + } // 6. Return s. return MUST(builder.to_string());