1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 20:28:11 +00:00

LibWeb: Use UTF-16 code unit offsets in Range::to_string

Similar to another problem we had in CharacterData, we were assuming
that the offsets were raw UTF-8 byte offsets into the data, instead of
UTF-16 code unit offsets. Fix this by using the substring helpers in
CharacterData to get the text data from the Range.

There are more instances of this issue elsewhere in the codebase that we
will need to track down and add tests for, but this fixes one of them :^)

For the test included in this commit, we were previously returning:

llo💨😮

Instead of the expected:

llo💨😮 Wo
This commit is contained in:
Shannon Booth 2024-01-04 10:27:25 +13:00 committed by Andreas Kling
parent ee431e6911
commit e9dfa61588
3 changed files with 28 additions and 6 deletions

View file

@ -560,12 +560,16 @@ String Range::to_string() const
// 2. If this's start node is this's end node and it is a Text node,
// then return the substring of that Text node's data beginning at this's start offset and ending at this's end offset.
if (start_container() == end_container() && is<Text>(*start_container()))
return MUST(static_cast<Text const&>(*start_container()).data().substring_from_byte_offset(start_offset(), end_offset() - start_offset()));
if (start_container() == end_container() && is<Text>(*start_container())) {
auto const& text = static_cast<Text const&>(*start_container());
return MUST(text.substring_data(start_offset(), end_offset() - start_offset()));
}
// 3. If this's start node is a Text node, then append the substring of that node's data from this's start offset until the end to s.
if (is<Text>(*start_container()))
builder.append(static_cast<Text const&>(*start_container()).data().bytes_as_string_view().substring_view(start_offset()));
if (is<Text>(*start_container())) {
auto const& text = static_cast<Text const&>(*start_container());
builder.append(MUST(text.substring_data(start_offset(), text.length_in_utf16_code_units() - start_offset())));
}
// 4. Append the concatenation of the data of all Text nodes that are contained in this, in tree order, to s.
for (Node const* node = start_container(); node != end_container()->next_sibling(); node = node->next_in_pre_order()) {
@ -574,8 +578,10 @@ String Range::to_string() const
}
// 5. If this's end node is a Text node, then append the substring of that node's data from its start until this's end offset to s.
if (is<Text>(*end_container()))
builder.append(static_cast<Text const&>(*end_container()).data().bytes_as_string_view().substring_view(0, end_offset()));
if (is<Text>(*end_container())) {
auto const& text = static_cast<Text const&>(*end_container());
builder.append(MUST(text.substring_data(0, end_offset())));
}
// 6. Return s.
return MUST(builder.to_string());