1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 23:47:45 +00:00

LibWeb: Use UTF-16 code unit offsets and lengths in CharacterData

We were previously assuming that the input offsets and lengths were all
raw byte offsets into a UTF-8 string. While our String representation
may be UTF-8 internally, from the external world it is seen as UTF-16:
offsets and counts are passed in as code units, and the returned length
is in code units as well.

Previously, the test included in this commit would crash Ladybird (and
otherwise return wrong values).

The implementation here is very inefficient; I am sure there is a
much smarter way to write it so that we would not need a conversion
from UTF-8 to a UTF-16 string (and then back again).

Fixes: #20971
This commit is contained in:
Shannon Booth 2023-12-22 20:41:34 +13:00 committed by Andreas Kling
parent d51f84501a
commit d8759d9656
6 changed files with 54 additions and 24 deletions

View file

@ -13,6 +13,7 @@
namespace Web::DOM {
// https://dom.spec.whatwg.org/#characterdata
class CharacterData
: public Node
, public ChildNode<CharacterData>
@ -26,14 +27,18 @@ public:
String const& data() const { return m_data; }
void set_data(String const&);
// FIXME: This should be in UTF-16 code units, not byte size.
unsigned length() const { return m_data.bytes().size(); }
unsigned length_in_utf16_code_units() const
{
// FIXME: This is inefficient!
auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
return Utf16View { utf16_data }.length_in_code_units();
}
WebIDL::ExceptionOr<String> substring_data(size_t offset, size_t count) const;
WebIDL::ExceptionOr<String> substring_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units) const;
WebIDL::ExceptionOr<void> append_data(String const&);
WebIDL::ExceptionOr<void> insert_data(size_t offset, String const&);
WebIDL::ExceptionOr<void> delete_data(size_t offset, size_t count);
WebIDL::ExceptionOr<void> replace_data(size_t offset, size_t count, String const&);
WebIDL::ExceptionOr<void> insert_data(size_t offset_in_utf16_code_units, String const&);
WebIDL::ExceptionOr<void> delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units);
WebIDL::ExceptionOr<void> replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&);
protected:
CharacterData(Document&, NodeType, String const&);