# Patch summary (reviewer notes; everything before the first "diff --git" line is preamble).
#
# Purpose: CharacterData offsets, counts and lengths are measured in UTF-16 code units
# instead of UTF-8 bytes.
#   * CharacterData.cpp: substring_data() and replace_data() convert m_data with
#     AK::utf8_to_utf16(), slice it through a Utf16View, and convert the slices back with
#     to_utf8(). set_data() and append_data() pass length_in_utf16_code_units() as the length.
#   * CharacterData.h: length() is replaced by length_in_utf16_code_units(); the offset/count
#     parameters are renamed to state their unit.
#   * CharacterData.idl: the `length` attribute is bound with
#     [ImplementedAs=length_in_utf16_code_units].
#   * Node.cpp: Node::length() calls length_in_utf16_code_units() for CharacterData nodes.
#   * Tests: adds Tests/LibWeb/Text/input/DOM/Text-methods.html and its expected output.
#
# Restoration notes:
#   * This patch had been collapsed onto a few long lines and every "<...>" span had been
#     stripped. Line breaks, indentation, blank context lines and template argument lists
#     are reconstructed here; hunk line counts were used to place the blank lines.
#   * ExceptionOr<void> on replace_data is confirmed by the 80-column truncation of the hunk
#     heading; ExceptionOr<String> on substring_data and ExceptionOr<void> on append_data
#     follow from the IDL return types shown below.
#   * NOTE(review): ExceptionOr<void> on insert_data/delete_data, verify_cast<CharacterData>
#     and ChildNode<CharacterData> are inferred -- confirm against the tree before applying.
#   * NOTE(review): the Text-methods.html hunk announces 20 added lines but only two empty
#     '+' lines survive; the markup was lost and must be restored from the original commit.
#   * NOTE(review): length_in_utf16_code_units() returns `unsigned` while Node::length()
#     returns size_t -- presumably a narrowing conversion; confirm it is intended.
#   * The FIXMEs in the patch itself record that each call re-encodes the whole string.
diff --git a/Tests/LibWeb/Text/expected/DOM/Text-methods.txt b/Tests/LibWeb/Text/expected/DOM/Text-methods.txt
new file mode 100644
index 0000000000..4ef31b75e7
--- /dev/null
+++ b/Tests/LibWeb/Text/expected/DOM/Text-methods.txt
@@ -0,0 +1,6 @@
+text.data = '🙃', length = 2
+text.data = '🙃🙃', length = 4
+text.data = '🙃hi🙃🙃', length = 8
+text.data = '🙃i🙃🙃', length = 7
+text.data = '🙃replaced!', length = 11
+repla
diff --git a/Tests/LibWeb/Text/input/DOM/Text-methods.html b/Tests/LibWeb/Text/input/DOM/Text-methods.html
new file mode 100644
index 0000000000..2fb149be2b
--- /dev/null
+++ b/Tests/LibWeb/Text/input/DOM/Text-methods.html
@@ -0,0 +1,20 @@
+
+
diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp
index a3fc908941..f8eae93109 100644
--- a/Userland/Libraries/LibWeb/DOM/CharacterData.cpp
+++ b/Userland/Libraries/LibWeb/DOM/CharacterData.cpp
@@ -35,37 +35,39 @@ void CharacterData::set_data(String const& data)
     // NOTE: Since the offset is 0, it can never be above data's length, so this can never throw.
     // NOTE: Setting the data to the same value as the current data still causes a mutation observer callback.
     // FIXME: Figure out a way to make this a no-op again if the passed in data is the same as the current data.
-    MUST(replace_data(0, this->length(), data));
+    MUST(replace_data(0, this->length_in_utf16_code_units(), data));
 }
 
 // https://dom.spec.whatwg.org/#concept-cd-substring
 WebIDL::ExceptionOr<String> CharacterData::substring_data(size_t offset, size_t count) const
 {
     // 1. Let length be node’s length.
-    auto length = this->length();
+    // FIXME: This is very inefficient!
+    auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
+    Utf16View utf16_view { utf16_data };
+    auto length = utf16_view.length_in_code_units();
 
     // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
     if (offset > length)
         return WebIDL::IndexSizeError::create(realm(), "Substring offset out of range."_fly_string);
 
-    // FIXME: The offset and count we are given here is in UTF-16 code units, but we are incorrectly assuming it is a byte offset.
-
     // 3. If offset plus count is greater than length, return a string whose value is the code units from the offsetth code unit
     // to the end of node’s data, and then return.
     if (offset + count > length)
-        return MUST(m_data.substring_from_byte_offset(offset));
+        return MUST(utf16_view.substring_view(offset).to_utf8());
 
     // 4. Return a string whose value is the code units from the offsetth code unit to the offset+countth code unit in node’s data.
-    return MUST(m_data.substring_from_byte_offset(offset, count));
+    return MUST(utf16_view.substring_view(offset, count).to_utf8());
 }
 
 // https://dom.spec.whatwg.org/#concept-cd-replace
 WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t count, String const& data)
 {
-    // FIXME: The offset and count we are given here is in UTF-16 code units, but we are incorrectly assuming it is a byte offset.
-
     // 1. Let length be node’s length.
-    auto length = this->length();
+    // FIXME: This is very inefficient!
+    auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
+    Utf16View utf16_view { utf16_data };
+    auto length = utf16_view.length_in_code_units();
 
     // 2. If offset is greater than length, then throw an "IndexSizeError" DOMException.
     if (offset > length)
@@ -82,9 +84,9 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
     // 6. Let delete offset be offset + data’s length.
     // 7. Starting from delete offset code units, remove count code units from node’s data.
     StringBuilder builder;
-    builder.append(this->data().bytes_as_string_view().substring_view(0, offset));
+    builder.append(MUST(utf16_view.substring_view(0, offset).to_utf8()));
     builder.append(data);
-    builder.append(this->data().bytes_as_string_view().substring_view(offset + count));
+    builder.append(MUST(utf16_view.substring_view(offset + count).to_utf8()));
     m_data = MUST(builder.to_string());
 
     // 8. For each live range whose start node is node and start offset is greater than offset but less than or equal to offset plus count, set its start offset to offset.
@@ -130,7 +132,7 @@ WebIDL::ExceptionOr<void> CharacterData::replace_data(size_t offset, size_t coun
 WebIDL::ExceptionOr<void> CharacterData::append_data(String const& data)
 {
     // The appendData(data) method steps are to replace data with node this, offset this’s length, count 0, and data data.
-    return replace_data(this->length(), 0, data);
+    return replace_data(this->length_in_utf16_code_units(), 0, data);
 }
 
 // https://dom.spec.whatwg.org/#dom-characterdata-insertdata
diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.h b/Userland/Libraries/LibWeb/DOM/CharacterData.h
index 2897adeb2b..bd90a0d369 100644
--- a/Userland/Libraries/LibWeb/DOM/CharacterData.h
+++ b/Userland/Libraries/LibWeb/DOM/CharacterData.h
@@ -13,6 +13,7 @@
 
 namespace Web::DOM {
 
+// https://dom.spec.whatwg.org/#characterdata
 class CharacterData
     : public Node
     , public ChildNode<CharacterData>
@@ -26,14 +27,18 @@ public:
     String const& data() const { return m_data; }
     void set_data(String const&);
 
-    // FIXME: This should be in UTF-16 code units, not byte size.
-    unsigned length() const { return m_data.bytes().size(); }
+    unsigned length_in_utf16_code_units() const
+    {
+        // FIXME: This is inefficient!
+        auto utf16_data = MUST(AK::utf8_to_utf16(m_data));
+        return Utf16View { utf16_data }.length_in_code_units();
+    }
 
-    WebIDL::ExceptionOr<String> substring_data(size_t offset, size_t count) const;
+    WebIDL::ExceptionOr<String> substring_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units) const;
     WebIDL::ExceptionOr<void> append_data(String const&);
-    WebIDL::ExceptionOr<void> insert_data(size_t offset, String const&);
-    WebIDL::ExceptionOr<void> delete_data(size_t offset, size_t count);
-    WebIDL::ExceptionOr<void> replace_data(size_t offset, size_t count, String const&);
+    WebIDL::ExceptionOr<void> insert_data(size_t offset_in_utf16_code_units, String const&);
+    WebIDL::ExceptionOr<void> delete_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units);
+    WebIDL::ExceptionOr<void> replace_data(size_t offset_in_utf16_code_units, size_t count_in_utf16_code_units, String const&);
 
 protected:
     CharacterData(Document&, NodeType, String const&);
diff --git a/Userland/Libraries/LibWeb/DOM/CharacterData.idl b/Userland/Libraries/LibWeb/DOM/CharacterData.idl
index 9d539619f7..5213fdb855 100644
--- a/Userland/Libraries/LibWeb/DOM/CharacterData.idl
+++ b/Userland/Libraries/LibWeb/DOM/CharacterData.idl
@@ -6,7 +6,7 @@
 [Exposed=Window]
 interface CharacterData : Node {
     [LegacyNullToEmptyString] attribute DOMString data;
-    readonly attribute unsigned long length;
+    [ImplementedAs=length_in_utf16_code_units] readonly attribute unsigned long length;
 
     DOMString substringData(unsigned long offset, unsigned long count);
     undefined appendData(DOMString data);
diff --git a/Userland/Libraries/LibWeb/DOM/Node.cpp b/Userland/Libraries/LibWeb/DOM/Node.cpp
index 0a908a165d..57f26e3120 100644
--- a/Userland/Libraries/LibWeb/DOM/Node.cpp
+++ b/Userland/Libraries/LibWeb/DOM/Node.cpp
@@ -1492,11 +1492,8 @@ size_t Node::length() const
         return 0;
 
     // 2. If node is a CharacterData node, then return node’s data’s length.
-    if (is_character_data()) {
-        auto* character_data_node = verify_cast<CharacterData>(this);
-        // FIXME: This should be in UTF-16 code units, not byte size.
-        return character_data_node->data().bytes().size();
-    }
+    if (is_character_data())
+        return verify_cast<CharacterData>(*this).length_in_utf16_code_units();
 
     // 3. Return the number of node’s children.
     return child_count();