1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 23:28:11 +00:00

LibWeb: Implement Attribute closer to the spec and with an IDL file

Note that our Attribute class is what the spec refers to as just "Attr".
The main difference between the existing implementation and the spec is
that the spec defines more fields.

Attributes can contain namespace URIs and prefixes. However, note that
these are not parsed in HTML documents unless the document content-type
is XML. So for now, these are initialized to null. Web pages are able to
set the namespace via JavaScript (setAttributeNS), so these fields may
be filled in when the corresponding APIs are implemented.

The main change to be aware of is that an attribute is a node. This has
implications for how attributes are stored in the Element class. Nodes
are non-copyable and non-movable because their copy and move
constructors are deleted by the EventTarget base class. This means
attributes cannot be stored by value in a Vector or HashMap, as these
containers assume copyability / movability. So for now, the Vector
holding attributes is changed to hold RefPtrs to attributes instead.
This might change when attribute storage is implemented according to
the spec (by way of NamedNodeMap).
This commit is contained in:
Timothy Flynn 2021-10-15 09:57:07 -04:00 committed by Linus Groh
parent 8d27292fac
commit e01dfaac9a
11 changed files with 106 additions and 42 deletions

View file

@ -9,6 +9,8 @@
#include <AK/StringView.h>
#include <AK/Utf8View.h>
#include <LibTextCodec/Decoder.h>
#include <LibWeb/DOM/Attribute.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/HTML/Parser/HTMLEncodingDetection.h>
#include <ctype.h>
@ -94,7 +96,7 @@ Optional<String> extract_character_encoding_from_meta_element(String const& stri
return TextCodec::get_standardized_encoding(encoding);
}
Optional<DOM::Attribute> prescan_get_attribute(const ByteBuffer& input, size_t& position)
RefPtr<DOM::Attribute> prescan_get_attribute(DOM::Document& document, const ByteBuffer& input, size_t& position)
{
if (!prescan_skip_whitespace_and_slashes(input, position))
return {};
@ -109,7 +111,7 @@ Optional<DOM::Attribute> prescan_get_attribute(const ByteBuffer& input, size_t&
} else if (input[position] == '\t' || input[position] == '\n' || input[position] == '\f' || input[position] == '\r' || input[position] == ' ')
goto spaces;
else if (input[position] == '/' || input[position] == '>')
return DOM::Attribute(attribute_name.to_string(), "");
return DOM::Attribute::create(document, attribute_name.to_string(), "");
else
attribute_name.append_as_lowercase(input[position]);
++position;
@ -121,7 +123,7 @@ spaces:
if (!prescan_skip_whitespace_and_slashes(input, position))
return {};
if (input[position] != '=')
return DOM::Attribute(attribute_name.to_string(), "");
return DOM::Attribute::create(document, attribute_name.to_string(), "");
++position;
value:
@ -134,13 +136,13 @@ value:
++position;
for (; !prescan_should_abort(input, position); ++position) {
if (input[position] == quote_character)
return DOM::Attribute(attribute_name.to_string(), attribute_value.to_string());
return DOM::Attribute::create(document, attribute_name.to_string(), attribute_value.to_string());
else
attribute_value.append_as_lowercase(input[position]);
}
return {};
} else if (input[position] == '>')
return DOM::Attribute(attribute_name.to_string(), "");
return DOM::Attribute::create(document, attribute_name.to_string(), "");
else
attribute_value.append_as_lowercase(input[position]);
@ -150,7 +152,7 @@ value:
for (; !prescan_should_abort(input, position); ++position) {
if (input[position] == '\t' || input[position] == '\n' || input[position] == '\f' || input[position] == '\r' || input[position] == ' ' || input[position] == '>')
return DOM::Attribute(attribute_name.to_string(), attribute_value.to_string());
return DOM::Attribute::create(document, attribute_name.to_string(), attribute_value.to_string());
else
attribute_value.append_as_lowercase(input[position]);
}
@ -158,7 +160,7 @@ value:
}
// https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input)
Optional<String> run_prescan_byte_stream_algorithm(DOM::Document& document, const ByteBuffer& input)
{
// https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding
@ -194,24 +196,24 @@ Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input)
Optional<String> charset {};
while (true) {
auto attribute = prescan_get_attribute(input, position);
if (!attribute.has_value())
auto attribute = prescan_get_attribute(document, input, position);
if (!attribute)
break;
if (attribute_list.contains_slow(attribute.value().name()))
if (attribute_list.contains_slow(attribute->name()))
continue;
auto& attribute_name = attribute.value().name();
attribute_list.append(attribute.value().name());
auto& attribute_name = attribute->name();
attribute_list.append(attribute->name());
if (attribute_name == "http-equiv") {
got_pragma = attribute.value().value() == "content-type";
got_pragma = attribute->value() == "content-type";
} else if (attribute_name == "content") {
auto encoding = extract_character_encoding_from_meta_element(attribute.value().value());
auto encoding = extract_character_encoding_from_meta_element(attribute->value());
if (encoding.has_value() && !charset.has_value()) {
charset = encoding.value();
need_pragma = true;
}
} else if (attribute_name == "charset") {
auto maybe_charset = TextCodec::get_standardized_encoding(attribute.value().value());
auto maybe_charset = TextCodec::get_standardized_encoding(attribute->value());
if (maybe_charset.has_value()) {
charset = Optional<String> { maybe_charset };
need_pragma = { false };
@ -231,7 +233,7 @@ Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input)
&& ((input[position + 1] == '/' && isalpha(input[position + 2])) || isalpha(input[position + 1]))) {
position += 2;
prescan_skip_whitespace_and_slashes(input, position);
while (prescan_get_attribute(input, position).has_value()) { };
while (prescan_get_attribute(document, input, position)) { };
} else if (!prescan_should_abort(input, position + 1) && input[position] == '<' && (input[position + 1] == '!' || input[position + 1] == '/' || input[position + 1] == '?')) {
position += 2;
while (input[position] != '>') {
@ -247,7 +249,7 @@ Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input)
}
// https://html.spec.whatwg.org/multipage/parsing.html#determining-the-character-encoding
String run_encoding_sniffing_algorithm(const ByteBuffer& input)
String run_encoding_sniffing_algorithm(DOM::Document& document, const ByteBuffer& input)
{
if (input.size() >= 2) {
if (input[0] == 0xFE && input[1] == 0xFF) {
@ -265,7 +267,7 @@ String run_encoding_sniffing_algorithm(const ByteBuffer& input)
// at any later step in this algorithm.
// FIXME: If the transport layer specifies a character encoding, and it is supported.
auto optional_encoding = run_prescan_byte_stream_algorithm(input);
auto optional_encoding = run_prescan_byte_stream_algorithm(document, input);
if (optional_encoding.has_value()) {
return optional_encoding.value();
}

View file

@ -8,7 +8,7 @@
#include <AK/Optional.h>
#include <AK/String.h>
#include <LibWeb/DOM/Attribute.h>
#include <LibWeb/Forward.h>
namespace Web::HTML {
@ -16,8 +16,8 @@ bool prescan_should_abort(const ByteBuffer& input, const size_t& position);
bool prescan_is_whitespace_or_slash(const u8& byte);
bool prescan_skip_whitespace_and_slashes(const ByteBuffer& input, size_t& position);
Optional<String> extract_character_encoding_from_meta_element(String const&);
Optional<DOM::Attribute> prescan_get_attribute(const ByteBuffer& input, size_t& position);
Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input);
String run_encoding_sniffing_algorithm(const ByteBuffer& input);
RefPtr<DOM::Attribute> prescan_get_attribute(DOM::Document&, const ByteBuffer& input, size_t& position);
Optional<String> run_prescan_byte_stream_algorithm(DOM::Document&, const ByteBuffer& input);
String run_encoding_sniffing_algorithm(DOM::Document&, const ByteBuffer& input);
}

View file

@ -2810,7 +2810,7 @@ void HTMLParser::handle_in_frameset(HTMLToken& token)
}
if (token.is_end_of_file()) {
//FIXME: If the current node is not the root html element, then this is a parse error.
// FIXME: If the current node is not the root html element, then this is a parse error.
stop_parsing();
return;
@ -3162,7 +3162,7 @@ NonnullOwnPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Docume
{
if (document.has_encoding())
return make<HTMLParser>(document, input, document.encoding().value());
auto encoding = run_encoding_sniffing_algorithm(input);
auto encoding = run_encoding_sniffing_algorithm(document, input);
dbgln("The encoding sniffing algorithm returned encoding '{}'", encoding);
return make<HTMLParser>(document, input, encoding);
}