1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 12:28:12 +00:00

LibWeb: Switch HTMLToken and HTMLTokenizer to String & FlyString

This commit is contained in:
Andreas Kling 2023-11-04 10:08:07 +01:00
parent 0902f552a3
commit b341aeb5c1
4 changed files with 89 additions and 86 deletions

View file

@ -454,29 +454,29 @@ DOM::QuirksMode HTMLParser::which_quirks_mode(HTMLToken const& doctype_token) co
return DOM::QuirksMode::Yes; return DOM::QuirksMode::Yes;
for (auto& public_id : s_quirks_public_ids) { for (auto& public_id : s_quirks_public_ids) {
if (public_identifier.starts_with(public_id, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes(public_id, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes; return DOM::QuirksMode::Yes;
} }
if (doctype_token.doctype_data().missing_system_identifier) { if (doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes; return DOM::QuirksMode::Yes;
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Yes; return DOM::QuirksMode::Yes;
} }
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD XHTML 1.0 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited; return DOM::QuirksMode::Limited;
if (public_identifier.starts_with("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD XHTML 1.0 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited; return DOM::QuirksMode::Limited;
if (!doctype_token.doctype_data().missing_system_identifier) { if (!doctype_token.doctype_data().missing_system_identifier) {
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Frameset//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited; return DOM::QuirksMode::Limited;
if (public_identifier.starts_with("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive)) if (public_identifier.starts_with_bytes("-//W3C//DTD HTML 4.01 Transitional//"sv, CaseSensitivity::CaseInsensitive))
return DOM::QuirksMode::Limited; return DOM::QuirksMode::Limited;
} }
@ -490,16 +490,16 @@ void HTMLParser::handle_initial(HTMLToken& token)
} }
if (token.is_comment()) { if (token.is_comment()) {
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment()))); auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment)); MUST(document().append_child(*comment));
return; return;
} }
if (token.is_doctype()) { if (token.is_doctype()) {
auto doctype = realm().heap().allocate<DOM::DocumentType>(realm(), document()); auto doctype = realm().heap().allocate<DOM::DocumentType>(realm(), document());
doctype->set_name(String::from_deprecated_string(token.doctype_data().name).release_value()); doctype->set_name(token.doctype_data().name);
doctype->set_public_id(String::from_deprecated_string(token.doctype_data().public_identifier).release_value()); doctype->set_public_id(token.doctype_data().public_identifier);
doctype->set_system_id(String::from_deprecated_string(token.doctype_data().system_identifier).release_value()); doctype->set_system_id(token.doctype_data().system_identifier);
MUST(document().append_child(*doctype)); MUST(document().append_child(*doctype));
document().set_quirks_mode(which_quirks_mode(token)); document().set_quirks_mode(which_quirks_mode(token));
m_insertion_mode = InsertionMode::BeforeHTML; m_insertion_mode = InsertionMode::BeforeHTML;
@ -525,7 +525,7 @@ void HTMLParser::handle_before_html(HTMLToken& token)
// -> A comment token // -> A comment token
if (token.is_comment()) { if (token.is_comment()) {
// Insert a comment as the last child of the Document object. // Insert a comment as the last child of the Document object.
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment()))); auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment)); MUST(document().append_child(*comment));
return; return;
} }
@ -822,7 +822,7 @@ AnythingElse:
// Creates a DOM::Comment from the token's comment data and inserts it at the
// location returned by find_appropriate_place_for_inserting_node().
void HTMLParser::insert_comment(HTMLToken& token)
{
    // Resolve the target location before allocating the node, as the original did.
    auto location = find_appropriate_place_for_inserting_node();

    auto comment_node = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
    location.parent->insert_before(comment_node, location.insert_before_sibling);
}
void HTMLParser::handle_in_head(HTMLToken& token) void HTMLParser::handle_in_head(HTMLToken& token)
@ -1142,7 +1142,7 @@ void HTMLParser::handle_after_body(HTMLToken& token)
if (token.is_comment()) { if (token.is_comment()) {
auto& insertion_location = m_stack_of_open_elements.first(); auto& insertion_location = m_stack_of_open_elements.first();
MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment()))))); MUST(insertion_location.append_child(realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment())));
return; return;
} }
@ -1178,7 +1178,7 @@ void HTMLParser::handle_after_body(HTMLToken& token)
void HTMLParser::handle_after_after_body(HTMLToken& token) void HTMLParser::handle_after_after_body(HTMLToken& token)
{ {
if (token.is_comment()) { if (token.is_comment()) {
auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), MUST(String::from_deprecated_string(token.comment()))); auto comment = realm().heap().allocate<DOM::Comment>(realm(), document(), token.comment());
MUST(document().append_child(*comment)); MUST(document().append_child(*comment));
return; return;
} }
@ -2025,7 +2025,7 @@ void HTMLParser::handle_in_body(HTMLToken& token)
if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) { if (token.is_start_tag() && token.tag_name() == HTML::TagNames::image) {
// Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.) // Parse error. Change the token's tag name to HTML::TagNames::img and reprocess it. (Don't ask.)
log_parse_error(); log_parse_error();
token.set_tag_name("img"); token.set_tag_name("img"_fly_string);
process_using_the_rules_for(m_insertion_mode, token); process_using_the_rules_for(m_insertion_mode, token);
return; return;
} }
@ -2194,41 +2194,41 @@ void HTMLParser::adjust_mathml_attributes(HTMLToken& token)
// Restores the mixed-case spelling of SVG element names.
//
// Each call below rewrites the token's tag name only when it currently equals the
// all-lowercase spelling on the left; any other tag name is left untouched.
// The table mirrors the SVG tag-name adjustment table used by the HTML parser's
// "in foreign content" rules.
void HTMLParser::adjust_svg_tag_names(HTMLToken& token)
{
    token.adjust_tag_name("altglyph"_fly_string, "altGlyph"_fly_string);
    token.adjust_tag_name("altglyphdef"_fly_string, "altGlyphDef"_fly_string);
    token.adjust_tag_name("altglyphitem"_fly_string, "altGlyphItem"_fly_string);
    token.adjust_tag_name("animatecolor"_fly_string, "animateColor"_fly_string);
    token.adjust_tag_name("animatemotion"_fly_string, "animateMotion"_fly_string);
    token.adjust_tag_name("animatetransform"_fly_string, "animateTransform"_fly_string);
    token.adjust_tag_name("clippath"_fly_string, "clipPath"_fly_string);
    token.adjust_tag_name("feblend"_fly_string, "feBlend"_fly_string);
    token.adjust_tag_name("fecolormatrix"_fly_string, "feColorMatrix"_fly_string);
    token.adjust_tag_name("fecomponenttransfer"_fly_string, "feComponentTransfer"_fly_string);
    token.adjust_tag_name("fecomposite"_fly_string, "feComposite"_fly_string);
    token.adjust_tag_name("feconvolvematrix"_fly_string, "feConvolveMatrix"_fly_string);
    token.adjust_tag_name("fediffuselighting"_fly_string, "feDiffuseLighting"_fly_string);
    token.adjust_tag_name("fedisplacementmap"_fly_string, "feDisplacementMap"_fly_string);
    token.adjust_tag_name("fedistantlight"_fly_string, "feDistantLight"_fly_string);
    token.adjust_tag_name("fedropshadow"_fly_string, "feDropShadow"_fly_string);
    token.adjust_tag_name("feflood"_fly_string, "feFlood"_fly_string);
    token.adjust_tag_name("fefunca"_fly_string, "feFuncA"_fly_string);
    token.adjust_tag_name("fefuncb"_fly_string, "feFuncB"_fly_string);
    token.adjust_tag_name("fefuncg"_fly_string, "feFuncG"_fly_string);
    token.adjust_tag_name("fefuncr"_fly_string, "feFuncR"_fly_string);
    token.adjust_tag_name("fegaussianblur"_fly_string, "feGaussianBlur"_fly_string);
    token.adjust_tag_name("feimage"_fly_string, "feImage"_fly_string);
    token.adjust_tag_name("femerge"_fly_string, "feMerge"_fly_string);
    token.adjust_tag_name("femergenode"_fly_string, "feMergeNode"_fly_string);
    token.adjust_tag_name("femorphology"_fly_string, "feMorphology"_fly_string);
    token.adjust_tag_name("feoffset"_fly_string, "feOffset"_fly_string);
    token.adjust_tag_name("fepointlight"_fly_string, "fePointLight"_fly_string);
    token.adjust_tag_name("fespecularlighting"_fly_string, "feSpecularLighting"_fly_string);
    // The SVG element is spelled <feSpotLight>, with a capital 'L'.
    token.adjust_tag_name("fespotlight"_fly_string, "feSpotLight"_fly_string);
    token.adjust_tag_name("fetile"_fly_string, "feTile"_fly_string);
    token.adjust_tag_name("feturbulence"_fly_string, "feTurbulence"_fly_string);
    token.adjust_tag_name("foreignobject"_fly_string, "foreignObject"_fly_string);
    token.adjust_tag_name("glyphref"_fly_string, "glyphRef"_fly_string);
    token.adjust_tag_name("lineargradient"_fly_string, "linearGradient"_fly_string);
    token.adjust_tag_name("radialgradient"_fly_string, "radialGradient"_fly_string);
    token.adjust_tag_name("textpath"_fly_string, "textPath"_fly_string);
}
void HTMLParser::adjust_svg_attributes(HTMLToken& token) void HTMLParser::adjust_svg_attributes(HTMLToken& token)
@ -3407,7 +3407,7 @@ void HTMLParser::handle_after_frameset(HTMLToken& token)
void HTMLParser::handle_after_after_frameset(HTMLToken& token) void HTMLParser::handle_after_after_frameset(HTMLToken& token)
{ {
if (token.is_comment()) { if (token.is_comment()) {
auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), MUST(String::from_deprecated_string(token.comment()))); auto comment = document().heap().allocate<DOM::Comment>(document().realm(), document(), token.comment());
MUST(document().append_child(comment)); MUST(document().append_child(comment));
return; return;
} }

View file

@ -54,9 +54,9 @@ public:
struct DoctypeData { struct DoctypeData {
// NOTE: "Missing" is a distinct state from the empty string. // NOTE: "Missing" is a distinct state from the empty string.
DeprecatedString name; String name;
DeprecatedString public_identifier; String public_identifier;
DeprecatedString system_identifier; String system_identifier;
bool missing_name { true }; bool missing_name { true };
bool missing_public_identifier { true }; bool missing_public_identifier { true };
bool missing_system_identifier { true }; bool missing_system_identifier { true };
@ -73,7 +73,7 @@ public:
// Builds a Type::StartTag token and sets its tag name to `tag_name`.
static HTMLToken make_start_tag(FlyString const& tag_name)
{
    HTMLToken start_tag { Type::StartTag };
    start_tag.set_tag_name(tag_name);
    return start_tag;
}
@ -134,25 +134,25 @@ public:
m_data.get<u32>() = code_point; m_data.get<u32>() = code_point;
} }
DeprecatedFlyString const& comment() const String const& comment() const
{ {
VERIFY(is_comment()); VERIFY(is_comment());
return m_comment_data;
}
void set_comment(String comment)
{
VERIFY(is_comment());
m_comment_data = move(comment);
}
FlyString const& tag_name() const
{
VERIFY(is_start_tag() || is_end_tag());
return m_string_data; return m_string_data;
} }
void set_comment(DeprecatedString comment) void set_tag_name(FlyString name)
{
VERIFY(is_comment());
m_string_data = move(comment);
}
FlyString tag_name() const
{
VERIFY(is_start_tag() || is_end_tag());
return MUST(FlyString::from_deprecated_fly_string(m_string_data));
}
void set_tag_name(DeprecatedString name)
{ {
VERIFY(is_start_tag() || is_end_tag()); VERIFY(is_start_tag() || is_end_tag());
m_string_data = move(name); m_string_data = move(name);
@ -273,10 +273,10 @@ public:
return !attribute(attribute_name).is_null(); return !attribute(attribute_name).is_null();
} }
void adjust_tag_name(DeprecatedFlyString const& old_name, DeprecatedFlyString const& new_name) void adjust_tag_name(FlyString const& old_name, FlyString const& new_name)
{ {
VERIFY(is_start_tag() || is_end_tag()); VERIFY(is_start_tag() || is_end_tag());
if (old_name == tag_name().to_deprecated_fly_string()) if (old_name == tag_name())
set_tag_name(new_name); set_tag_name(new_name);
} }
@ -356,8 +356,11 @@ private:
bool m_tag_self_closing { false }; bool m_tag_self_closing { false };
bool m_tag_self_closing_acknowledged { false }; bool m_tag_self_closing_acknowledged { false };
// Type::Comment (comment data), Type::StartTag and Type::EndTag (tag name) // Type::StartTag and Type::EndTag (tag name)
DeprecatedFlyString m_string_data; FlyString m_string_data;
// Type::Comment (comment data)
String m_comment_data;
Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {}; Variant<Empty, u32, OwnPtr<DoctypeData>, OwnPtr<Vector<Attribute>>> m_data {};

View file

@ -1108,31 +1108,31 @@ _StartOfFunction:
ON_WHITESPACE ON_WHITESPACE
{ {
m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder())); m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName); RECONSUME_IN(AfterAttributeName);
} }
ON('/') ON('/')
{ {
m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder())); m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName); RECONSUME_IN(AfterAttributeName);
} }
ON('>') ON('>')
{ {
m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder())); m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName); RECONSUME_IN(AfterAttributeName);
} }
ON_EOF ON_EOF
{ {
m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder())); m_current_token.last_attribute().local_name = consume_current_builder();
RECONSUME_IN(AfterAttributeName); RECONSUME_IN(AfterAttributeName);
} }
ON('=') ON('=')
{ {
m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().name_end_position = nth_last_position(1);
m_current_token.last_attribute().local_name = MUST(FlyString::from_deprecated_fly_string(consume_current_builder())); m_current_token.last_attribute().local_name = consume_current_builder();
SWITCH_TO(BeforeAttributeValue); SWITCH_TO(BeforeAttributeValue);
} }
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
@ -1238,7 +1238,7 @@ _StartOfFunction:
{ {
ON('"') ON('"')
{ {
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder())); m_current_token.last_attribute().value = consume_current_builder();
SWITCH_TO(AfterAttributeValueQuoted); SWITCH_TO(AfterAttributeValueQuoted);
} }
ON('&') ON('&')
@ -1270,7 +1270,7 @@ _StartOfFunction:
{ {
ON('\'') ON('\'')
{ {
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder())); m_current_token.last_attribute().value = consume_current_builder();
SWITCH_TO(AfterAttributeValueQuoted); SWITCH_TO(AfterAttributeValueQuoted);
} }
ON('&') ON('&')
@ -1302,7 +1302,7 @@ _StartOfFunction:
{ {
ON_WHITESPACE ON_WHITESPACE
{ {
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder())); m_current_token.last_attribute().value = consume_current_builder();
m_current_token.last_attribute().value_end_position = nth_last_position(1); m_current_token.last_attribute().value_end_position = nth_last_position(1);
SWITCH_TO(BeforeAttributeName); SWITCH_TO(BeforeAttributeName);
} }
@ -1313,7 +1313,7 @@ _StartOfFunction:
} }
ON('>') ON('>')
{ {
m_current_token.last_attribute().value = MUST(String::from_deprecated_string(consume_current_builder())); m_current_token.last_attribute().value = consume_current_builder();
m_current_token.last_attribute().value_end_position = nth_last_position(1); m_current_token.last_attribute().value_end_position = nth_last_position(1);
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
@ -2879,9 +2879,9 @@ void HTMLTokenizer::restore_to(Utf8CodePointIterator const& new_iterator)
m_utf8_iterator = new_iterator; m_utf8_iterator = new_iterator;
} }
DeprecatedString HTMLTokenizer::consume_current_builder() String HTMLTokenizer::consume_current_builder()
{ {
auto string = m_current_builder.to_deprecated_string(); auto string = MUST(m_current_builder.to_string());
m_current_builder.clear(); m_current_builder.clear();
return string; return string;
} }

View file

@ -154,7 +154,7 @@ private:
bool consume_next_if_match(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive); bool consume_next_if_match(StringView, CaseSensitivity = CaseSensitivity::CaseSensitive);
void create_new_token(HTMLToken::Type); void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const; bool current_end_tag_token_is_appropriate() const;
DeprecatedString consume_current_builder(); String consume_current_builder();
static char const* state_name(State state) static char const* state_name(State state)
{ {