diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index d39dfcfc40..d5c650689b 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -5,6 +5,7 @@ */ #include +#include #include #include #include @@ -16,15 +17,6 @@ namespace Web::HTML { #pragma GCC diagnostic ignored "-Wunused-label" -#if TOKENIZER_TRACE_DEBUG -# define PARSE_ERROR() \ - do { \ - dbgln("Parse error (tokenization) {} @ {}", __PRETTY_FUNCTION__, __LINE__); \ - } while (0) -#else -# define PARSE_ERROR() -#endif - #define CONSUME_NEXT_INPUT_CHARACTER \ current_input_character = next_code_point(); @@ -175,6 +167,11 @@ namespace Web::HTML { } \ } +static inline void log_parse_error(const SourceLocation& location = SourceLocation::current()) +{ + dbgln_if(TOKENIZER_TRACE_DEBUG, "Parse error (tokenization) {}", location); +} + static inline bool is_surrogate(u32 code_point) { return (code_point & 0xfffff800) == 0xd800; @@ -237,7 +234,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CURRENT_CHARACTER; } ON_EOF @@ -268,19 +265,19 @@ _StartOfFunction: } ON('?') { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::Comment); RECONSUME_IN(BogusComment); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(HTMLToken::make_character('<')); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER_AND_RECONSUME_IN('<', Data); } } @@ -307,13 +304,13 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_tag.tag_name.append_code_point(0xFFFD); continue; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -333,19 +330,19 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(HTMLToken::make_character('<')); m_queued_tokens.enqueue(HTMLToken::make_character('/')); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::Comment); RECONSUME_IN(BogusComment); } @@ -367,7 +364,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::Comment); SWITCH_TO(BogusComment); } @@ -387,7 +384,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_comment_or_character.data.append_code_point(0xFFFD); continue; } @@ -411,7 +408,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); @@ -419,7 +416,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(BeforeDOCTYPEName); } } @@ -440,7 +437,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.name.append_code_point(0xFFFD); m_current_token.m_doctype.missing_name = false; @@ -448,14 +445,14 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); create_new_token(HTMLToken::Type::DOCTYPE); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); @@ -488,13 +485,13 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.name.append_code_point(0xFFFD); continue; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -519,7 +516,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -532,7 +529,7 @@ _StartOfFunction: if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) { SWITCH_TO(AfterDOCTYPESystemKeyword); } - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -547,34 +544,34 @@ _StartOfFunction: } ON('"') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.public_identifier.clear(); m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted); } ON('\'') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.public_identifier.clear(); m_current_token.m_doctype.missing_public_identifier = false; SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted); } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -589,34 +586,34 @@ _StartOfFunction: } ON('"') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.clear(); m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.clear(); m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -643,20 +640,20 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -683,20 +680,20 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -711,19 +708,19 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.public_identifier.append_code_point(0xFFFD); continue; } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -744,19 +741,19 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.public_identifier.append_code_point(0xFFFD); continue; } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -777,19 +774,19 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.append_code_point(0xFFFD); continue; } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -810,19 +807,19 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.append_code_point(0xFFFD); continue; } ON('>') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; @@ -847,28 +844,28 @@ _StartOfFunction: } ON('"') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.clear(); m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted); } ON('\'') { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.system_identifier.clear(); m_current_token.m_doctype.missing_system_identifier = false; SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -899,14 +896,14 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; RECONSUME_IN(BogusDOCTYPE); } @@ -925,14 +922,14 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_doctype.force_quirks = true; m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(BogusDOCTYPE); } } @@ -946,7 +943,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); continue; } ON_EOF @@ -981,7 +978,7 @@ _StartOfFunction: } ON('=') { - PARSE_ERROR(); + log_parse_error(); auto new_attribute = HTMLToken::AttributeBuilder(); new_attribute.local_name_builder.append_code_point(current_input_character.value()); m_current_token.m_tag.attributes.append(new_attribute); @@ -1004,12 +1001,12 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(BeforeAttributeName); } } @@ -1044,23 +1041,23 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_tag.attributes.last().local_name_builder.append_code_point(0xFFFD); continue; } ON('"') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeName; } ON('\'') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeName; } ON('<') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeName; } ANYTHING_ELSE @@ -1092,7 +1089,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -1119,7 +1116,7 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ANYTHING_ELSE @@ -1142,13 +1139,13 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_tag.attributes.last().value_builder.append_code_point(0xFFFD); continue; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -1172,13 +1169,13 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_tag.attributes.last().value_builder.append_code_point(0xFFFD); continue; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -1206,38 +1203,38 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_tag.attributes.last().value_builder.append_code_point(0xFFFD); continue; } ON('"') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeValueUnquoted; } ON('\'') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeValueUnquoted; } ON('<') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeValueUnquoted; } ON('=') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeValueUnquoted; } ON('`') { - PARSE_ERROR(); + log_parse_error(); goto AnythingElseAttributeValueUnquoted; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -1265,12 +1262,12 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(BeforeAttributeName); } } @@ -1284,7 +1281,7 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ANYTHING_ELSE @@ -1302,12 +1299,12 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } @@ -1332,13 +1329,13 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); m_current_token.m_comment_or_character.data.append_code_point(0xFFFD); continue; } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } @@ -1367,7 +1364,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } @@ -1388,12 +1385,12 @@ _StartOfFunction: } ON('>') { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } @@ -1413,7 +1410,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); m_queued_tokens.enqueue(m_current_token); EMIT_EOF; } @@ -1482,7 +1479,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(CommentEnd); } } @@ -1533,7 +1530,7 @@ _StartOfFunction: } if (!match.value().entity.ends_with(';')) { - PARSE_ERROR(); + log_parse_error(); } m_temporary_buffer.clear(); @@ -1563,7 +1560,7 @@ _StartOfFunction: } ON(';') { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN_RETURN_STATE; } ANYTHING_ELSE @@ -1602,7 +1599,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; RECONSUME_IN_RETURN_STATE; } @@ -1617,7 +1614,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; RECONSUME_IN_RETURN_STATE; } @@ -1650,7 +1647,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(NumericCharacterReferenceEnd); } } @@ -1670,7 +1667,7 @@ _StartOfFunction: } ANYTHING_ELSE { - PARSE_ERROR(); + log_parse_error(); RECONSUME_IN(NumericCharacterReferenceEnd); } } @@ -1681,22 +1678,22 @@ _StartOfFunction: DONT_CONSUME_NEXT_INPUT_CHARACTER; if (m_character_reference_code == 0) { - PARSE_ERROR(); + log_parse_error(); m_character_reference_code = 0xFFFD; } if (m_character_reference_code > 0x10ffff) { - PARSE_ERROR(); + log_parse_error(); m_character_reference_code = 0xFFFD; } if (is_surrogate(m_character_reference_code)) { - PARSE_ERROR(); + log_parse_error(); m_character_reference_code = 0xFFFD; } if (is_noncharacter(m_character_reference_code)) { - PARSE_ERROR(); + log_parse_error(); } if (m_character_reference_code == 0xd || (is_control(m_character_reference_code) && !isspace(m_character_reference_code))) { - PARSE_ERROR(); + log_parse_error(); constexpr struct { u32 number; u32 code_point; @@ -1757,7 +1754,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF @@ -1867,7 +1864,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF @@ -1977,7 +1974,7 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF @@ -1995,7 +1992,7 @@ _StartOfFunction: { ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF @@ -2071,12 +2068,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataEscaped); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2242,12 +2239,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2269,12 +2266,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataDoubleEscaped); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2300,12 +2297,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataDoubleEscaped); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2388,12 +2385,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataEscaped); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2415,12 +2412,12 @@ _StartOfFunction: } ON(0) { - PARSE_ERROR(); + log_parse_error(); EMIT_CHARACTER(0xFFFD); } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE @@ -2509,7 +2506,7 @@ _StartOfFunction: } ON_EOF { - PARSE_ERROR(); + log_parse_error(); EMIT_EOF; } ANYTHING_ELSE