1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 04:08:11 +00:00

LibWeb: Fully implement all DOCTYPE tokenizer states

Also fixes TagOpen having a separate emit and reconsume in
ANYTHING_ELSE.
This commit is contained in:
Luke 2020-06-11 05:00:45 +01:00 committed by Andreas Kling
parent ab1df177d8
commit 821312729a
2 changed files with 178 additions and 47 deletions

View file

@ -170,6 +170,7 @@ void HTMLDocumentParser::handle_initial(HTMLToken& token)
auto doctype = adopt(*new DocumentType(document())); auto doctype = adopt(*new DocumentType(document()));
doctype->set_name(token.m_doctype.name.to_string()); doctype->set_name(token.m_doctype.name.to_string());
document().append_child(move(doctype)); document().append_child(move(doctype));
document().set_quirks_mode(token.m_doctype.force_quirks);
m_insertion_mode = InsertionMode::BeforeHTML; m_insertion_mode = InsertionMode::BeforeHTML;
return; return;
} }

View file

@ -299,8 +299,7 @@ _StartOfFunction:
ANYTHING_ELSE ANYTHING_ELSE
{ {
PARSE_ERROR(); PARSE_ERROR();
EMIT_CHARACTER('<'); EMIT_CHARACTER_AND_RECONSUME_IN('<', Data);
RECONSUME_IN(Data);
} }
} }
END_STATE END_STATE
@ -429,11 +428,16 @@ _StartOfFunction:
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
RECONSUME_IN(BeforeDOCTYPEName);
} }
} }
END_STATE END_STATE
@ -452,15 +456,25 @@ _StartOfFunction:
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
SWITCH_TO(DOCTYPEName);
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
create_new_token(HTMLToken::Type::DOCTYPE);
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -484,14 +498,20 @@ _StartOfFunction:
ON_ASCII_UPPER_ALPHA ON_ASCII_UPPER_ALPHA
{ {
m_current_token.m_doctype.name.append(tolower(current_input_character.value())); m_current_token.m_doctype.name.append(tolower(current_input_character.value()));
continue;
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.name.append_codepoint(0xFFFD);
continue;
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -513,7 +533,10 @@ _StartOfFunction:
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -523,7 +546,9 @@ _StartOfFunction:
if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) { if (toupper(current_input_character.value()) == 'S' && consume_next_if_match("YSTEM", CaseSensitivity::CaseInsensitive)) {
SWITCH_TO(AfterDOCTYPESystemKeyword); SWITCH_TO(AfterDOCTYPESystemKeyword);
} }
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -536,23 +561,34 @@ _StartOfFunction:
} }
ON('"') ON('"')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.public_identifier.clear();
SWITCH_TO(DOCTYPEPublicIdentifierDoubleQuoted);
} }
ON('\'') ON('\'')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.public_identifier.clear();
SWITCH_TO(DOCTYPEPublicIdentifierSingleQuoted);
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -565,23 +601,34 @@ _StartOfFunction:
} }
ON('"') ON('"')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
} }
ON('\'') ON('\'')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -604,15 +651,22 @@ _StartOfFunction:
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -635,15 +689,22 @@ _StartOfFunction:
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -656,15 +717,22 @@ _StartOfFunction:
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD);
continue;
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -682,15 +750,22 @@ _StartOfFunction:
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.public_identifier.append_codepoint(0xFFFD);
continue;
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -708,15 +783,22 @@ _StartOfFunction:
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD);
continue;
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -734,15 +816,22 @@ _StartOfFunction:
} }
ON(0) ON(0)
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.append_codepoint(0xFFFD);
continue;
} }
ON('>') ON('>')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
@ -764,19 +853,28 @@ _StartOfFunction:
} }
ON('"') ON('"')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierDoubleQuoted);
} }
ON('\'') ON('\'')
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.system_identifier.clear();
SWITCH_TO(DOCTYPESystemIdentifierSingleQuoted);
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -803,11 +901,16 @@ _StartOfFunction:
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
RECONSUME_IN(BogusDOCTYPE);
} }
} }
END_STATE END_STATE
@ -824,11 +927,38 @@ _StartOfFunction:
} }
ON_EOF ON_EOF
{ {
TODO(); PARSE_ERROR();
m_current_token.m_doctype.force_quirks = true;
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
} }
ANYTHING_ELSE ANYTHING_ELSE
{ {
TODO(); PARSE_ERROR();
RECONSUME_IN(BogusDOCTYPE);
}
}
END_STATE
BEGIN_STATE(BogusDOCTYPE)
{
ON('>')
{
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
}
ON(0)
{
PARSE_ERROR();
continue;
}
ON_EOF
{
m_queued_tokens.enqueue(m_current_token);
EMIT_EOF;
}
ANYTHING_ELSE
{
continue;
} }
} }
END_STATE END_STATE