diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 404237f19a..bbf61d4c26 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -230,6 +230,11 @@ _StartOfFunction: { SWITCH_TO(TagOpen); } + ON(0) + { + PARSE_ERROR(); + EMIT_CURRENT_CHARACTER; + } ON_EOF { EMIT_EOF; @@ -257,12 +262,20 @@ _StartOfFunction: RECONSUME_IN(TagName); } ON('?') + { + PARSE_ERROR(); + create_new_token(HTMLToken::Type::Comment); + RECONSUME_IN(BogusComment); + } + ON_EOF { TODO(); } ANYTHING_ELSE { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER('<'); + RECONSUME_IN(Data); } } END_STATE @@ -281,6 +294,22 @@ _StartOfFunction: { SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } + ON_ASCII_UPPER_ALPHA + { + m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); + continue; + } + ON(0) + { + PARSE_ERROR(); + m_current_token.m_tag.tag_name.append("\uFFFD"); + continue; + } + ON_EOF + { + PARSE_ERROR(); + EMIT_EOF; + } ANYTHING_ELSE { m_current_token.m_tag.tag_name.append(current_input_character.value()); @@ -296,6 +325,23 @@ _StartOfFunction: create_new_token(HTMLToken::Type::EndTag); RECONSUME_IN(TagName); } + ON('>') + { + PARSE_ERROR(); + SWITCH_TO(Data); + } + ON_EOF + { + PARSE_ERROR(); + // FIXME: Emit a U+003C LESS-THAN SIGN character token, a U+002F SOLIDUS character token and an end-of-file token. + continue; + } + ANYTHING_ELSE + { + PARSE_ERROR(); + create_new_token(HTMLToken::Type::Comment); + RECONSUME_IN(BogusComment); + } } END_STATE @@ -312,6 +358,27 @@ _StartOfFunction: } END_STATE + BEGIN_STATE(BogusComment) + { + ON('>') + { + TODO(); + } + ON_EOF + { + TODO(); + } + ON(0) + { + TODO(); + } + ANYTHING_ELSE + { + TODO(); + } + } + END_STATE + BEGIN_STATE(DOCTYPE) { ON_WHITESPACE @@ -1415,7 +1482,8 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF { @@ -1462,11 +1530,19 @@ _StartOfFunction: { ON_WHITESPACE { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(BeforeAttributeName); } ON('/') { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(SelfClosingStartTag); } ON('>') { @@ -1503,7 +1579,8 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF { @@ -1550,11 +1627,19 @@ _StartOfFunction: { ON_WHITESPACE { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(BeforeAttributeName); } ON('/') { - TODO(); + if (!current_end_tag_token_is_appropriate()) { + // FIXME: Otherwise, treat it as per the "anything else" entry below. + TODO(); + } + SWITCH_TO(SelfClosingStartTag); } ON('>') { @@ -1591,7 +1676,26 @@ _StartOfFunction: } ON(0) { - TODO(); + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); + } + ON_EOF + { + EMIT_EOF; + } + ANYTHING_ELSE + { + EMIT_CURRENT_CHARACTER; + } + } + END_STATE + + BEGIN_STATE(PLAINTEXT) + { + ON(0) + { + PARSE_ERROR(); + EMIT_CHARACTER("\uFFFD"); } ON_EOF {