diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
index 12e3f3a48e..dbbf83b802 100644
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@@ -168,6 +168,16 @@
#define EMIT_CURRENT_CHARACTER \
EMIT_CHARACTER(current_input_character.value());
+#define SWITCH_TO_AND_EMIT_CHARACTER(codepoint, new_state) /* record State::new_state as the next state, then pass `codepoint` to EMIT_CHARACTER */ \
+ do { /* do/while(0) wrapper: the expansion plus the caller's ';' forms one statement, so it is usable in unbraced if/else */ \
+ will_switch_to(State::new_state); \
+ m_state = State::new_state; \
+ EMIT_CHARACTER(codepoint); /* invoked last, after m_state has been updated */ \
+ } while (0)
+
+#define SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(new_state) /* SWITCH_TO_AND_EMIT_CHARACTER with current_input_character.value() as the code point */ \
+ SWITCH_TO_AND_EMIT_CHARACTER(current_input_character.value(), new_state)
+
#define BEGIN_STATE(state) \
state: \
case State::state: { \
@@ -1560,7 +1570,6 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- // FIXME: Emit a U+003C LESS-THAN SIGN character token and a U+002F SOLIDUS character token. Reconsume in the RCDATA state.
m_queued_tokens.enqueue(HTMLToken::make_character('<'));
m_queued_tokens.enqueue(HTMLToken::make_character('/'));
RECONSUME_IN(RCDATA);
@@ -1657,8 +1666,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- EMIT_CHARACTER('<');
- RECONSUME_IN(RAWTEXT);
+ EMIT_CHARACTER_AND_RECONSUME_IN('<', RAWTEXT);
}
}
END_STATE
@@ -1801,8 +1809,7 @@ _StartOfFunction:
{
ON('-')
{
- m_queued_tokens.enqueue(HTMLToken::make_character('-'));
- SWITCH_TO(ScriptDataEscapeStartDash);
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataEscapeStartDash);
}
ANYTHING_ELSE
{
@@ -1815,8 +1822,7 @@ _StartOfFunction:
{
ON('-')
{
- m_queued_tokens.enqueue(HTMLToken::make_character('-'));
- SWITCH_TO(ScriptDataEscapedDashDash);
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataEscapedDashDash);
}
ANYTHING_ELSE
{
@@ -1837,20 +1843,21 @@ _StartOfFunction:
}
ON('>')
{
- m_queued_tokens.enqueue(HTMLToken::make_character('>'));
- SWITCH_TO(ScriptData);
+ SWITCH_TO_AND_EMIT_CHARACTER('>', ScriptData);
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataEscaped);
}
ON_EOF
{
- TODO();
+ PARSE_ERROR();
+ EMIT_EOF;
}
ANYTHING_ELSE
{
- SWITCH_TO_AND_EMIT_CURRENT_TOKEN(ScriptDataEscaped);
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
}
}
END_STATE
@@ -1894,27 +1901,39 @@ _StartOfFunction:
{
ON_WHITESPACE
{
- if (current_end_tag_token_is_appropriate()) {
+ if (current_end_tag_token_is_appropriate()) // NOTE(review): unbraced and without else; the fallback below is skipped only if SWITCH_TO leaves this handler (its definition is outside this hunk) - confirm
SWITCH_TO(BeforeAttributeName);
- } else {
- TODO();
+ // Fallback: queue '<', '/' and every code point held in m_temporary_buffer as character tokens.
+ m_queued_tokens.enqueue(HTMLToken::make_character('<'));
+ m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+ for (auto codepoint : m_temporary_buffer) {
+ m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
}
+ RECONSUME_IN(ScriptDataEscaped);
}
ON('/')
{
- if (current_end_tag_token_is_appropriate()) {
+ if (current_end_tag_token_is_appropriate()) // NOTE(review): same reliance on SWITCH_TO not falling through - confirm
SWITCH_TO(SelfClosingStartTag);
- } else {
- TODO();
+ // Same fallback as the whitespace handler: queue '<', '/' and the buffered code points.
+ m_queued_tokens.enqueue(HTMLToken::make_character('<'));
+ m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+ for (auto codepoint : m_temporary_buffer) {
+ m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
}
+ RECONSUME_IN(ScriptDataEscaped);
}
ON('>')
{
- if (current_end_tag_token_is_appropriate()) {
+ if (current_end_tag_token_is_appropriate()) // NOTE(review): relies on SWITCH_TO_AND_EMIT_CURRENT_TOKEN leaving this handler (definition not visible here) - confirm
SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data);
- } else {
- TODO();
+ // Same fallback as the whitespace handler: queue '<', '/' and the buffered code points.
+ m_queued_tokens.enqueue(HTMLToken::make_character('<'));
+ m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+ for (auto codepoint : m_temporary_buffer) {
+ m_queued_tokens.enqueue(HTMLToken::make_character(codepoint));
}
+ RECONSUME_IN(ScriptDataEscaped);
}
ON_ASCII_UPPER_ALPHA
{
@@ -1942,7 +1961,204 @@ _StartOfFunction:
BEGIN_STATE(ScriptDataDoubleEscapeStart)
{
- TODO();
+ auto temporary_buffer_equal_to_script = [this]() -> bool { // true only when m_temporary_buffer holds exactly 's','c','r','i','p','t'
+ if (m_temporary_buffer.size() != 6)
+ return false;
+
+ // FIXME: Replace this element-by-element comparison with a shared helper; the same lambda is repeated in ScriptDataDoubleEscapeEnd.
+ return m_temporary_buffer[0] == 's' &&
+ m_temporary_buffer[1] == 'c' &&
+ m_temporary_buffer[2] == 'r' &&
+ m_temporary_buffer[3] == 'i' &&
+ m_temporary_buffer[4] == 'p' &&
+ m_temporary_buffer[5] == 't';
+ };
+ ON_WHITESPACE // whitespace, '/' and '>' are handled identically: the buffer contents pick the next state
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ }
+ ON('/')
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ }
+ ON('>')
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ }
+ ON_ASCII_UPPER_ALPHA
+ {
+ m_temporary_buffer.append(tolower(current_input_character.value())); // the buffer stores the lowercased letter...
+ EMIT_CURRENT_CHARACTER; // ...while the character handed to EMIT_CHARACTER keeps its original case
+ }
+ ON_ASCII_LOWER_ALPHA
+ {
+ m_temporary_buffer.append(current_input_character.value());
+ EMIT_CURRENT_CHARACTER;
+ }
+ ANYTHING_ELSE
+ {
+ RECONSUME_IN(ScriptDataEscaped); // nothing is emitted or buffered for other input
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(ScriptDataDoubleEscaped) // '-' and '<' switch to the Dash / LessThanSign states; the other handlers emit without switching state
+ {
+ ON('-')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataDoubleEscapedDash);
+ }
+ ON('<')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('<', ScriptDataDoubleEscapedLessThanSign);
+ }
+ ON(0)
+ {
+ PARSE_ERROR();
+ EMIT_CHARACTER(0xFFFD); // 0xFFFD is U+FFFD REPLACEMENT CHARACTER
+ }
+ ON_EOF
+ {
+ PARSE_ERROR();
+ EMIT_EOF;
+ }
+ ANYTHING_ELSE
+ {
+ EMIT_CURRENT_CHARACTER;
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(ScriptDataDoubleEscapedDash) // entered from ScriptDataDoubleEscaped on '-'; a second '-' moves on to ScriptDataDoubleEscapedDashDash
+ {
+ ON('-')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataDoubleEscapedDashDash);
+ }
+ ON('<')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('<', ScriptDataDoubleEscapedLessThanSign);
+ }
+ ON(0)
+ {
+ PARSE_ERROR();
+ SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataDoubleEscaped); // 0xFFFD is U+FFFD REPLACEMENT CHARACTER
+ }
+ ON_EOF
+ {
+ PARSE_ERROR();
+ EMIT_EOF;
+ }
+ ANYTHING_ELSE
+ {
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped); // any other character returns to ScriptDataDoubleEscaped
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(ScriptDataDoubleEscapedDashDash) // entered from ScriptDataDoubleEscapedDash on '-'
+ {
+ ON('-')
+ {
+ EMIT_CHARACTER('-'); // no state switch: further '-' characters are handled by this same state
+ }
+ ON('<')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('<', ScriptDataDoubleEscapedLessThanSign);
+ }
+ ON('>')
+ {
+ SWITCH_TO_AND_EMIT_CHARACTER('>', ScriptData); // '>' is the only input here that switches to ScriptData
+ }
+ ON(0)
+ {
+ PARSE_ERROR();
+ SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataDoubleEscaped); // 0xFFFD is U+FFFD REPLACEMENT CHARACTER
+ }
+ ON_EOF
+ {
+ PARSE_ERROR();
+ EMIT_EOF;
+ }
+ ANYTHING_ELSE
+ {
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(ScriptDataDoubleEscapedLessThanSign) // entered after '<' was emitted from one of the ScriptDataDoubleEscaped* states
+ {
+ ON('/')
+ {
+ m_temporary_buffer.clear(); // start from an empty buffer; ScriptDataDoubleEscapeEnd appends letters to it
+ SWITCH_TO_AND_EMIT_CHARACTER('/', ScriptDataDoubleEscapeEnd);
+ }
+ ANYTHING_ELSE
+ {
+ RECONSUME_IN(ScriptDataDoubleEscaped); // nothing is emitted here for other input
+ }
+ }
+ END_STATE
+
+ BEGIN_STATE(ScriptDataDoubleEscapeEnd) // counterpart of ScriptDataDoubleEscapeStart with the two target states swapped
+ {
+ auto temporary_buffer_equal_to_script = [this]() -> bool { // true only when m_temporary_buffer holds exactly 's','c','r','i','p','t'
+ if (m_temporary_buffer.size() != 6)
+ return false;
+
+ // FIXME: Replace this element-by-element comparison with a shared helper; the same lambda is repeated in ScriptDataDoubleEscapeStart.
+ return m_temporary_buffer[0] == 's' &&
+ m_temporary_buffer[1] == 'c' &&
+ m_temporary_buffer[2] == 'r' &&
+ m_temporary_buffer[3] == 'i' &&
+ m_temporary_buffer[4] == 'p' &&
+ m_temporary_buffer[5] == 't';
+ };
+ ON_WHITESPACE // whitespace, '/' and '>' are handled identically: the buffer contents pick the next state
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ }
+ ON('/')
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ }
+ ON('>')
+ {
+ if (temporary_buffer_equal_to_script())
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
+ else
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataDoubleEscaped);
+ }
+ ON_ASCII_UPPER_ALPHA
+ {
+ m_temporary_buffer.append(tolower(current_input_character.value())); // the buffer stores the lowercased letter...
+ EMIT_CURRENT_CHARACTER; // ...while the character handed to EMIT_CHARACTER keeps its original case
+ }
+ ON_ASCII_LOWER_ALPHA
+ {
+ m_temporary_buffer.append(current_input_character.value());
+ EMIT_CURRENT_CHARACTER;
+ }
+ ANYTHING_ELSE
+ {
+ RECONSUME_IN(ScriptDataDoubleEscaped); // nothing is emitted or buffered for other input
+ }
}
END_STATE
@@ -1950,7 +2166,7 @@ _StartOfFunction:
{
ON('-')
{
- SWITCH_TO_AND_EMIT_CURRENT_TOKEN(ScriptDataEscapedDashDash);
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataEscapedDashDash);
}
ON('<')
{
@@ -1958,15 +2174,17 @@ _StartOfFunction:
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ SWITCH_TO_AND_EMIT_CHARACTER(0xFFFD, ScriptDataEscaped);
}
ON_EOF
{
- TODO();
+ PARSE_ERROR();
+ EMIT_EOF;
}
ANYTHING_ELSE
{
- SWITCH_TO_AND_EMIT_CURRENT_TOKEN(ScriptDataEscaped);
+ SWITCH_TO_AND_EMIT_CURRENT_CHARACTER(ScriptDataEscaped);
}
}
END_STATE
@@ -1975,7 +2193,7 @@ _StartOfFunction:
{
ON('-')
{
- SWITCH_TO_AND_EMIT_CURRENT_TOKEN(ScriptDataEscapedDash);
+ SWITCH_TO_AND_EMIT_CHARACTER('-', ScriptDataEscapedDash);
}
ON('<')
{
@@ -1983,11 +2201,13 @@ _StartOfFunction:
}
ON(0)
{
- TODO();
+ PARSE_ERROR();
+ EMIT_CHARACTER(0xFFFD);
}
ON_EOF
{
- TODO();
+ PARSE_ERROR();
+ EMIT_EOF;
}
ANYTHING_ELSE
{
@@ -2005,7 +2225,9 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
- TODO();
+ m_queued_tokens.enqueue(HTMLToken::make_character('<'));
+ m_queued_tokens.enqueue(HTMLToken::make_character('/'));
+ RECONSUME_IN(ScriptData);
}
}
END_STATE