diff --git a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 70bf9920de..1bf1dab3c3 100644
--- a/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -71,11 +71,13 @@ namespace Web::HTML {
goto _StartOfFunction; \
} while (0)
-#define RECONSUME_IN_RETURN_STATE \
- do { \
- will_reconsume_in(m_return_state); \
- m_state = m_return_state; \
- goto _StartOfFunction; \
+#define RECONSUME_IN_RETURN_STATE \
+ do { \
+ will_reconsume_in(m_return_state); \
+ m_state = m_return_state; \
+ if (current_input_character.has_value()) \
+ m_utf8_iterator = m_prev_utf8_iterator; \
+ goto _StartOfFunction; \
} while (0)
#define SWITCH_TO_AND_EMIT_CURRENT_TOKEN(new_state) \
@@ -1525,6 +1527,7 @@ _StartOfFunction:
}
ANYTHING_ELSE
{
+ FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
RECONSUME_IN_RETURN_STATE;
}
}
@@ -1544,17 +1547,9 @@ _StartOfFunction:
for (auto ch : match.value().entity)
m_temporary_buffer.append(ch);
- if (consumed_as_part_of_an_attribute() && match.value().code_points.last() != ';') {
- auto next = peek_code_point(0);
- if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) {
- FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
- SWITCH_TO_RETURN_STATE;
- }
- }
-
- if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) {
+ if (consumed_as_part_of_an_attribute() && !match.value().entity.ends_with(';')) {
auto next_code_point = peek_code_point(0);
- if (next_code_point.has_value() && next_code_point.value() == '=') {
+ if (next_code_point.has_value() && (next_code_point.value() == '=' || isalnum(next_code_point.value()))) {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
SWITCH_TO_RETURN_STATE;
}
@@ -1571,7 +1566,9 @@ _StartOfFunction:
SWITCH_TO_RETURN_STATE;
} else {
FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE;
- SWITCH_TO(AmbiguousAmpersand);
+ // FIXME: This should be SWITCH_TO, but we always lose the first character on this path, so just reconsume it.
+ // I can't wrap my head around how to do it as the spec says.
+ RECONSUME_IN(AmbiguousAmpersand);
}
}
END_STATE