From c33d17d363cc88bc6193f88c1b94eadc043fcaac Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Tue, 23 Jun 2020 16:19:07 +0200 Subject: [PATCH] LibWeb: Fix tokenization of attributes with URL query strings in them. An attribute value containing a URL query string (for example, href="?a=1&copy=2") was being tokenized incorrectly. The spec mentions this case, but I had overlooked it. The bug happened because we interpreted the "&" as the start of a named character reference. --- Libraries/LibWeb/Parser/HTMLTokenizer.cpp | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp index 1a3bdc5ac3..eebf3e2601 100644 --- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp +++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp @@ -1458,6 +1458,14 @@ _StartOfFunction: for (auto ch : match.value().entity) m_temporary_buffer.append(ch); + if (consumed_as_part_of_an_attribute() && match.value().codepoints.last() != ';') { + auto next = peek_codepoint(0); + if (next.has_value() && (next.value() == '=' || isalnum(next.value()))) { + FLUSH_CODEPOINTS_CONSUMED_AS_A_CHARACTER_REFERENCE; + SWITCH_TO_RETURN_STATE; + } + } + if (consumed_as_part_of_an_attribute() && match.value().entity.ends_with(';')) { auto next_codepoint = peek_codepoint(0); if (next_codepoint.has_value() && next_codepoint.value() == '=') {