From c6fcdd0f93cd8c49a6024a65d808b69b701e7ecc Mon Sep 17 00:00:00 2001 From: Adam Hodgen Date: Fri, 18 Feb 2022 22:12:47 +0000 Subject: [PATCH] LibWeb: Fix off by one error in HTML Tokenizer In 'NamedCharacterReference' we attempt to look up the code point by an identifier, e.g. &apos; becomes ' This is done by passing the entire rest of the document to the `HTML::code_points_from_entity` function. However, before this change we didn't send the final character, which meant that if the document ended in a named character reference, the lookup would fail. --- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 6fad045f43..48190ad95f 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -1617,7 +1617,7 @@ _StartOfFunction: { size_t byte_offset = m_utf8_view.byte_offset_of(m_prev_utf8_iterator); - auto match = HTML::code_points_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset - 1)); + auto match = HTML::code_points_from_entity(m_decoded_input.substring_view(byte_offset, m_decoded_input.length() - byte_offset)); if (match.has_value()) { skip(match->entity.length() - 1);