From b193351a998dab06228bf6cb8c2b0828704839c1 Mon Sep 17 00:00:00 2001 From: MacDue Date: Sun, 13 Feb 2022 14:08:53 +0000 Subject: [PATCH] LibWeb: Fix off-by-one in HTMLTokenizer::restore_to() The difference should be between m_utf8_iterator and the new position; if m_prev_utf8_iterator is used, one fewer source position is popped than required. This issue was not apparent on most pages since restore_to is used for tokens that are normally followed by a newline that resets the column to zero, but it can be seen on pages with minified HTML. --- .../LibWeb/HTML/Parser/HTMLTokenizer.cpp | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 2a30d5acec..0770b47c99 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -2726,15 +2726,13 @@ bool HTMLTokenizer::consumed_as_part_of_an_attribute() const void HTMLTokenizer::restore_to(Utf8CodePointIterator const& new_iterator) { - if (new_iterator != m_prev_utf8_iterator) { - auto diff = m_prev_utf8_iterator - new_iterator; - if (diff > 0) { - for (ssize_t i = 0; i < diff; ++i) - m_source_positions.take_last(); - } else { - // Going forwards...? - TODO(); - } + auto diff = m_utf8_iterator - new_iterator; + if (diff > 0) { + for (ssize_t i = 0; i < diff; ++i) + m_source_positions.take_last(); + } else { + // Going forwards...? + TODO(); } m_utf8_iterator = new_iterator; }