From 932161e5817561028bdb5685182ad3f78db5e1b5 Mon Sep 17 00:00:00 2001
From: Max Wipfli
Date: Fri, 4 Jun 2021 11:31:43 +0200
Subject: [PATCH] LibWeb: Be more forgiving when adding source positions in HTMLTokenizer

This patch changes HTMLTokenizer::nth_last_position to not fail if the
requested position is not available. Rather, it will just return (0-0).

While this is not the correct solution, it prevents the tokenizer from
crashing just because it cannot find a source position. This should only
affect SyntaxHighlighter.
---
 Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp   |  8 +++++---
 .../Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp    | 13 ++++++++++++-
 Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h |  2 +-
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
index fe2ae1f057..7a81844fd2 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
@@ -57,9 +57,11 @@ String HTMLToken::to_string() const
         builder.append("' }");
     }
 
-    builder.appendff("@{}:{}-{}:{}",
-        m_start_position.line, m_start_position.column,
-        m_end_position.line, m_end_position.column);
+    if (type() == HTMLToken::Type::Character) {
+        builder.appendff("@{}:{}", m_start_position.line, m_start_position.column);
+    } else {
+        builder.appendff("@{}:{}-{}:{}", m_start_position.line, m_start_position.column, m_end_position.line, m_end_position.column);
+    }
     return builder.to_string();
 }
 
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
index 638ba37bea..7f9d17873c 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
@@ -207,6 +207,17 @@ Optional HTMLTokenizer::peek_code_point(size_t offset) const
     return *it;
 }
 
+// Returns the n-th most recent entry of m_source_positions (n == 0 is the latest one).
+// Falls back to position (0-0) instead of crashing when that entry does not exist.
+HTMLToken::Position HTMLTokenizer::nth_last_position(size_t n)
+{
+    if (n >= m_source_positions.size()) {
+        dbgln_if(TOKENIZER_TRACE_DEBUG, "(Tokenizer::nth_last_position) Invalid position requested: {}th-last of {}. Returning (0-0).", n, m_source_positions.size());
+        return HTMLToken::Position { 0, 0 };
+    }
+    return m_source_positions.at(m_source_positions.size() - 1 - n);
+}
+
 Optional HTMLTokenizer::next_token()
 {
     {
@@ -2639,7 +2650,7 @@ void HTMLTokenizer::will_emit(HTMLToken& token)
 {
     if (token.is_start_tag())
         m_last_emitted_start_tag = token;
-    token.m_end_position = m_source_positions.last();
+    token.m_end_position = nth_last_position(0);
 }
 
 bool HTMLTokenizer::current_end_tag_token_is_appropriate() const
diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
index 0024bcb08f..284b83e34b 100644
--- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
+++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
@@ -147,7 +147,7 @@ private:
     bool consumed_as_part_of_an_attribute() const;
 
     void restore_to(const Utf8CodePointIterator& new_iterator);
-    auto& nth_last_position(size_t n = 0) { return m_source_positions.at(m_source_positions.size() - 1 - n); }
+    HTMLToken::Position nth_last_position(size_t n = 0);
 
     State m_state { State::Data };
     State m_return_state { State::Data };