From c249fbd17c1fad27d6532842b965e5ef658b8469 Mon Sep 17 00:00:00 2001 From: Sam Atkins Date: Fri, 9 Jul 2021 19:48:51 +0100 Subject: [PATCH] LibWeb: Correct escape handling in CSS Tokenizer Calling is_valid_escape_sequence() with no arguments hides what it is operating on, so I have removed that overload; callers must now explicitly pass the code points they are testing. The call from consume_a_token() was checking the wrong code points, so it incorrectly returned false. This resulted in corrupted output when tokenizing this code from Acid2 (abbreviated): ```css .parser { error: \}; } .parser { } ``` --- Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp | 11 +++-------- Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h | 1 - 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp index c01dd8bad7..5746278890 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.cpp @@ -504,7 +504,7 @@ Token Tokenizer::consume_a_url_token() } if (is_reverse_solidus(input)) { - if (is_valid_escape_sequence()) { + if (is_valid_escape_sequence(peek_twin())) { token.m_value.append_code_point(consume_escaped_code_point()); } else { log_parse_error(); @@ -534,7 +534,7 @@ void Tokenizer::consume_the_remnants_of_a_bad_url() return; } - if (is_valid_escape_sequence()) { + if (is_valid_escape_sequence(peek_twin())) { [[maybe_unused]] auto cp = consume_escaped_code_point(); } @@ -601,11 +601,6 @@ bool Tokenizer::starts_with_a_number(U32Triplet values) return false; } -bool Tokenizer::is_valid_escape_sequence() -{ - return is_valid_escape_sequence(peek_twin()); -} - bool Tokenizer::is_valid_escape_sequence(U32Twin values) { if (!is_reverse_solidus(values.first)) { @@ -864,7 +859,7 @@ Token Tokenizer::consume_a_token() if (is_reverse_solidus(input)) { dbgln_if(CSS_TOKENIZER_TRACE, "is reverse solidus"); - if (is_valid_escape_sequence()) { + if 
(is_valid_escape_sequence({ input, peek_code_point() })) { reconsume_current_input_code_point(); return consume_an_ident_like_token(); } diff --git a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h index 41872a64af..f05b8e267e 100644 --- a/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h +++ b/Userland/Libraries/LibWeb/CSS/Parser/Tokenizer.h @@ -92,7 +92,6 @@ private: void consume_the_remnants_of_a_bad_url(); void consume_comments(); void reconsume_current_input_code_point(); - [[nodiscard]] bool is_valid_escape_sequence(); [[nodiscard]] static bool is_valid_escape_sequence(U32Twin); [[nodiscard]] bool would_start_an_identifier(); [[nodiscard]] bool would_start_an_identifier(U32Triplet);