From c79e8aab0a18f5df0d99563bd15220e6663b375f Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Sat, 5 Nov 2022 00:20:15 +0100 Subject: [PATCH] LibWeb: Make ON_WHITESPACE less heavy in HTML tokenizer Once we know that the current code point is an ASCII character, we can just check if it's one of the HTML whitespace characters. Before this patch, we were using the generic StringView::contains(u32) path that splats a code point into a StringBuilder and then searches for it with memmem(). This reduces time spent in the HTML tokenizer from 16% to 6% when loading the ECMA-262 spec. --- Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index 5905561f9e..de1aaf9fc4 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -121,7 +121,7 @@ namespace Web::HTML { if (current_input_character.has_value() && is_ascii_hex_digit(current_input_character.value())) #define ON_WHITESPACE \ - if (current_input_character.has_value() && is_ascii(current_input_character.value()) && "\t\n\f "sv.contains(current_input_character.value())) + if (current_input_character.has_value() && is_ascii(*current_input_character) && first_is_one_of(static_cast<char>(*current_input_character), '\t', '\n', '\f', ' ')) #define ANYTHING_ELSE if (1)