From d7578ddebb5bfc9237703be88cc2501f319ed47e Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Fri, 10 Sep 2021 23:18:00 +0200 Subject: [PATCH] LibJS: Share "parsed identifiers" between copied JS::Lexer instances When we save/load state in the parser, we preserve the lexer state by simply making a copy of it. This was made extremely heavy by the lexer keeping a cache of all parsed identifiers. It keeps the cache to ensure that StringViews into parsed Unicode escape sequences don't become dangling views when the Token goes out of scope. This patch solves the problem by replacing the Vector which was used to cache the identifiers with a ref-counted HashTable instead. Since the purpose of the cache is just to keep FlyStrings alive, it's fine for all Lexer instances to share the cache. And as a bonus, using a HashTable instead of a Vector replaces the O(n) accesses with O(1) ones. This makes a 1.9 MiB JavaScript file parse in 0.6s instead of 24s. :^) --- Userland/Libraries/LibJS/Lexer.cpp | 4 ++-- Userland/Libraries/LibJS/Lexer.h | 10 +++++++--- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/Userland/Libraries/LibJS/Lexer.cpp b/Userland/Libraries/LibJS/Lexer.cpp index 340eca7ec5..cd4dc8a415 100644 --- a/Userland/Libraries/LibJS/Lexer.cpp +++ b/Userland/Libraries/LibJS/Lexer.cpp @@ -27,6 +27,7 @@ Lexer::Lexer(StringView source, StringView filename, size_t line_number, size_t , m_filename(filename) , m_line_number(line_number) , m_line_column(line_column) + , m_parsed_identifiers(adopt_ref(*new ParsedIdentifiers)) { if (s_keywords.is_empty()) { s_keywords.set("await", TokenType::Await); @@ -602,8 +603,7 @@ Token Lexer::next() } while (code_point.has_value()); identifier = builder.build(); - if (!m_parsed_identifiers.contains_slow(*identifier)) - m_parsed_identifiers.append(*identifier); + m_parsed_identifiers->identifiers.set(*identifier); auto it = s_keywords.find(identifier->hash(), [&](auto& entry) { return entry.key == identifier; }); if (it == s_keywords.end()) diff --git a/Userland/Libraries/LibJS/Lexer.h b/Userland/Libraries/LibJS/Lexer.h index 03991f596d..1ecbac223b 100644 --- a/Userland/Libraries/LibJS/Lexer.h +++ b/Userland/Libraries/LibJS/Lexer.h @@ -82,9 +82,13 @@ private: static HashMap s_two_char_tokens; static HashMap s_single_char_tokens; - // Resolved identifiers must be kept alive for the duration of the parsing stage, otherwise - // the only references to these strings are deleted by the Token destructor. - Vector m_parsed_identifiers; + struct ParsedIdentifiers : public RefCounted { + // Resolved identifiers must be kept alive for the duration of the parsing stage, otherwise + // the only references to these strings are deleted by the Token destructor. + HashTable identifiers; + }; + + RefPtr m_parsed_identifiers; }; }