From 6e0f80fbe0f9ca5d411de4887d548c17cea11097 Mon Sep 17 00:00:00 2001 From: Andreas Kling Date: Mon, 17 Oct 2022 10:46:11 +0200 Subject: [PATCH] LibWeb: Make the HTMLParser GC-allocated This prevents a reference cycle between a HTMLParser opened via document.open() and the document. It was one of many things keeping some documents alive indefinitely. --- Userland/Libraries/LibWeb/DOM/Document.cpp | 4 ++- Userland/Libraries/LibWeb/DOM/Document.h | 4 +-- .../LibWeb/HTML/Parser/HTMLParser.cpp | 29 ++++++++++++++----- .../Libraries/LibWeb/HTML/Parser/HTMLParser.h | 25 +++++++++------- .../Parser/ListOfActiveFormattingElements.cpp | 14 ++++++--- .../Parser/ListOfActiveFormattingElements.h | 6 ++-- .../HTML/Parser/StackOfOpenElements.cpp | 12 ++++++-- .../LibWeb/HTML/Parser/StackOfOpenElements.h | 12 ++++---- 8 files changed, 71 insertions(+), 35 deletions(-) diff --git a/Userland/Libraries/LibWeb/DOM/Document.cpp b/Userland/Libraries/LibWeb/DOM/Document.cpp index 2e1338b71c..54f6156796 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.cpp +++ b/Userland/Libraries/LibWeb/DOM/Document.cpp @@ -340,6 +340,8 @@ void Document::visit_edges(Cell::Visitor& visitor) visitor.visit(m_all); visitor.visit(m_selection); + visitor.visit(m_parser); + for (auto& script : m_scripts_to_execute_when_parsing_has_finished) visitor.visit(script.ptr()); for (auto& script : m_scripts_to_execute_as_soon_as_possible) @@ -2149,7 +2151,7 @@ void Document::abort() } // https://html.spec.whatwg.org/multipage/dom.html#active-parser -RefPtr Document::active_parser() +JS::GCPtr Document::active_parser() { if (!m_parser) return nullptr; diff --git a/Userland/Libraries/LibWeb/DOM/Document.h b/Userland/Libraries/LibWeb/DOM/Document.h index c0b136b364..c8417fd514 100644 --- a/Userland/Libraries/LibWeb/DOM/Document.h +++ b/Userland/Libraries/LibWeb/DOM/Document.h @@ -428,7 +428,7 @@ public: void unload(bool recursive_flag = false, Optional = {}); // https://html.spec.whatwg.org/multipage/dom.html#active-parser - RefPtr active_parser(); + JS::GCPtr active_parser(); // https://html.spec.whatwg.org/multipage/dom.html#load-timing-info DocumentLoadTimingInfo& load_timing_info() { return m_load_timing_info; } @@ -478,7 +478,7 @@ private: RefPtr m_style_update_timer; RefPtr m_layout_update_timer; - RefPtr m_parser; + JS::GCPtr m_parser; bool m_active_parser_was_aborted { false }; String m_source; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp index 24f7e5478e..1348665048 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, Andreas Kling + * Copyright (c) 2020-2022, Andreas Kling * Copyright (c) 2021, Luke Wilde * * SPDX-License-Identifier: BSD-2-Clause @@ -147,6 +147,19 @@ HTMLParser::~HTMLParser() m_document->set_should_invalidate_styles_on_attribute_changes(true); } +void HTMLParser::visit_edges(Cell::Visitor& visitor) +{ + Base::visit_edges(visitor); + visitor.visit(m_document); + visitor.visit(m_head_element); + visitor.visit(m_form_element); + visitor.visit(m_context_element); + visitor.visit(m_character_insertion_node); + + m_stack_of_open_elements.visit_edges(visitor); + m_list_of_active_formatting_elements.visit_edges(visitor); +} + void HTMLParser::run() { for (;;) { @@ -3426,23 +3439,23 @@ Vector> HTMLParser::parse_html_fragment(DOM::Element& cont return children; } -NonnullRefPtr HTMLParser::create_for_scripting(DOM::Document& document) +JS::NonnullGCPtr HTMLParser::create_for_scripting(DOM::Document& document) { - return adopt_ref(*new HTMLParser(document)); + return *document.heap().allocate_without_realm(document); } -NonnullRefPtr HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input) +JS::NonnullGCPtr HTMLParser::create_with_uncertain_encoding(DOM::Document& document, ByteBuffer const& input) { if (document.has_encoding()) - return adopt_ref(*new HTMLParser(document, input, document.encoding().value())); + return *document.heap().allocate_without_realm(document, input, document.encoding().value()); auto encoding = run_encoding_sniffing_algorithm(document, input); dbgln_if(HTML_PARSER_DEBUG, "The encoding sniffing algorithm returned encoding '{}'", encoding); - return adopt_ref(*new HTMLParser(document, input, encoding)); + return *document.heap().allocate_without_realm(document, input, encoding); } -NonnullRefPtr HTMLParser::create(DOM::Document& document, StringView input, String const& encoding) +JS::NonnullGCPtr HTMLParser::create(DOM::Document& document, StringView input, String const& encoding) { - return adopt_ref(*new HTMLParser(document, input, encoding)); + return *document.heap().allocate_without_realm(document, input, encoding); } // https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h index fe80150197..72f1a3337a 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLParser.h @@ -1,11 +1,12 @@ /* - * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2020-2022, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ #pragma once +#include #include #include #include @@ -38,15 +39,17 @@ namespace Web::HTML { __ENUMERATE_INSERTION_MODE(AfterAfterBody) \ __ENUMERATE_INSERTION_MODE(AfterAfterFrameset) -class HTMLParser : public RefCounted { +class HTMLParser final : public JS::Cell { + JS_CELL(HTMLParser, JS::Cell); + friend class HTMLTokenizer; public: ~HTMLParser(); - static NonnullRefPtr create_for_scripting(DOM::Document&); - static NonnullRefPtr create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input); - static NonnullRefPtr create(DOM::Document&, StringView input, String const& encoding); + static JS::NonnullGCPtr create_for_scripting(DOM::Document&); + static JS::NonnullGCPtr create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input); + static JS::NonnullGCPtr create(DOM::Document&, StringView input, String const& encoding); void run(); void run(const AK::URL&); @@ -80,6 +83,8 @@ private: HTMLParser(DOM::Document&, StringView input, String const& encoding); HTMLParser(DOM::Document&); + virtual void visit_edges(Cell::Visitor&) override; + char const* insertion_mode_name() const; DOM::QuirksMode which_quirks_mode(HTMLToken const&) const; @@ -182,14 +187,14 @@ private: JS::Realm& realm(); - JS::Handle m_document; - JS::Handle m_head_element; - JS::Handle m_form_element; - JS::Handle m_context_element; + JS::GCPtr m_document; + JS::GCPtr m_head_element; + JS::GCPtr m_form_element; + JS::GCPtr m_context_element; Vector m_pending_table_character_tokens; - JS::Handle m_character_insertion_node; + JS::GCPtr m_character_insertion_node; StringBuilder m_character_insertion_builder; }; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp index e17a6abb48..91d842f7c5 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2020-2022, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ @@ -11,15 +11,21 @@ namespace Web::HTML { ListOfActiveFormattingElements::~ListOfActiveFormattingElements() = default; +void ListOfActiveFormattingElements::visit_edges(JS::Cell::Visitor& visitor) +{ + for (auto& entry : m_entries) + visitor.visit(entry.element); +} + void ListOfActiveFormattingElements::add(DOM::Element& element) { // FIXME: Implement the Noah's Ark clause https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements - m_entries.append({ JS::make_handle(element) }); + m_entries.append({ element }); } void ListOfActiveFormattingElements::add_marker() { - m_entries.append({ JS::make_handle(nullptr) }); + m_entries.append({ nullptr }); } bool ListOfActiveFormattingElements::contains(const DOM::Element& element) const @@ -78,7 +84,7 @@ void ListOfActiveFormattingElements::replace(DOM::Element& to_remove, DOM::Eleme void ListOfActiveFormattingElements::insert_at(size_t index, DOM::Element& element) { - m_entries.insert(index, { JS::make_handle(element) }); + m_entries.insert(index, { element }); } } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h index 167e93a347..661a6094d5 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/ListOfActiveFormattingElements.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2020-2022, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ @@ -20,7 +20,7 @@ public: struct Entry { bool is_marker() const { return !element; } - JS::Handle element; + JS::GCPtr element; }; bool is_empty() const { return m_entries.is_empty(); } @@ -43,6 +43,8 @@ public: Optional find_index(DOM::Element const&) const; + void visit_edges(JS::Cell::Visitor&); + private: Vector m_entries; }; diff --git a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp index 091eb8cdc7..ab9906c262 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020, Andreas Kling + * Copyright (c) 2020-2022, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ @@ -14,6 +14,12 @@ static Vector s_base_list { "applet", "caption", "html", "table", "td StackOfOpenElements::~StackOfOpenElements() = default; +void StackOfOpenElements::visit_edges(JS::Cell::Visitor& visitor) +{ + for (auto& element : m_elements) + visitor.visit(element); +} + bool StackOfOpenElements::has_in_scope_impl(FlyString const& tag_name, Vector const& list) const { for (auto const& element : m_elements.in_reverse()) { @@ -162,7 +168,7 @@ void StackOfOpenElements::replace(DOM::Element const& to_remove, JS::NonnullGCPt for (size_t i = 0; i < m_elements.size(); i++) { if (m_elements[i].ptr() == &to_remove) { m_elements.remove(i); - m_elements.insert(i, JS::make_handle(*to_add)); + m_elements.insert(i, to_add); break; } } @@ -172,7 +178,7 @@ void StackOfOpenElements::insert_immediately_below(JS::NonnullGCPtr + * Copyright (c) 2020-2022, Andreas Kling * * SPDX-License-Identifier: BSD-2-Clause */ @@ -27,7 +27,7 @@ public: DOM::Element& last() { return *m_elements.last(); } bool is_empty() const { return m_elements.is_empty(); } - void push(JS::NonnullGCPtr element) { m_elements.append(JS::make_handle(*element)); } + void push(JS::NonnullGCPtr element) { m_elements.append(element); } JS::NonnullGCPtr pop() { return *m_elements.take_last(); } void remove(DOM::Element const& element); void replace(DOM::Element const& to_remove, JS::NonnullGCPtr to_add); @@ -47,8 +47,8 @@ public: bool contains(const DOM::Element&) const; bool contains(FlyString const& tag_name) const; - Vector> const& elements() const { return m_elements; } - Vector>& elements() { return m_elements; } + auto const& elements() const { return m_elements; } + auto& elements() { return m_elements; } void pop_until_an_element_with_tag_name_has_been_popped(FlyString const&); @@ -61,11 +61,13 @@ public: LastElementResult last_element_with_tag_name(FlyString const&); JS::GCPtr element_immediately_above(DOM::Element const&); + void visit_edges(JS::Cell::Visitor&); + private: bool has_in_scope_impl(FlyString const& tag_name, Vector const&) const; bool has_in_scope_impl(const DOM::Element& target_node, Vector const&) const; - Vector> m_elements; + Vector> m_elements; }; }