1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 01:07:35 +00:00

LibWeb: Make document.write() work while the document is being parsed

This necessitated making HTMLParser ref-counted, and having it register
itself with Document when created. That makes it possible for scripts to
add new input at the current parser insertion point.

There is now a reference cycle between Document and HTMLParser. This
cycle is explicitly broken by calling Document::detach_parser() at the
end of HTMLParser::run().

This is a huge progression on ACID3, from 31% to 49%! :^)
This commit is contained in:
Andreas Kling 2022-02-21 21:54:21 +01:00
parent bb1f26c149
commit 8b2499b112
7 changed files with 67 additions and 38 deletions

View file

@ -150,10 +150,10 @@ JS_DEFINE_NATIVE_FUNCTION(TestWebGlobalObject::wait_for_page_to_load)
loader.load_sync( loader.load_sync(
request, request,
[&](auto data, auto&, auto) { [&](auto data, auto&, auto) {
Web::HTML::HTMLParser parser(document, data, "utf-8"); auto parser = Web::HTML::HTMLParser::create(document, data, "utf-8");
// Now parse the HTML page. // Now parse the HTML page.
parser.run(next_page_to_load.value()); parser->run(next_page_to_load.value());
g_page_view->set_document(&parser.document()); g_page_view->set_document(&parser->document());
// Note: Unhandled exceptions are just dropped here. // Note: Unhandled exceptions are just dropped here.
// Run the "after" hooks // Run the "after" hooks

View file

@ -256,7 +256,7 @@ ExceptionOr<Document*> Document::open(String const&, String const&)
set_quirks_mode(QuirksMode::No); set_quirks_mode(QuirksMode::No);
// 16. Create a new HTML parser and associate it with document. This is a script-created parser (meaning that it can be closed by the document.open() and document.close() methods, and that the tokenizer will wait for an explicit call to document.close() before emitting an end-of-file token). The encoding confidence is irrelevant. // 16. Create a new HTML parser and associate it with document. This is a script-created parser (meaning that it can be closed by the document.open() and document.close() methods, and that the tokenizer will wait for an explicit call to document.close() before emitting an end-of-file token). The encoding confidence is irrelevant.
m_parser = make<HTML::HTMLParser>(*this); m_parser = HTML::HTMLParser::create_for_scripting(*this);
// 17. Set the insertion point to point at just before the end of the input stream (which at this point will be empty). // 17. Set the insertion point to point at just before the end of the input stream (which at this point will be empty).
m_parser->tokenizer().update_insertion_point(); m_parser->tokenizer().update_insertion_point();
@ -1324,4 +1324,14 @@ bool Document::has_focus() const
return true; return true;
} }
void Document::set_parser(Badge<HTML::HTMLParser>, HTML::HTMLParser& parser)
{
m_parser = parser;
}
void Document::detach_parser(Badge<HTML::HTMLParser>)
{
m_parser = nullptr;
}
} }

View file

@ -313,6 +313,9 @@ public:
bool has_focus() const; bool has_focus() const;
void set_parser(Badge<HTML::HTMLParser>, HTML::HTMLParser&);
void detach_parser(Badge<HTML::HTMLParser>);
private: private:
explicit Document(const AK::URL&); explicit Document(const AK::URL&);
@ -358,7 +361,7 @@ private:
RefPtr<Core::Timer> m_style_update_timer; RefPtr<Core::Timer> m_style_update_timer;
RefPtr<Core::Timer> m_layout_update_timer; RefPtr<Core::Timer> m_layout_update_timer;
OwnPtr<HTML::HTMLParser> m_parser; RefPtr<HTML::HTMLParser> m_parser;
bool m_active_parser_was_aborted { false }; bool m_active_parser_was_aborted { false };
String m_source; String m_source;

View file

@ -34,11 +34,11 @@ NonnullRefPtr<DOM::Document> DOMParser::parse_from_string(String const& string,
// 2. Create an HTML parser parser, associated with document. // 2. Create an HTML parser parser, associated with document.
// 3. Place string into the input stream for parser. The encoding confidence is irrelevant. // 3. Place string into the input stream for parser. The encoding confidence is irrelevant.
// FIXME: We don't have the concept of encoding confidence yet. // FIXME: We don't have the concept of encoding confidence yet.
HTMLParser parser(document, string, "UTF-8"); auto parser = HTMLParser::create(document, string, "UTF-8");
// 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream. // 4. Start parser and let it run until it has consumed all the characters just inserted into the input stream.
// FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL. // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL.
parser.run("about:blank"); parser->run("about:blank");
} else { } else {
// -> Otherwise // -> Otherwise
// FIXME: 1. Create an XML parser parse, associated with document, and with XML scripting support disabled. // FIXME: 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.

View file

@ -121,8 +121,8 @@ static bool is_html_integration_point(DOM::Element const& element)
RefPtr<DOM::Document> parse_html_document(StringView data, const AK::URL& url, const String& encoding) RefPtr<DOM::Document> parse_html_document(StringView data, const AK::URL& url, const String& encoding)
{ {
auto document = DOM::Document::create(url); auto document = DOM::Document::create(url);
HTMLParser parser(document, data, encoding); auto parser = HTMLParser::create(document, data, encoding);
parser.run(url); parser->run(url);
return document; return document;
} }
@ -131,6 +131,7 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
, m_document(document) , m_document(document)
{ {
m_tokenizer.set_parser({}, *this); m_tokenizer.set_parser({}, *this);
m_document->set_parser({}, *this);
m_document->set_should_invalidate_styles_on_attribute_changes(false); m_document->set_should_invalidate_styles_on_attribute_changes(false);
auto standardized_encoding = TextCodec::get_standardized_encoding(encoding); auto standardized_encoding = TextCodec::get_standardized_encoding(encoding);
VERIFY(standardized_encoding.has_value()); VERIFY(standardized_encoding.has_value());
@ -140,6 +141,7 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
HTMLParser::HTMLParser(DOM::Document& document) HTMLParser::HTMLParser(DOM::Document& document)
: m_document(document) : m_document(document)
{ {
m_document->set_parser({}, *this);
m_tokenizer.set_parser({}, *this); m_tokenizer.set_parser({}, *this);
} }
@ -201,6 +203,7 @@ void HTMLParser::run(const AK::URL& url)
m_document->set_source(m_tokenizer.source()); m_document->set_source(m_tokenizer.source());
run(); run();
the_end(); the_end();
m_document->detach_parser({});
} }
// https://html.spec.whatwg.org/multipage/parsing.html#the-end // https://html.spec.whatwg.org/multipage/parsing.html#the-end
@ -3180,44 +3183,44 @@ DOM::Document& HTMLParser::document()
NonnullRefPtrVector<DOM::Node> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup) NonnullRefPtrVector<DOM::Node> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
{ {
auto temp_document = DOM::Document::create(); auto temp_document = DOM::Document::create();
HTMLParser parser(*temp_document, markup, "utf-8"); auto parser = HTMLParser::create(*temp_document, markup, "utf-8");
parser.m_context_element = context_element; parser->m_context_element = context_element;
parser.m_parsing_fragment = true; parser->m_parsing_fragment = true;
parser.document().set_quirks_mode(context_element.document().mode()); parser->document().set_quirks_mode(context_element.document().mode());
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) { if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA); parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
} else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) { } else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
} else if (context_element.local_name().is_one_of(HTML::TagNames::script)) { } else if (context_element.local_name().is_one_of(HTML::TagNames::script)) {
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
} else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) { } else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
if (context_element.document().is_scripting_enabled()) if (context_element.document().is_scripting_enabled())
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT); parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
} else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) { } else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
parser.m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT); parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
} }
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML); auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML);
parser.document().append_child(root); parser->document().append_child(root);
parser.m_stack_of_open_elements.push(root); parser->m_stack_of_open_elements.push(root);
if (context_element.local_name() == HTML::TagNames::template_) { if (context_element.local_name() == HTML::TagNames::template_) {
parser.m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate); parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
} }
// FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context. // FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
parser.reset_the_insertion_mode_appropriately(); parser->reset_the_insertion_mode_appropriately();
for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) { for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) {
if (is<HTMLFormElement>(*form_candidate)) { if (is<HTMLFormElement>(*form_candidate)) {
parser.m_form_element = verify_cast<HTMLFormElement>(*form_candidate); parser->m_form_element = verify_cast<HTMLFormElement>(*form_candidate);
break; break;
} }
} }
parser.run(context_element.document().url()); parser->run(context_element.document().url());
NonnullRefPtrVector<DOM::Node> children; NonnullRefPtrVector<DOM::Node> children;
while (RefPtr<DOM::Node> child = root->first_child()) { while (RefPtr<DOM::Node> child = root->first_child()) {
@ -3228,13 +3231,23 @@ NonnullRefPtrVector<DOM::Node> HTMLParser::parse_html_fragment(DOM::Element& con
return children; return children;
} }
NonnullOwnPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, const ByteBuffer& input) NonnullRefPtr<HTMLParser> HTMLParser::create_for_scripting(DOM::Document& document)
{
return adopt_ref(*new HTMLParser(document));
}
NonnullRefPtr<HTMLParser> HTMLParser::create_with_uncertain_encoding(DOM::Document& document, const ByteBuffer& input)
{ {
if (document.has_encoding()) if (document.has_encoding())
return make<HTMLParser>(document, input, document.encoding().value()); return adopt_ref(*new HTMLParser(document, input, document.encoding().value()));
auto encoding = run_encoding_sniffing_algorithm(document, input); auto encoding = run_encoding_sniffing_algorithm(document, input);
dbgln("The encoding sniffing algorithm returned encoding '{}'", encoding); dbgln("The encoding sniffing algorithm returned encoding '{}'", encoding);
return make<HTMLParser>(document, input, encoding); return adopt_ref(*new HTMLParser(document, input, encoding));
}
NonnullRefPtr<HTMLParser> HTMLParser::create(DOM::Document& document, StringView input, String const& encoding)
{
return adopt_ref(*new HTMLParser(document, input, encoding));
} }
// https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm // https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-serialisation-algorithm

View file

@ -41,15 +41,15 @@ namespace Web::HTML {
RefPtr<DOM::Document> parse_html_document(StringView, const AK::URL&, const String& encoding); RefPtr<DOM::Document> parse_html_document(StringView, const AK::URL&, const String& encoding);
class HTMLParser { class HTMLParser : public RefCounted<HTMLParser> {
friend class HTMLTokenizer; friend class HTMLTokenizer;
public: public:
HTMLParser(DOM::Document&, StringView input, const String& encoding);
HTMLParser(DOM::Document&);
~HTMLParser(); ~HTMLParser();
static NonnullOwnPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, const ByteBuffer& input); static NonnullRefPtr<HTMLParser> create_for_scripting(DOM::Document&);
static NonnullRefPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input);
static NonnullRefPtr<HTMLParser> create(DOM::Document&, StringView input, String const& encoding);
void run(); void run();
void run(const AK::URL&); void run(const AK::URL&);
@ -76,6 +76,9 @@ public:
size_t script_nesting_level() const { return m_script_nesting_level; } size_t script_nesting_level() const { return m_script_nesting_level; }
private: private:
HTMLParser(DOM::Document&, StringView input, const String& encoding);
HTMLParser(DOM::Document&);
const char* insertion_mode_name() const; const char* insertion_mode_name() const;
DOM::QuirksMode which_quirks_mode(const HTMLToken&) const; DOM::QuirksMode which_quirks_mode(const HTMLToken&) const;

View file

@ -46,8 +46,8 @@ static bool build_markdown_document(DOM::Document& document, const ByteBuffer& d
if (!markdown_document) if (!markdown_document)
return false; return false;
HTML::HTMLParser parser(document, markdown_document->render_to_html(), "utf-8"); auto parser = HTML::HTMLParser::create(document, markdown_document->render_to_html(), "utf-8");
parser.run(document.url()); parser->run(document.url());
return true; return true;
} }
@ -116,8 +116,8 @@ static bool build_gemini_document(DOM::Document& document, const ByteBuffer& dat
dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data); dbgln_if(GEMINI_DEBUG, "Gemini data:\n\"\"\"{}\"\"\"", gemini_data);
dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data); dbgln_if(GEMINI_DEBUG, "Converted to HTML:\n\"\"\"{}\"\"\"", html_data);
HTML::HTMLParser parser(document, html_data, "utf-8"); auto parser = HTML::HTMLParser::create(document, html_data, "utf-8");
parser.run(document.url()); parser->run(document.url());
return true; return true;
} }
@ -226,9 +226,9 @@ bool FrameLoader::load(const AK::URL& url, Type type)
void FrameLoader::load_html(StringView html, const AK::URL& url) void FrameLoader::load_html(StringView html, const AK::URL& url)
{ {
auto document = DOM::Document::create(url); auto document = DOM::Document::create(url);
HTML::HTMLParser parser(document, html, "utf-8"); auto parser = HTML::HTMLParser::create(document, html, "utf-8");
parser.run(url); parser->run(url);
browsing_context().set_active_document(&parser.document()); browsing_context().set_active_document(&parser->document());
} }
// FIXME: Use an actual templating engine (our own one when it's built, preferably // FIXME: Use an actual templating engine (our own one when it's built, preferably