1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-05-31 16:18:12 +00:00

LibWeb: Load X(HT)ML documents and transform them into HTML DOM

This commit is contained in:
Ali Mohammad Pur 2022-03-28 16:25:17 +04:30 committed by Andreas Kling
parent c1649e3372
commit 5a0123fd2f
9 changed files with 347 additions and 29 deletions

View file

@ -354,6 +354,7 @@ set(SOURCES
XHR/EventNames.cpp XHR/EventNames.cpp
XHR/XMLHttpRequest.cpp XHR/XMLHttpRequest.cpp
XHR/XMLHttpRequestEventTarget.cpp XHR/XMLHttpRequestEventTarget.cpp
XML/XMLDocumentBuilder.cpp
) )
set(GENERATED_SOURCES set(GENERATED_SOURCES
@ -364,7 +365,7 @@ set(GENERATED_SOURCES
) )
serenity_lib(LibWeb web) serenity_lib(LibWeb web)
target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm) target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm LibXML)
function(libweb_js_wrapper class) function(libweb_js_wrapper class)
cmake_parse_arguments(PARSE_ARGV 1 LIBWEB_WRAPPER "ITERABLE" "" "") cmake_parse_arguments(PARSE_ARGV 1 LIBWEB_WRAPPER "ITERABLE" "" "")

View file

@ -7,6 +7,10 @@
#pragma once #pragma once
namespace Web {
class XMLDocumentBuilder;
}
namespace Web::Cookie { namespace Web::Cookie {
struct Cookie; struct Cookie;
struct ParsedCookie; struct ParsedCookie;

View file

@ -7,6 +7,7 @@
#include <LibWeb/Bindings/DOMParserWrapper.h> #include <LibWeb/Bindings/DOMParserWrapper.h>
#include <LibWeb/HTML/DOMParser.h> #include <LibWeb/HTML/DOMParser.h>
#include <LibWeb/HTML/Parser/HTMLParser.h> #include <LibWeb/HTML/Parser/HTMLParser.h>
#include <LibWeb/XML/XMLDocumentBuilder.h>
namespace Web::HTML { namespace Web::HTML {
@ -36,16 +37,23 @@ NonnullRefPtr<DOM::Document> DOMParser::parse_from_string(String const& string,
parser->run("about:blank"); parser->run("about:blank");
} else { } else {
// -> Otherwise // -> Otherwise
// FIXME: 1. Create an XML parser parser, associated with document, and with XML scripting support disabled.
// 2. Parse string using parser.
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
// 1. Assert: document has no child nodes.
// 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
// 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
// 4. Append root to document.
dbgln("DOMParser::parse_from_string: Unimplemented parser for type: {}", Bindings::idl_enum_to_string(type)); // 1. Create an XML parser parser, associated with document, and with XML scripting support disabled.
TODO(); XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
XMLDocumentBuilder builder { document, XMLScriptingSupport::Disabled };
// 2. Parse string using parser.
auto result = parser.parse_with_listener(builder);
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
if (result.is_error() || builder.has_error()) {
// NOTE: The XML parsing can produce nodes before it hits an error, just remove them.
// 1. Assert: document has no child nodes.
document->remove_all_children(true);
// 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
auto root = DOM::create_element(document, "parsererror", "http://www.mozilla.org/newlayout/xml/parsererror.xml");
// FIXME: 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
// 4. Append root to document.
document->append_child(root);
}
} }
// 3. Return document. // 3. Return document.

View file

@ -26,11 +26,6 @@ HTMLScriptElement::HTMLScriptElement(DOM::Document& document, DOM::QualifiedName
HTMLScriptElement::~HTMLScriptElement() = default; HTMLScriptElement::~HTMLScriptElement() = default;
// Records `document` as this script element's parser document.
// The unnamed Badge<HTMLParser> parameter is a tag argument; it appears intended to
// restrict callers to HTMLParser (Badge itself is defined outside this view).
void HTMLScriptElement::set_parser_document(Badge<HTMLParser>, DOM::Document& document)
{
m_parser_document = document;
}
void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& document) void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& document)
{ {
// https://html.spec.whatwg.org/multipage/scripting.html#concept-script-script // https://html.spec.whatwg.org/multipage/scripting.html#concept-script-script
@ -38,11 +33,6 @@ void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& docume
m_document_load_event_delayer.emplace(document); m_document_load_event_delayer.emplace(document);
} }
// Overwrites the element's m_non_blocking flag with `non_blocking`.
// The unnamed Badge<HTMLParser> parameter is a tag argument; it appears intended to
// restrict callers to HTMLParser (Badge itself is defined outside this view).
void HTMLScriptElement::set_non_blocking(Badge<HTMLParser>, bool non_blocking)
{
m_non_blocking = non_blocking;
}
// https://html.spec.whatwg.org/multipage/scripting.html#execute-the-script-block // https://html.spec.whatwg.org/multipage/scripting.html#execute-the-script-block
void HTMLScriptElement::execute_script() void HTMLScriptElement::execute_script()
{ {

View file

@ -24,10 +24,18 @@ public:
bool is_ready_to_be_parser_executed() const { return m_ready_to_be_parser_executed; } bool is_ready_to_be_parser_executed() const { return m_ready_to_be_parser_executed; }
bool failed_to_load() const { return m_failed_to_load; } bool failed_to_load() const { return m_failed_to_load; }
void set_parser_document(Badge<HTMLParser>, DOM::Document&); template<OneOf<XMLDocumentBuilder, HTMLParser> T>
void set_non_blocking(Badge<HTMLParser>, bool); void set_parser_document(Badge<T>, DOM::Document& document) { m_parser_document = document; }
void set_already_started(Badge<HTMLParser>, bool b) { m_already_started = b; }
void prepare_script(Badge<HTMLParser>) { prepare_script(); } template<OneOf<XMLDocumentBuilder, HTMLParser> T>
void set_non_blocking(Badge<T>, bool b) { m_non_blocking = b; }
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
void set_already_started(Badge<T>, bool b) { m_already_started = b; }
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
void prepare_script(Badge<T>) { prepare_script(); }
void execute_script(); void execute_script();
bool is_parser_inserted() const { return !!m_parser_document; } bool is_parser_inserted() const { return !!m_parser_document; }

View file

@ -791,12 +791,12 @@ void HTMLParser::handle_in_head(HTMLToken& token)
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node(); auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent); auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent);
auto& script_element = verify_cast<HTMLScriptElement>(*element); auto& script_element = verify_cast<HTMLScriptElement>(*element);
script_element.set_parser_document({}, document()); script_element.set_parser_document(Badge<HTMLParser> {}, document());
script_element.set_non_blocking({}, false); script_element.set_non_blocking(Badge<HTMLParser> {}, false);
script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line
if (m_parsing_fragment) { if (m_parsing_fragment) {
script_element.set_already_started({}, true); script_element.set_already_started(Badge<HTMLParser> {}, true);
} }
if (m_invoked_via_document_write) { if (m_invoked_via_document_write) {
@ -2223,7 +2223,7 @@ void HTMLParser::handle_text(HTMLToken& token)
if (token.is_end_of_file()) { if (token.is_end_of_file()) {
log_parse_error(); log_parse_error();
if (current_node().local_name() == HTML::TagNames::script) if (current_node().local_name() == HTML::TagNames::script)
verify_cast<HTMLScriptElement>(current_node()).set_already_started({}, true); verify_cast<HTMLScriptElement>(current_node()).set_already_started(Badge<HTMLParser> {}, true);
(void)m_stack_of_open_elements.pop(); (void)m_stack_of_open_elements.pop();
m_insertion_mode = m_original_insertion_mode; m_insertion_mode = m_original_insertion_mode;
process_using_the_rules_for(m_insertion_mode, token); process_using_the_rules_for(m_insertion_mode, token);
@ -2243,7 +2243,7 @@ void HTMLParser::handle_text(HTMLToken& token)
m_tokenizer.update_insertion_point(); m_tokenizer.update_insertion_point();
increment_script_nesting_level(); increment_script_nesting_level();
// FIXME: Check if active speculative HTML parser is null. // FIXME: Check if active speculative HTML parser is null.
script->prepare_script({}); script->prepare_script(Badge<HTMLParser> {});
decrement_script_nesting_level(); decrement_script_nesting_level();
if (script_nesting_level() == 0) if (script_nesting_level() == 0)
m_parser_pause_flag = false; m_parser_pause_flag = false;

View file

@ -22,6 +22,7 @@
#include <LibWeb/Loader/FrameLoader.h> #include <LibWeb/Loader/FrameLoader.h>
#include <LibWeb/Loader/ResourceLoader.h> #include <LibWeb/Loader/ResourceLoader.h>
#include <LibWeb/Page/Page.h> #include <LibWeb/Page/Page.h>
#include <LibWeb/XML/XMLDocumentBuilder.h>
namespace Web { namespace Web {
@ -119,6 +120,15 @@ static bool build_gemini_document(DOM::Document& document, const ByteBuffer& dat
return true; return true;
} }
// Runs the XML parser over `data`, forwarding parse events to an XMLDocumentBuilder
// bound to `document`. Yields true only when neither the parser nor the builder
// reported an error.
static bool build_xml_document(DOM::Document& document, const ByteBuffer& data)
{
    XML::Parser xml_parser(data, { .resolve_external_resource = resolve_xml_resource });
    XMLDocumentBuilder document_builder { document };

    auto parse_outcome = xml_parser.parse_with_listener(document_builder);

    // A parser-level failure short-circuits; the builder's own error flag is only
    // consulted when the parser itself succeeded.
    if (parse_outcome.is_error())
        return false;
    return !document_builder.has_error();
}
bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data) bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data)
{ {
auto& mime_type = document.content_type(); auto& mime_type = document.content_type();
@ -127,6 +137,8 @@ bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data
parser->run(document.url()); parser->run(document.url());
return true; return true;
} }
if (mime_type.ends_with("+xml") || mime_type.is_one_of("text/xml", "application/xml"))
return build_xml_document(document, data);
if (mime_type.starts_with("image/")) if (mime_type.starts_with("image/"))
return build_image_document(document, data); return build_image_document(document, data);
if (mime_type == "text/plain" || mime_type == "application/json") if (mime_type == "text/plain" || mime_type == "application/json")

File diff suppressed because one or more lines are too long

View file

@ -0,0 +1,45 @@
/*
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibWeb/DOM/Comment.h>
#include <LibWeb/DOM/Document.h>
#include <LibWeb/DOM/ElementFactory.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/DOM/Text.h>
#include <LibXML/Parser/Parser.h>
namespace Web {
// Scripting-support mode handed to XMLDocumentBuilder's constructor.
// How the builder acts on each value is implemented in the .cpp, which is not visible here.
enum class XMLScriptingSupport {
Disabled,
Enabled,
};
// Resolver for external XML resources, identified by a system ID and an optional public ID.
// Presumably returns the resolved resource text, or an error when it cannot be resolved —
// the definition is not visible here; confirm against the implementation.
ErrorOr<String> resolve_xml_resource(XML::SystemID const&, Optional<XML::PublicID> const&);
// XML::Listener implementation that receives parse events for a DOM::Document.
// After parsing, has_error() reports whether the builder itself flagged a problem.
class XMLDocumentBuilder final : public XML::Listener {
public:
    // Binds the builder to `document`. Scripting support defaults to Enabled.
    // `explicit` because the constructor is callable with a single argument and a
    // Document should never silently convert into a builder.
    explicit XMLDocumentBuilder(DOM::Document& document, XMLScriptingSupport = XMLScriptingSupport::Enabled);

    // True if the builder recorded an error (m_has_error).
    bool has_error() const { return m_has_error; }

private:
    // XML::Listener callbacks; their bodies live in the .cpp, which is not visible here.
    virtual void element_start(XML::Name const& name, HashMap<XML::Name, String> const& attributes) override;
    virtual void element_end(XML::Name const& name) override;
    virtual void text(String const& data) override;
    virtual void comment(String const& data) override;
    virtual void document_end() override;

    DOM::Document& m_document;
    // Presumably the node that incoming children are attached to — confirm in the .cpp.
    DOM::Node* m_current_node { nullptr };
    XMLScriptingSupport m_scripting_support { XMLScriptingSupport::Enabled };
    bool m_has_error { false };
    // NOTE(review): lacks the m_ prefix used by the other members; left unchanged because
    // renaming it requires touching the out-of-line definitions as well.
    StringBuilder text_builder;
};
}