mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 16:18:12 +00:00
LibWeb: Load X(HT)ML documents and transform them into HTML DOM
This commit is contained in:
parent
c1649e3372
commit
5a0123fd2f
9 changed files with 347 additions and 29 deletions
|
@ -354,6 +354,7 @@ set(SOURCES
|
||||||
XHR/EventNames.cpp
|
XHR/EventNames.cpp
|
||||||
XHR/XMLHttpRequest.cpp
|
XHR/XMLHttpRequest.cpp
|
||||||
XHR/XMLHttpRequestEventTarget.cpp
|
XHR/XMLHttpRequestEventTarget.cpp
|
||||||
|
XML/XMLDocumentBuilder.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(GENERATED_SOURCES
|
set(GENERATED_SOURCES
|
||||||
|
@ -364,7 +365,7 @@ set(GENERATED_SOURCES
|
||||||
)
|
)
|
||||||
|
|
||||||
serenity_lib(LibWeb web)
|
serenity_lib(LibWeb web)
|
||||||
target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm)
|
target_link_libraries(LibWeb LibCore LibJS LibMarkdown LibGemini LibGUI LibGfx LibTextCodec LibProtocol LibImageDecoderClient LibWasm LibXML)
|
||||||
|
|
||||||
function(libweb_js_wrapper class)
|
function(libweb_js_wrapper class)
|
||||||
cmake_parse_arguments(PARSE_ARGV 1 LIBWEB_WRAPPER "ITERABLE" "" "")
|
cmake_parse_arguments(PARSE_ARGV 1 LIBWEB_WRAPPER "ITERABLE" "" "")
|
||||||
|
|
|
@ -7,6 +7,10 @@
|
||||||
|
|
||||||
#pragma once
|
#pragma once
|
||||||
|
|
||||||
|
// Forward declaration of Web::XMLDocumentBuilder; the full class is declared in
// LibWeb/XML/XMLDocumentBuilder.h.
namespace Web {
|
||||||
|
class XMLDocumentBuilder;
|
||||||
|
}
|
||||||
|
|
||||||
namespace Web::Cookie {
|
namespace Web::Cookie {
|
||||||
struct Cookie;
|
struct Cookie;
|
||||||
struct ParsedCookie;
|
struct ParsedCookie;
|
||||||
|
|
|
@ -7,6 +7,7 @@
|
||||||
#include <LibWeb/Bindings/DOMParserWrapper.h>
|
#include <LibWeb/Bindings/DOMParserWrapper.h>
|
||||||
#include <LibWeb/HTML/DOMParser.h>
|
#include <LibWeb/HTML/DOMParser.h>
|
||||||
#include <LibWeb/HTML/Parser/HTMLParser.h>
|
#include <LibWeb/HTML/Parser/HTMLParser.h>
|
||||||
|
#include <LibWeb/XML/XMLDocumentBuilder.h>
|
||||||
|
|
||||||
namespace Web::HTML {
|
namespace Web::HTML {
|
||||||
|
|
||||||
|
@ -36,16 +37,23 @@ NonnullRefPtr<DOM::Document> DOMParser::parse_from_string(String const& string,
|
||||||
parser->run("about:blank");
|
parser->run("about:blank");
|
||||||
} else {
|
} else {
|
||||||
// -> Otherwise
|
// -> Otherwise
|
||||||
// FIXME: 1. Create an XML parser parse, associated with document, and with XML scripting support disabled.
|
|
||||||
// 2. Parse string using parser.
|
|
||||||
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
|
|
||||||
// 1. Assert: document has no child nodes.
|
|
||||||
// 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
|
|
||||||
// 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
|
|
||||||
// 4. Append root to document.
|
|
||||||
|
|
||||||
dbgln("DOMParser::parse_from_string: Unimplemented parser for type: {}", Bindings::idl_enum_to_string(type));
|
// 1. Create an XML parser parser, associated with document, and with XML scripting support disabled.
|
||||||
TODO();
|
XML::Parser parser(string, { .resolve_external_resource = resolve_xml_resource });
|
||||||
|
XMLDocumentBuilder builder { document, XMLScriptingSupport::Disabled };
|
||||||
|
// 2. Parse string using parser.
|
||||||
|
auto result = parser.parse_with_listener(builder);
|
||||||
|
// 3. If the previous step resulted in an XML well-formedness or XML namespace well-formedness error, then:
|
||||||
|
if (result.is_error() || builder.has_error()) {
|
||||||
|
// NOTE: The XML parsing can produce nodes before it hits an error, just remove them.
|
||||||
|
// 1. Assert: document has no child nodes.
|
||||||
|
document->remove_all_children(true);
|
||||||
|
// 2. Let root be the result of creating an element given document, "parsererror", and "http://www.mozilla.org/newlayout/xml/parsererror.xml".
|
||||||
|
auto root = DOM::create_element(document, "parsererror", "http://www.mozilla.org/newlayout/xml/parsererror.xml");
|
||||||
|
// FIXME: 3. Optionally, add attributes or children to root to describe the nature of the parsing error.
|
||||||
|
// 4. Append root to document.
|
||||||
|
document->append_child(root);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// 3. Return document.
|
// 3. Return document.
|
||||||
|
|
|
@ -26,11 +26,6 @@ HTMLScriptElement::HTMLScriptElement(DOM::Document& document, DOM::QualifiedName
|
||||||
|
|
||||||
HTMLScriptElement::~HTMLScriptElement() = default;
|
HTMLScriptElement::~HTMLScriptElement() = default;
|
||||||
|
|
||||||
void HTMLScriptElement::set_parser_document(Badge<HTMLParser>, DOM::Document& document)
|
|
||||||
{
|
|
||||||
m_parser_document = document;
|
|
||||||
}
|
|
||||||
|
|
||||||
void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& document)
|
void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& document)
|
||||||
{
|
{
|
||||||
// https://html.spec.whatwg.org/multipage/scripting.html#concept-script-script
|
// https://html.spec.whatwg.org/multipage/scripting.html#concept-script-script
|
||||||
|
@ -38,11 +33,6 @@ void HTMLScriptElement::begin_delaying_document_load_event(DOM::Document& docume
|
||||||
m_document_load_event_delayer.emplace(document);
|
m_document_load_event_delayer.emplace(document);
|
||||||
}
|
}
|
||||||
|
|
||||||
void HTMLScriptElement::set_non_blocking(Badge<HTMLParser>, bool non_blocking)
|
|
||||||
{
|
|
||||||
m_non_blocking = non_blocking;
|
|
||||||
}
|
|
||||||
|
|
||||||
// https://html.spec.whatwg.org/multipage/scripting.html#execute-the-script-block
|
// https://html.spec.whatwg.org/multipage/scripting.html#execute-the-script-block
|
||||||
void HTMLScriptElement::execute_script()
|
void HTMLScriptElement::execute_script()
|
||||||
{
|
{
|
||||||
|
|
|
@ -24,10 +24,18 @@ public:
|
||||||
bool is_ready_to_be_parser_executed() const { return m_ready_to_be_parser_executed; }
|
bool is_ready_to_be_parser_executed() const { return m_ready_to_be_parser_executed; }
|
||||||
bool failed_to_load() const { return m_failed_to_load; }
|
bool failed_to_load() const { return m_failed_to_load; }
|
||||||
|
|
||||||
void set_parser_document(Badge<HTMLParser>, DOM::Document&);
|
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
|
||||||
void set_non_blocking(Badge<HTMLParser>, bool);
|
void set_parser_document(Badge<T>, DOM::Document& document) { m_parser_document = document; }
|
||||||
void set_already_started(Badge<HTMLParser>, bool b) { m_already_started = b; }
|
|
||||||
void prepare_script(Badge<HTMLParser>) { prepare_script(); }
|
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
|
||||||
|
void set_non_blocking(Badge<T>, bool b) { m_non_blocking = b; }
|
||||||
|
|
||||||
|
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
|
||||||
|
void set_already_started(Badge<T>, bool b) { m_already_started = b; }
|
||||||
|
|
||||||
|
template<OneOf<XMLDocumentBuilder, HTMLParser> T>
|
||||||
|
void prepare_script(Badge<T>) { prepare_script(); }
|
||||||
|
|
||||||
void execute_script();
|
void execute_script();
|
||||||
|
|
||||||
bool is_parser_inserted() const { return !!m_parser_document; }
|
bool is_parser_inserted() const { return !!m_parser_document; }
|
||||||
|
|
|
@ -791,12 +791,12 @@ void HTMLParser::handle_in_head(HTMLToken& token)
|
||||||
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
|
auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
|
||||||
auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent);
|
auto element = create_element_for(token, Namespace::HTML, *adjusted_insertion_location.parent);
|
||||||
auto& script_element = verify_cast<HTMLScriptElement>(*element);
|
auto& script_element = verify_cast<HTMLScriptElement>(*element);
|
||||||
script_element.set_parser_document({}, document());
|
script_element.set_parser_document(Badge<HTMLParser> {}, document());
|
||||||
script_element.set_non_blocking({}, false);
|
script_element.set_non_blocking(Badge<HTMLParser> {}, false);
|
||||||
script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line
|
script_element.set_source_line_number({}, token.start_position().line + 1); // FIXME: This +1 is incorrect for script tags whose script does not start on a new line
|
||||||
|
|
||||||
if (m_parsing_fragment) {
|
if (m_parsing_fragment) {
|
||||||
script_element.set_already_started({}, true);
|
script_element.set_already_started(Badge<HTMLParser> {}, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m_invoked_via_document_write) {
|
if (m_invoked_via_document_write) {
|
||||||
|
@ -2223,7 +2223,7 @@ void HTMLParser::handle_text(HTMLToken& token)
|
||||||
if (token.is_end_of_file()) {
|
if (token.is_end_of_file()) {
|
||||||
log_parse_error();
|
log_parse_error();
|
||||||
if (current_node().local_name() == HTML::TagNames::script)
|
if (current_node().local_name() == HTML::TagNames::script)
|
||||||
verify_cast<HTMLScriptElement>(current_node()).set_already_started({}, true);
|
verify_cast<HTMLScriptElement>(current_node()).set_already_started(Badge<HTMLParser> {}, true);
|
||||||
(void)m_stack_of_open_elements.pop();
|
(void)m_stack_of_open_elements.pop();
|
||||||
m_insertion_mode = m_original_insertion_mode;
|
m_insertion_mode = m_original_insertion_mode;
|
||||||
process_using_the_rules_for(m_insertion_mode, token);
|
process_using_the_rules_for(m_insertion_mode, token);
|
||||||
|
@ -2243,7 +2243,7 @@ void HTMLParser::handle_text(HTMLToken& token)
|
||||||
m_tokenizer.update_insertion_point();
|
m_tokenizer.update_insertion_point();
|
||||||
increment_script_nesting_level();
|
increment_script_nesting_level();
|
||||||
// FIXME: Check if active speculative HTML parser is null.
|
// FIXME: Check if active speculative HTML parser is null.
|
||||||
script->prepare_script({});
|
script->prepare_script(Badge<HTMLParser> {});
|
||||||
decrement_script_nesting_level();
|
decrement_script_nesting_level();
|
||||||
if (script_nesting_level() == 0)
|
if (script_nesting_level() == 0)
|
||||||
m_parser_pause_flag = false;
|
m_parser_pause_flag = false;
|
||||||
|
|
|
@ -22,6 +22,7 @@
|
||||||
#include <LibWeb/Loader/FrameLoader.h>
|
#include <LibWeb/Loader/FrameLoader.h>
|
||||||
#include <LibWeb/Loader/ResourceLoader.h>
|
#include <LibWeb/Loader/ResourceLoader.h>
|
||||||
#include <LibWeb/Page/Page.h>
|
#include <LibWeb/Page/Page.h>
|
||||||
|
#include <LibWeb/XML/XMLDocumentBuilder.h>
|
||||||
|
|
||||||
namespace Web {
|
namespace Web {
|
||||||
|
|
||||||
|
@ -119,6 +120,15 @@ static bool build_gemini_document(DOM::Document& document, const ByteBuffer& dat
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parses `data` as XML and feeds the parser's events into an XMLDocumentBuilder
// constructed for `document`.
// Returns true only when neither the parser result nor the builder reports an error.
static bool build_xml_document(DOM::Document& document, const ByteBuffer& data)
|
||||||
|
{
|
||||||
|

||||||
|
// resolve_xml_resource is installed as the parser's external-resource resolver.
XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource });
|
||||||
|
// The builder is constructed without a scripting-support argument, so the
// constructor's default (XMLScriptingSupport::Enabled) applies.
XMLDocumentBuilder builder { document };
|
||||||
|
auto result = parser.parse_with_listener(builder);
|
||||||
|
// Both error sources are checked: the parser's own result and the builder's error flag.
return !result.is_error() && !builder.has_error();
|
||||||
|
}
|
||||||
|
|
||||||
bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data)
|
bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data)
|
||||||
{
|
{
|
||||||
auto& mime_type = document.content_type();
|
auto& mime_type = document.content_type();
|
||||||
|
@ -127,6 +137,8 @@ bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data
|
||||||
parser->run(document.url());
|
parser->run(document.url());
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
if (mime_type.ends_with("+xml") || mime_type.is_one_of("text/xml", "application/xml"))
|
||||||
|
return build_xml_document(document, data);
|
||||||
if (mime_type.starts_with("image/"))
|
if (mime_type.starts_with("image/"))
|
||||||
return build_image_document(document, data);
|
return build_image_document(document, data);
|
||||||
if (mime_type == "text/plain" || mime_type == "application/json")
|
if (mime_type == "text/plain" || mime_type == "application/json")
|
||||||
|
|
250
Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.cpp
Normal file
250
Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.cpp
Normal file
File diff suppressed because one or more lines are too long
45
Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.h
Normal file
45
Userland/Libraries/LibWeb/XML/XMLDocumentBuilder.h
Normal file
|
@ -0,0 +1,45 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <LibWeb/DOM/Comment.h>
|
||||||
|
#include <LibWeb/DOM/Document.h>
|
||||||
|
#include <LibWeb/DOM/ElementFactory.h>
|
||||||
|
#include <LibWeb/DOM/Node.h>
|
||||||
|
#include <LibWeb/DOM/Text.h>
|
||||||
|
#include <LibXML/Parser/Parser.h>
|
||||||
|
|
||||||
|
namespace Web {
|
||||||
|
|
||||||
|
// Option passed to XMLDocumentBuilder's constructor (which defaults it to Enabled).
// NOTE(review): what the builder does differently for each value is implemented in
// XMLDocumentBuilder.cpp, which is not visible here.
enum class XMLScriptingSupport {
|
||||||
|
Disabled,
|
||||||
|
Enabled,
|
||||||
|
};
|
||||||
|
|
||||||
|
// Resolver callback handed to XML::Parser through its `resolve_external_resource` option.
// Takes an external resource's system ID and optional public ID and yields a String or an error.
// NOTE(review): presumably the String is the resource's contents; the resolution
// policy lives in the .cpp and should be confirmed there.
ErrorOr<String> resolve_xml_resource(XML::SystemID const&, Optional<XML::PublicID> const&);
|
||||||
|
|
||||||
|
// XML::Listener implementation that is handed to XML::Parser::parse_with_listener()
// together with a DOM::Document. Callers check has_error() after parsing, in addition
// to the parser's own result.
// NOTE(review): the callback bodies are in XMLDocumentBuilder.cpp (not visible here);
// presumably they populate the document with DOM nodes — confirm against the .cpp.
class XMLDocumentBuilder final : public XML::Listener {
|
||||||
|
public:
|
||||||
|
// Scripting support defaults to Enabled when the second argument is omitted.
XMLDocumentBuilder(DOM::Document& document, XMLScriptingSupport = XMLScriptingSupport::Enabled);
|
||||||
|

||||||
|
// Returns the builder's error flag (m_has_error).
bool has_error() const { return m_has_error; }
|
||||||
|

||||||
|
private:
|
||||||
|
// XML::Listener overrides; private, so they are only reachable through the Listener interface.
virtual void element_start(XML::Name const& name, HashMap<XML::Name, String> const& attributes) override;
|
||||||
|
virtual void element_end(XML::Name const& name) override;
|
||||||
|
virtual void text(String const& data) override;
|
||||||
|
virtual void comment(String const& data) override;
|
||||||
|
virtual void document_end() override;
|
||||||
|

||||||
|
// Held by reference: the document must outlive this builder.
DOM::Document& m_document;
|
||||||
|
// Starts out null. NOTE(review): presumably tracks the node currently being built — confirm in the .cpp.
DOM::Node* m_current_node { nullptr };
|
||||||
|
XMLScriptingSupport m_scripting_support { XMLScriptingSupport::Enabled };
|
||||||
|
bool m_has_error { false };
|
||||||
|
// NOTE(review): unlike the other data members, this one has no `m_` prefix.
StringBuilder text_builder;
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue