From f7ad8c0f94af4d42e669d8f3fa9af51a46e30cbb Mon Sep 17 00:00:00 2001 From: Luke Date: Mon, 5 Jul 2021 05:20:31 +0100 Subject: [PATCH] LibWeb: Add DOMParser This allows you to invoke the HTML document parser and retrieve a document as though it was loaded as a web page, minus any scripting ability. This does not currently support XML parsing. This is used by YouTube (or more accurately, Web Components Polyfills) to polyfill templates. --- .../LibWeb/Bindings/WindowObjectHelper.h | 3 ++ Userland/Libraries/LibWeb/CMakeLists.txt | 2 + Userland/Libraries/LibWeb/Forward.h | 2 + Userland/Libraries/LibWeb/HTML/DOMParser.cpp | 41 +++++++++++++++++++ Userland/Libraries/LibWeb/HTML/DOMParser.h | 39 ++++++++++++++++++ Userland/Libraries/LibWeb/HTML/DOMParser.idl | 6 +++ 6 files changed, 93 insertions(+) create mode 100644 Userland/Libraries/LibWeb/HTML/DOMParser.cpp create mode 100644 Userland/Libraries/LibWeb/HTML/DOMParser.h create mode 100644 Userland/Libraries/LibWeb/HTML/DOMParser.idl diff --git a/Userland/Libraries/LibWeb/Bindings/WindowObjectHelper.h b/Userland/Libraries/LibWeb/Bindings/WindowObjectHelper.h index 0909b0f6e8..cdc01843be 100644 --- a/Userland/Libraries/LibWeb/Bindings/WindowObjectHelper.h +++ b/Userland/Libraries/LibWeb/Bindings/WindowObjectHelper.h @@ -22,6 +22,8 @@ #include #include #include +#include +#include #include #include #include @@ -247,6 +249,7 @@ ADD_WINDOW_OBJECT_INTERFACE(DocumentType) \ ADD_WINDOW_OBJECT_INTERFACE(DOMException) \ ADD_WINDOW_OBJECT_INTERFACE(DOMImplementation) \ + ADD_WINDOW_OBJECT_INTERFACE(DOMParser) \ ADD_WINDOW_OBJECT_INTERFACE(Element) \ ADD_WINDOW_OBJECT_INTERFACE(Event) \ ADD_WINDOW_OBJECT_INTERFACE(EventTarget) \ diff --git a/Userland/Libraries/LibWeb/CMakeLists.txt b/Userland/Libraries/LibWeb/CMakeLists.txt index 03b0cfac54..8a1cabc134 100644 --- a/Userland/Libraries/LibWeb/CMakeLists.txt +++ b/Userland/Libraries/LibWeb/CMakeLists.txt @@ -70,6 +70,7 @@ set(SOURCES HTML/AttributeNames.cpp HTML/BrowsingContextContainer.cpp HTML/CanvasRenderingContext2D.cpp + HTML/DOMParser.cpp HTML/EventNames.cpp HTML/FormAssociatedElement.cpp HTML/GlobalEventHandlers.cpp @@ -331,6 +332,7 @@ libweb_js_wrapper(DOM/Range) libweb_js_wrapper(DOM/Text) libweb_js_wrapper(HTML/CanvasRenderingContext2D) libweb_js_wrapper(HTML/CloseEvent) +libweb_js_wrapper(HTML/DOMParser) libweb_js_wrapper(HTML/HTMLAnchorElement) libweb_js_wrapper(HTML/HTMLAreaElement) libweb_js_wrapper(HTML/HTMLAudioElement) diff --git a/Userland/Libraries/LibWeb/Forward.h b/Userland/Libraries/LibWeb/Forward.h index 186e0d2b17..d82e0d8c22 100644 --- a/Userland/Libraries/LibWeb/Forward.h +++ b/Userland/Libraries/LibWeb/Forward.h @@ -59,6 +59,7 @@ enum class QuirksMode; namespace Web::HTML { class CanvasRenderingContext2D; class CloseEvent; +class DOMParser; class EventHandler; class HTMLAnchorElement; class HTMLAreaElement; @@ -211,6 +212,7 @@ class DocumentTypeWrapper; class DocumentWrapper; class DOMExceptionWrapper; class DOMImplementationWrapper; +class DOMParserWrapper; class ElementWrapper; class EventListenerWrapper; class EventTargetWrapper; diff --git a/Userland/Libraries/LibWeb/HTML/DOMParser.cpp b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp new file mode 100644 index 0000000000..65657721f7 --- /dev/null +++ b/Userland/Libraries/LibWeb/HTML/DOMParser.cpp @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2021, Luke Wilde + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#include +#include + +namespace Web::HTML { + +DOMParser::DOMParser() +{ +} + +DOMParser::~DOMParser() +{ +} + +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#dom-domparser-parsefromstring +NonnullRefPtr DOMParser::parse_from_string(String const& string, String const& type) +{ + // FIXME: Pass in this's relevant global object's associated Document's URL. + auto document = DOM::Document::create(); + document->set_content_type(type); + + // NOTE: This isn't a case insensitive match since the DOMParserSupportedType enum enforces an all lowercase type. + if (type == "text/html") { + // FIXME: Set document's type to "html". + HTMLDocumentParser parser(document, string, "UTF-8"); + // FIXME: This is to match the default URL. Instead, pass in this's relevant global object's associated Document's URL. + parser.run("about:blank"); + } else { + dbgln("DOMParser::parse_from_string: Unimplemented parser for type: {}", type); + TODO(); + } + + return document; +} + +} diff --git a/Userland/Libraries/LibWeb/HTML/DOMParser.h b/Userland/Libraries/LibWeb/HTML/DOMParser.h new file mode 100644 index 0000000000..9746eb21c2 --- /dev/null +++ b/Userland/Libraries/LibWeb/HTML/DOMParser.h @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2021, Luke Wilde + * + * SPDX-License-Identifier: BSD-2-Clause + */ + +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace Web::HTML { + +// https://html.spec.whatwg.org/multipage/dynamic-markup-insertion.html#domparser +class DOMParser final + : public RefCounted + , public Weakable + , public Bindings::Wrappable { +public: + using WrapperType = Bindings::DOMParserWrapper; + + static DOM::ExceptionOr> create_with_global_object(Bindings::WindowObject&) + { + return adopt_ref(*new DOMParser()); + } + + virtual ~DOMParser() override; + + NonnullRefPtr parse_from_string(String const&, String const&); + +private: + DOMParser(); +}; + +} diff --git a/Userland/Libraries/LibWeb/HTML/DOMParser.idl b/Userland/Libraries/LibWeb/HTML/DOMParser.idl new file mode 100644 index 0000000000..276076359f --- /dev/null +++ b/Userland/Libraries/LibWeb/HTML/DOMParser.idl @@ -0,0 +1,6 @@ +interface DOMParser { + constructor(); + + // FIXME: "type" should use the DOMParserSupportedType enum. + Document parseFromString(DOMString string, DOMString type); +};