1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 21:07:35 +00:00

LibHTML: Add Comment and CharacterData nodes and improve HTML parsing

This patch adds the CharacterData subclass of Node, which is now the
parent class of Text and a new Comment class.

A Comment node is one of these in HTML: <!--hello friends-->
Since these occur somewhat frequently on the web, we need to be able
to parse them.

This patch also adds a child rejection mechanism to the DOM tree.
Nodes can now override is_child_allowed(Node) and return false if they
don't want a particular Node to become a child of theirs. This is used
to prevent Document from taking on unwanted children.
This commit is contained in:
Andreas Kling 2019-10-12 23:26:47 +02:00
parent 6d150df58a
commit b083a233d8
15 changed files with 158 additions and 25 deletions

View file

@ -0,0 +1,11 @@
#include <LibHTML/DOM/CharacterData.h>
// Forwards the owning document and the concrete node type to Node, and
// stores the supplied character data in m_data.
CharacterData::CharacterData(Document& document, NodeType type, const String& data)
    : Node(document, type)
    , m_data(data)
{
}
// Nothing to release by hand; the destructor stays out-of-line in this file.
CharacterData::~CharacterData() = default;

View file

@ -0,0 +1,25 @@
#pragma once
#include <AK/String.h>
#include <LibHTML/DOM/Node.h>
// Base class for DOM nodes whose content is a single string of character
// data. The constructor is protected, so instances are only created through
// subclasses.
class CharacterData : public Node {
public:
    virtual ~CharacterData() override;

    // Read-only access to the stored character data.
    const String& data() const { return m_data; }

    // The text content of a character-data node is simply its data.
    virtual String text_content() const override { return data(); }

protected:
    // The NodeType argument is the concrete node type chosen by the subclass.
    explicit CharacterData(Document&, NodeType, const String&);

private:
    String m_data;
};
// is<T>() specialization for CharacterData: defers to the node's own
// is_character_data() predicate.
template<>
inline bool is<CharacterData>(const Node& node)
{
    return node.is_character_data();
}

View file

@ -0,0 +1,11 @@
#include <LibHTML/DOM/Comment.h>
#include <LibHTML/Layout/LayoutText.h>
// Creates a comment node: a CharacterData tagged with NodeType::COMMENT_NODE
// that carries the given comment text.
Comment::Comment(Document& document, const String& data)
    : CharacterData(document, NodeType::COMMENT_NODE, data)
{
}
// Nothing to release by hand; the destructor stays out-of-line in this file.
Comment::~Comment() = default;

View file

@ -0,0 +1,18 @@
#pragma once
#include <AK/String.h>
#include <LibHTML/DOM/CharacterData.h>
// DOM node for a comment; the comment text lives in the CharacterData base.
class Comment final : public CharacterData {
public:
    explicit Comment(Document&, const String&);
    virtual ~Comment() override;

    // Comments report a fixed placeholder name instead of a real tag.
    virtual String tag_name() const override { return "#comment"; }
};
// is<T>() specialization for Comment: defers to the node's own is_comment()
// predicate.
template<>
inline bool is<Comment>(const Node& node)
{
    return node.is_comment();
}

View file

@ -29,6 +29,23 @@ StyleResolver& Document::style_resolver()
return *m_style_resolver;
}
// Decides whether `node` may become a direct child of this document:
//   - comments are always accepted;
//   - a doctype is accepted only while there is no DocumentType child yet;
//   - an element is accepted only while there is no Element child yet;
//   - every other node type (documents and text included) is rejected.
bool Document::is_child_allowed(const Node& node) const
{
    const auto candidate_type = node.type();

    if (candidate_type == NodeType::COMMENT_NODE)
        return true;

    if (candidate_type == NodeType::DOCUMENT_TYPE_NODE)
        return !first_child_of_type<DocumentType>();

    if (candidate_type == NodeType::ELEMENT_NODE)
        return !first_child_of_type<Element>();

    return false;
}
void Document::fixup()
{
if (!is<DocumentType>(first_child()))

View file

@ -67,6 +67,8 @@ public:
void invalidate_layout();
Function<void()> on_invalidate_layout;
// Overrides the Node hook so the document can refuse nodes it does not want as children.
virtual bool is_child_allowed(const Node&) const override;
private:
virtual RefPtr<LayoutNode> create_layout_node(const StyleResolver&, const StyleProperties* parent_style) const override;

View file

@ -7,7 +7,7 @@ public:
explicit DocumentType(Document&);
virtual ~DocumentType() override;
virtual String tag_name() const override { return "!DOCTYPE"; }
virtual String tag_name() const override { return "#doctype"; }
};
template<>

View file

@ -10,6 +10,7 @@ enum class NodeType : unsigned {
// The non-zero values match the nodeType constants of the DOM Node interface;
// INVALID (0) has no DOM counterpart.
INVALID = 0,
ELEMENT_NODE = 1,
TEXT_NODE = 3,
COMMENT_NODE = 8,
DOCUMENT_NODE = 9,
DOCUMENT_TYPE_NODE = 10,
};
@ -32,6 +33,8 @@ public:
// Type predicates: each compares type() against the corresponding NodeType value.
bool is_text() const { return type() == NodeType::TEXT_NODE; }
bool is_document() const { return type() == NodeType::DOCUMENT_NODE; }
bool is_document_type() const { return type() == NodeType::DOCUMENT_TYPE_NODE; }
bool is_comment() const { return type() == NodeType::COMMENT_NODE; }
// Character data covers exactly the text and comment node types.
bool is_character_data() const { return type() == NodeType::TEXT_NODE || type() == NodeType::COMMENT_NODE; }
// Only elements and documents count as parent nodes.
bool is_parent_node() const { return is_element() || is_document(); }
virtual RefPtr<LayoutNode> create_layout_node(const StyleResolver&, const StyleProperties* parent_style) const;
@ -66,6 +69,8 @@ public:
const Element* previous_element_sibling() const;
const Element* next_element_sibling() const;
// Hook that lets a subclass veto a prospective child; the base implementation accepts every node.
virtual bool is_child_allowed(const Node&) const { return true; }
protected:
Node(Document&, NodeType);

View file

@ -2,8 +2,7 @@
#include <LibHTML/Layout/LayoutText.h>
Text::Text(Document& document, const String& data)
: Node(document, NodeType::TEXT_NODE)
, m_data(data)
: CharacterData(document, NodeType::TEXT_NODE, data)
{
}

View file

@ -1,23 +1,17 @@
#pragma once
#include <AK/String.h>
#include <LibHTML/DOM/Node.h>
#include <LibHTML/DOM/CharacterData.h>
class Text final : public Node {
class Text final : public CharacterData {
public:
explicit Text(Document&, const String&);
virtual ~Text() override;
const String& data() const { return m_data; }
virtual String tag_name() const override { return "#text"; }
virtual String text_content() const override { return m_data; }
private:
virtual RefPtr<LayoutNode> create_layout_node(const StyleResolver&, const StyleProperties* parent_style) const override;
String m_data;
};
template<>