mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 04:27:45 +00:00
LibXML: Add a fairly basic XML parser
Currently this can parse XML and resolve external resources/references, and read a DTD (but not apply or verify its rules). That's good enough for _most_ XHTML documents as the HTML 5 spec enforces its own rules about document well-formedness, and does not make use of XML DTDs (aside from a list of predefined entities). An accompanying `xml` utility is provided that can read and dump XML documents, and can also run the XML conformance test suite.
This commit is contained in:
parent
06cedf5bae
commit
67357fe984
15 changed files with 2895 additions and 0 deletions
53
Userland/Libraries/LibXML/DOM/Document.h
Normal file
53
Userland/Libraries/LibXML/DOM/Document.h
Normal file
|
@ -0,0 +1,53 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/HashMap.h>
|
||||
#include <AK/NonnullOwnPtr.h>
|
||||
#include <LibXML/DOM/DocumentTypeDeclaration.h>
|
||||
#include <LibXML/DOM/Node.h>
|
||||
#include <LibXML/Forward.h>
|
||||
|
||||
namespace XML {
|
||||
|
||||
// The XML version a Document was parsed as.
// NOTE(review): the enumerator names suggest XML 1.0 and XML 1.1 respectively — confirm against the parser.
enum class Version {
    Version10,
    Version11,
};
|
||||
|
||||
struct Doctype {
|
||||
String type;
|
||||
Vector<MarkupDeclaration> markup_declarations;
|
||||
Optional<ExternalID> external_id;
|
||||
};
|
||||
|
||||
class Document {
|
||||
public:
|
||||
explicit Document(NonnullOwnPtr<Node> root, Optional<Doctype> doctype, HashMap<Name, String> processing_instructions, Version version)
|
||||
: m_root(move(root))
|
||||
, m_processing_instructions(move(processing_instructions))
|
||||
, m_version(version)
|
||||
, m_explicit_doctype(move(doctype))
|
||||
{
|
||||
}
|
||||
|
||||
Node& root() { return *m_root; }
|
||||
Node const& root() const { return *m_root; }
|
||||
|
||||
HashMap<Name, String> const& processing_instructions() const { return m_processing_instructions; }
|
||||
|
||||
Version version() const { return m_version; }
|
||||
|
||||
Optional<Doctype> const& doctype() const { return m_explicit_doctype; }
|
||||
|
||||
private:
|
||||
NonnullOwnPtr<Node> m_root;
|
||||
HashMap<Name, String> m_processing_instructions;
|
||||
Version m_version;
|
||||
Optional<Doctype> m_explicit_doctype;
|
||||
};
|
||||
}
|
138
Userland/Libraries/LibXML/DOM/DocumentTypeDeclaration.h
Normal file
138
Userland/Libraries/LibXML/DOM/DocumentTypeDeclaration.h
Normal file
|
@ -0,0 +1,138 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/HashTable.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Variant.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibXML/FundamentalTypes.h>
|
||||
|
||||
namespace XML {
|
||||
|
||||
struct ElementDeclaration {
|
||||
struct Empty {
|
||||
};
|
||||
struct Any {
|
||||
};
|
||||
struct Mixed {
|
||||
HashTable<Name> types;
|
||||
bool many;
|
||||
};
|
||||
struct Children {
|
||||
struct Entry;
|
||||
enum class Qualifier {
|
||||
ExactlyOnce,
|
||||
Optional,
|
||||
Any,
|
||||
OneOrMore,
|
||||
};
|
||||
|
||||
struct Choice {
|
||||
Vector<Entry> entries;
|
||||
Qualifier qualifier;
|
||||
};
|
||||
struct Sequence {
|
||||
Vector<Entry> entries;
|
||||
Qualifier qualifier;
|
||||
};
|
||||
|
||||
struct Entry {
|
||||
Variant<Name, Choice, Sequence> sub_entries;
|
||||
Qualifier qualifier;
|
||||
};
|
||||
|
||||
Variant<Choice, Sequence> contents;
|
||||
Qualifier qualifier;
|
||||
};
|
||||
using ContentSpec = Variant<Empty, Any, Mixed, Children>;
|
||||
|
||||
Name type;
|
||||
ContentSpec content_spec;
|
||||
};
|
||||
|
||||
struct AttributeListDeclaration {
|
||||
enum class StringType {
|
||||
CData,
|
||||
};
|
||||
enum class TokenizedType {
|
||||
ID,
|
||||
IDRef,
|
||||
IDRefs,
|
||||
Entity,
|
||||
Entities,
|
||||
NMToken,
|
||||
NMTokens,
|
||||
};
|
||||
struct NotationType {
|
||||
HashTable<Name> names;
|
||||
};
|
||||
struct Enumeration {
|
||||
// FIXME: NMToken
|
||||
HashTable<String> tokens;
|
||||
};
|
||||
using Type = Variant<StringType, TokenizedType, NotationType, Enumeration>;
|
||||
|
||||
struct Required {
|
||||
};
|
||||
struct Implied {
|
||||
};
|
||||
struct Fixed {
|
||||
String value;
|
||||
};
|
||||
struct DefaultValue {
|
||||
String value;
|
||||
};
|
||||
|
||||
using Default = Variant<Required, Implied, Fixed, DefaultValue>;
|
||||
|
||||
struct Definition {
|
||||
Name name;
|
||||
Type type;
|
||||
Default default_;
|
||||
};
|
||||
Name type;
|
||||
Vector<Definition> attributes;
|
||||
};
|
||||
|
||||
struct PublicID {
|
||||
String public_literal;
|
||||
};
|
||||
|
||||
struct SystemID {
|
||||
String system_literal;
|
||||
};
|
||||
|
||||
struct ExternalID {
|
||||
Optional<PublicID> public_id;
|
||||
SystemID system_id;
|
||||
};
|
||||
|
||||
struct EntityDefinition {
|
||||
ExternalID id;
|
||||
Optional<Name> notation;
|
||||
};
|
||||
|
||||
struct GEDeclaration {
|
||||
Name name;
|
||||
Variant<String, EntityDefinition> definition;
|
||||
};
|
||||
|
||||
struct PEDeclaration {
|
||||
Name name;
|
||||
Variant<String, ExternalID> definition;
|
||||
};
|
||||
|
||||
// An entity declaration is one of the two entity declaration kinds defined in this header.
using EntityDeclaration = Variant<GEDeclaration, PEDeclaration>;
|
||||
|
||||
struct NotationDeclaration {
|
||||
Name name;
|
||||
Variant<ExternalID, PublicID> notation;
|
||||
};
|
||||
|
||||
// Any one declaration that can be stored in a doctype's list of markup declarations.
using MarkupDeclaration = Variant<ElementDeclaration, AttributeListDeclaration, EntityDeclaration, NotationDeclaration>;
|
||||
}
|
54
Userland/Libraries/LibXML/DOM/Node.cpp
Normal file
54
Userland/Libraries/LibXML/DOM/Node.cpp
Normal file
|
@ -0,0 +1,54 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include <AK/HashMap.h>
|
||||
#include <LibXML/DOM/Node.h>
|
||||
|
||||
namespace XML {
|
||||
|
||||
// Deep structural equality between two nodes.
//
// Two nodes are equal when they hold the same kind of content and:
//  - Text:    the accumulated text is identical;
//  - Comment: the comment text is identical;
//  - Element: the names match, the attribute maps hold the same key/value
//             pairs, and the children are pairwise equal in order.
// The `parent` pointer does not take part in the comparison.
bool Node::operator==(Node const& other) const
{
    // A node always compares equal to itself; skip the recursive walk.
    if (this == &other)
        return true;

    return content.visit(
        [&](Text const& text) -> bool {
            auto const* other_text = other.content.get_pointer<Text>();
            if (!other_text)
                return false;
            return text.builder.string_view() == other_text->builder.string_view();
        },
        [&](Comment const& comment) -> bool {
            auto const* other_comment = other.content.get_pointer<Comment>();
            if (!other_comment)
                return false;
            return comment.text == other_comment->text;
        },
        [&](Element const& element) -> bool {
            auto const* other_element = other.content.get_pointer<Element>();
            if (!other_element)
                return false;
            if (element.name != other_element->name)
                return false;

            // Equal sizes plus "every attribute of ours exists in theirs with the
            // same value" implies the two maps are identical.
            if (element.attributes.size() != other_element->attributes.size())
                return false;
            for (auto& entry : element.attributes) {
                auto it = other_element->attributes.find(entry.key);
                if (it == other_element->attributes.end())
                    return false;
                if (it->value != entry.value)
                    return false;
            }

            // Children are compared positionally, recursing into this operator.
            if (element.children.size() != other_element->children.size())
                return false;
            for (size_t i = 0; i < element.children.size(); ++i) {
                // Written via operator== because that is the only comparison Node declares.
                if (!(element.children[i] == other_element->children[i]))
                    return false;
            }
            return true;
        });
}
|
||||
|
||||
}
|
40
Userland/Libraries/LibXML/DOM/Node.h
Normal file
40
Userland/Libraries/LibXML/DOM/Node.h
Normal file
|
@ -0,0 +1,40 @@
|
|||
/*
|
||||
* Copyright (c) 2022, Ali Mohammad Pur <mpfard@serenityos.org>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullOwnPtrVector.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/Variant.h>
|
||||
#include <AK/Vector.h>
|
||||
#include <LibXML/FundamentalTypes.h>
|
||||
|
||||
namespace XML {
|
||||
|
||||
struct Attribute {
|
||||
Name name;
|
||||
String value;
|
||||
};
|
||||
|
||||
struct Node {
|
||||
struct Text {
|
||||
StringBuilder builder;
|
||||
};
|
||||
struct Comment {
|
||||
String text;
|
||||
};
|
||||
struct Element {
|
||||
Name name;
|
||||
HashMap<Name, String> attributes;
|
||||
NonnullOwnPtrVector<Node> children;
|
||||
};
|
||||
|
||||
bool operator==(Node const&) const;
|
||||
|
||||
Variant<Text, Comment, Element> content;
|
||||
Node* parent { nullptr };
|
||||
};
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue