mirror of
https://github.com/RGBCube/serenity
synced 2025-07-25 19:27:44 +00:00
LibWeb: Move the HTML parser into HTML/Parser/
This commit is contained in:
parent
a296020e03
commit
cc4109c03b
18 changed files with 32 additions and 42 deletions
|
@ -37,7 +37,7 @@
|
|||
#include <LibWeb/Layout/LayoutWidget.h>
|
||||
#include <LibWeb/Loader/ResourceLoader.h>
|
||||
#include <LibWeb/PageView.h>
|
||||
#include <LibWeb/Parser/HTMLDocumentParser.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
|
|
2302
Libraries/LibWeb/HTML/Parser/Entities.cpp
Normal file
2302
Libraries/LibWeb/HTML/Parser/Entities.cpp
Normal file
File diff suppressed because it is too large
Load diff
43
Libraries/LibWeb/HTML/Parser/Entities.h
Normal file
43
Libraries/LibWeb/HTML/Parser/Entities.h
Normal file
|
@ -0,0 +1,43 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Web {
|
||||
namespace HTML {
|
||||
|
||||
struct EntityMatch {
|
||||
Vector<u32, 2> codepoints;
|
||||
StringView entity;
|
||||
};
|
||||
|
||||
Optional<EntityMatch> codepoints_from_entity(const StringView&);
|
||||
|
||||
}
|
||||
}
|
2802
Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
Normal file
2802
Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp
Normal file
File diff suppressed because it is too large
Load diff
189
Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h
Normal file
189
Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.h
Normal file
|
@ -0,0 +1,189 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullRefPtrVector.h>
|
||||
#include <LibWeb/DOM/Node.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
|
||||
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
|
||||
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// X-macro listing every parser insertion mode exactly once, in declaration order.
// Usage: define __ENUMERATE_INSERTION_MODE(mode), expand ENUMERATE_INSERTION_MODES,
// then #undef __ENUMERATE_INSERTION_MODE again.
#define ENUMERATE_INSERTION_MODES \
    __ENUMERATE_INSERTION_MODE(Initial) \
    __ENUMERATE_INSERTION_MODE(BeforeHTML) \
    __ENUMERATE_INSERTION_MODE(BeforeHead) \
    __ENUMERATE_INSERTION_MODE(InHead) \
    __ENUMERATE_INSERTION_MODE(InHeadNoscript) \
    __ENUMERATE_INSERTION_MODE(AfterHead) \
    __ENUMERATE_INSERTION_MODE(InBody) \
    __ENUMERATE_INSERTION_MODE(Text) \
    __ENUMERATE_INSERTION_MODE(InTable) \
    __ENUMERATE_INSERTION_MODE(InTableText) \
    __ENUMERATE_INSERTION_MODE(InCaption) \
    __ENUMERATE_INSERTION_MODE(InColumnGroup) \
    __ENUMERATE_INSERTION_MODE(InTableBody) \
    __ENUMERATE_INSERTION_MODE(InRow) \
    __ENUMERATE_INSERTION_MODE(InCell) \
    __ENUMERATE_INSERTION_MODE(InSelect) \
    __ENUMERATE_INSERTION_MODE(InSelectInTable) \
    __ENUMERATE_INSERTION_MODE(InTemplate) \
    __ENUMERATE_INSERTION_MODE(AfterBody) \
    __ENUMERATE_INSERTION_MODE(InFrameset) \
    __ENUMERATE_INSERTION_MODE(AfterFrameset) \
    __ENUMERATE_INSERTION_MODE(AfterAfterBody) \
    __ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
|
||||
|
||||
// Free-function entry point: takes the input text, a URL and an encoding name and
// yields a DOM::Document through a RefPtr.
// NOTE(review): presumably drives an HTMLDocumentParser over the input; whether the
// returned RefPtr can be null is not visible from this header — confirm in the .cpp.
RefPtr<DOM::Document> parse_html_document(const StringView&, const URL&, const String& encoding);
|
||||
|
||||
class HTMLDocumentParser {
|
||||
public:
|
||||
HTMLDocumentParser(const StringView& input, const String& encoding);
|
||||
HTMLDocumentParser(const StringView& input, const String& encoding, DOM::Document& existing_document);
|
||||
~HTMLDocumentParser();
|
||||
|
||||
void run(const URL&);
|
||||
|
||||
DOM::Document& document();
|
||||
|
||||
static NonnullRefPtrVector<DOM::Node> parse_html_fragment(DOM::Element& context_element, const StringView&);
|
||||
|
||||
enum class InsertionMode {
|
||||
#define __ENUMERATE_INSERTION_MODE(mode) mode,
|
||||
ENUMERATE_INSERTION_MODES
|
||||
#undef __ENUMERATE_INSERTION_MODE
|
||||
};
|
||||
|
||||
InsertionMode insertion_mode() const { return m_insertion_mode; }
|
||||
|
||||
static bool is_special_tag(const FlyString& tag_name);
|
||||
|
||||
private:
|
||||
const char* insertion_mode_name() const;
|
||||
|
||||
DOM::QuirksMode which_quirks_mode(const HTMLToken&) const;
|
||||
|
||||
void handle_initial(HTMLToken&);
|
||||
void handle_before_html(HTMLToken&);
|
||||
void handle_before_head(HTMLToken&);
|
||||
void handle_in_head(HTMLToken&);
|
||||
void handle_in_head_noscript(HTMLToken&);
|
||||
void handle_after_head(HTMLToken&);
|
||||
void handle_in_body(HTMLToken&);
|
||||
void handle_after_body(HTMLToken&);
|
||||
void handle_after_after_body(HTMLToken&);
|
||||
void handle_text(HTMLToken&);
|
||||
void handle_in_table(HTMLToken&);
|
||||
void handle_in_table_body(HTMLToken&);
|
||||
void handle_in_row(HTMLToken&);
|
||||
void handle_in_cell(HTMLToken&);
|
||||
void handle_in_table_text(HTMLToken&);
|
||||
void handle_in_select_in_table(HTMLToken&);
|
||||
void handle_in_select(HTMLToken&);
|
||||
void handle_in_caption(HTMLToken&);
|
||||
void handle_in_column_group(HTMLToken&);
|
||||
void handle_in_template(HTMLToken&);
|
||||
void handle_in_frameset(HTMLToken&);
|
||||
void handle_after_frameset(HTMLToken&);
|
||||
void handle_after_after_frameset(HTMLToken&);
|
||||
|
||||
void stop_parsing() { m_stop_parsing = true; }
|
||||
|
||||
void generate_implied_end_tags(const FlyString& exception = {});
|
||||
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
|
||||
NonnullRefPtr<DOM::Element> create_element_for(const HTMLToken&);
|
||||
|
||||
struct AdjustedInsertionLocation {
|
||||
RefPtr<DOM::Node> parent;
|
||||
RefPtr<DOM::Node> insert_before_sibling;
|
||||
};
|
||||
|
||||
AdjustedInsertionLocation find_appropriate_place_for_inserting_node();
|
||||
|
||||
DOM::Text* find_character_insertion_node();
|
||||
void flush_character_insertions();
|
||||
RefPtr<DOM::Element> insert_html_element(const HTMLToken&);
|
||||
DOM::Element& current_node();
|
||||
DOM::Element& node_before_current_node();
|
||||
void insert_character(u32 data);
|
||||
void insert_comment(HTMLToken&);
|
||||
void reconstruct_the_active_formatting_elements();
|
||||
void close_a_p_element();
|
||||
void process_using_the_rules_for(InsertionMode, HTMLToken&);
|
||||
void parse_generic_raw_text_element(HTMLToken&);
|
||||
void increment_script_nesting_level();
|
||||
void decrement_script_nesting_level();
|
||||
size_t script_nesting_level() const { return m_script_nesting_level; }
|
||||
void reset_the_insertion_mode_appropriately();
|
||||
|
||||
void adjust_mathml_attributes(HTMLToken&);
|
||||
void adjust_svg_attributes(HTMLToken&);
|
||||
void adjust_foreign_attributes(HTMLToken&);
|
||||
|
||||
enum AdoptionAgencyAlgorithmOutcome {
|
||||
DoNothing,
|
||||
RunAnyOtherEndTagSteps,
|
||||
};
|
||||
|
||||
AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&);
|
||||
void clear_the_stack_back_to_a_table_context();
|
||||
void clear_the_stack_back_to_a_table_body_context();
|
||||
void clear_the_stack_back_to_a_table_row_context();
|
||||
void close_the_cell();
|
||||
|
||||
InsertionMode m_insertion_mode { InsertionMode::Initial };
|
||||
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
|
||||
|
||||
StackOfOpenElements m_stack_of_open_elements;
|
||||
Vector<InsertionMode> m_stack_of_template_insertion_modes;
|
||||
ListOfActiveFormattingElements m_list_of_active_formatting_elements;
|
||||
|
||||
HTMLTokenizer m_tokenizer;
|
||||
|
||||
bool m_foster_parenting { false };
|
||||
bool m_frameset_ok { true };
|
||||
bool m_parsing_fragment { false };
|
||||
bool m_scripting_enabled { true };
|
||||
bool m_invoked_via_document_write { false };
|
||||
bool m_aborted { false };
|
||||
bool m_parser_pause_flag { false };
|
||||
bool m_stop_parsing { false };
|
||||
size_t m_script_nesting_level { 0 };
|
||||
|
||||
RefPtr<DOM::Document> m_document;
|
||||
RefPtr<HTMLHeadElement> m_head_element;
|
||||
RefPtr<HTMLFormElement> m_form_element;
|
||||
RefPtr<DOM::Element> m_context_element;
|
||||
|
||||
Vector<HTMLToken> m_pending_table_character_tokens;
|
||||
|
||||
RefPtr<DOM::Text> m_character_insertion_node;
|
||||
StringBuilder m_character_insertion_builder;
|
||||
};
|
||||
|
||||
}
|
86
Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
Normal file
86
Libraries/LibWeb/HTML/Parser/HTMLToken.cpp
Normal file
|
@ -0,0 +1,86 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Builds a human-readable description of this token: the token type name,
// followed by a brace-wrapped summary of the payload for DOCTYPE, tag, comment
// and character tokens. EndOfFile prints only its name; Invalid tokens assert.
String HTMLToken::to_string() const
{
    StringBuilder description;

    // Tag payload: the tag name plus every attribute as name="value".
    auto append_tag_summary = [&] {
        description.append(" { name: '");
        description.append(m_tag.tag_name.to_string());
        description.append("', { ");
        for (auto& entry : m_tag.attributes) {
            description.append(entry.local_name_builder.to_string());
            description.append("=\"");
            description.append(entry.value_builder.to_string());
            description.append("\" ");
        }
        description.append("} }");
    };

    // Comment/character payload: the raw data.
    auto append_data_summary = [&] {
        description.append(" { data: '");
        description.append(m_comment_or_character.data.to_string());
        description.append("' }");
    };

    switch (m_type) {
    case Type::DOCTYPE:
        description.append("DOCTYPE");
        description.append(" { name: '");
        description.append(m_doctype.name.to_string());
        description.append("' }");
        break;
    case Type::StartTag:
        description.append("StartTag");
        append_tag_summary();
        break;
    case Type::EndTag:
        description.append("EndTag");
        append_tag_summary();
        break;
    case Type::Comment:
        description.append("Comment");
        append_data_summary();
        break;
    case Type::Character:
        description.append("Character");
        append_data_summary();
        break;
    case Type::EndOfFile:
        description.append("EndOfFile");
        break;
    case Type::Invalid:
        ASSERT_NOT_REACHED();
    }

    return description.to_string();
}
|
||||
|
||||
}
|
212
Libraries/LibWeb/HTML/Parser/HTMLToken.h
Normal file
212
Libraries/LibWeb/HTML/Parser/HTMLToken.h
Normal file
|
@ -0,0 +1,212 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/FlyString.h>
|
||||
#include <AK/String.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <AK/Vector.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class HTMLToken {
|
||||
friend class HTMLDocumentParser;
|
||||
friend class HTMLTokenizer;
|
||||
|
||||
public:
|
||||
enum class Type {
|
||||
Invalid,
|
||||
DOCTYPE,
|
||||
StartTag,
|
||||
EndTag,
|
||||
Comment,
|
||||
Character,
|
||||
EndOfFile,
|
||||
};
|
||||
|
||||
static HTMLToken make_character(u32 codepoint)
|
||||
{
|
||||
HTMLToken token;
|
||||
token.m_type = Type::Character;
|
||||
token.m_comment_or_character.data.append(codepoint);
|
||||
return token;
|
||||
}
|
||||
|
||||
static HTMLToken make_start_tag(const FlyString& tag_name)
|
||||
{
|
||||
HTMLToken token;
|
||||
token.m_type = Type::StartTag;
|
||||
token.m_tag.tag_name.append(tag_name);
|
||||
return token;
|
||||
}
|
||||
|
||||
bool is_doctype() const { return m_type == Type::DOCTYPE; }
|
||||
bool is_start_tag() const { return m_type == Type::StartTag; }
|
||||
bool is_end_tag() const { return m_type == Type::EndTag; }
|
||||
bool is_comment() const { return m_type == Type::Comment; }
|
||||
bool is_character() const { return m_type == Type::Character; }
|
||||
bool is_end_of_file() const { return m_type == Type::EndOfFile; }
|
||||
|
||||
u32 codepoint() const
|
||||
{
|
||||
ASSERT(is_character());
|
||||
Utf8View view(m_comment_or_character.data.string_view());
|
||||
ASSERT(view.length_in_codepoints() == 1);
|
||||
return *view.begin();
|
||||
}
|
||||
|
||||
bool is_parser_whitespace() const
|
||||
{
|
||||
// NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
|
||||
if (!is_character())
|
||||
return false;
|
||||
switch (codepoint()) {
|
||||
case '\t':
|
||||
case '\n':
|
||||
case '\f':
|
||||
case '\r':
|
||||
case ' ':
|
||||
return true;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
String tag_name() const
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
return m_tag.tag_name.to_string();
|
||||
}
|
||||
|
||||
bool is_self_closing() const
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
return m_tag.self_closing;
|
||||
}
|
||||
|
||||
bool has_acknowledged_self_closing_flag() const
|
||||
{
|
||||
ASSERT(is_self_closing());
|
||||
return m_tag.self_closing_acknowledged;
|
||||
}
|
||||
|
||||
void acknowledge_self_closing_flag_if_set()
|
||||
{
|
||||
if (is_self_closing())
|
||||
m_tag.self_closing_acknowledged = true;
|
||||
}
|
||||
|
||||
StringView attribute(const FlyString& attribute_name)
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
for (auto& attribute : m_tag.attributes) {
|
||||
if (attribute_name == attribute.local_name_builder.string_view())
|
||||
return attribute.value_builder.string_view();
|
||||
}
|
||||
return {};
|
||||
}
|
||||
|
||||
void adjust_attribute_name(const FlyString& old_name, const FlyString& new_name)
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
for (auto& attribute : m_tag.attributes) {
|
||||
if (old_name == attribute.local_name_builder.string_view()) {
|
||||
attribute.local_name_builder.clear();
|
||||
attribute.local_name_builder.append(new_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void adjust_foreign_attribute(const FlyString& old_name, const FlyString& prefix, const FlyString& local_name, const FlyString& namespace_)
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
for (auto& attribute : m_tag.attributes) {
|
||||
if (old_name == attribute.local_name_builder.string_view()) {
|
||||
attribute.prefix_builder.clear();
|
||||
attribute.prefix_builder.append(prefix);
|
||||
|
||||
attribute.local_name_builder.clear();
|
||||
attribute.local_name_builder.append(local_name);
|
||||
|
||||
attribute.namespace_builder.clear();
|
||||
attribute.namespace_builder.append(namespace_);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void drop_attributes()
|
||||
{
|
||||
ASSERT(is_start_tag() || is_end_tag());
|
||||
m_tag.attributes.clear();
|
||||
}
|
||||
|
||||
Type type() const { return m_type; }
|
||||
|
||||
String to_string() const;
|
||||
|
||||
private:
|
||||
struct AttributeBuilder {
|
||||
StringBuilder prefix_builder;
|
||||
StringBuilder local_name_builder;
|
||||
StringBuilder namespace_builder;
|
||||
StringBuilder value_builder;
|
||||
};
|
||||
|
||||
Type m_type { Type::Invalid };
|
||||
|
||||
// Type::DOCTYPE
|
||||
struct {
|
||||
// NOTE: "Missing" is a distinct state from the empty string.
|
||||
|
||||
StringBuilder name;
|
||||
bool missing_name { true };
|
||||
StringBuilder public_identifier;
|
||||
bool missing_public_identifier { true };
|
||||
StringBuilder system_identifier;
|
||||
bool missing_system_identifier { true };
|
||||
bool force_quirks { false };
|
||||
} m_doctype;
|
||||
|
||||
// Type::StartTag
|
||||
// Type::EndTag
|
||||
struct {
|
||||
StringBuilder tag_name;
|
||||
bool self_closing { false };
|
||||
bool self_closing_acknowledged { false };
|
||||
Vector<AttributeBuilder> attributes;
|
||||
} m_tag;
|
||||
|
||||
// Type::Comment
|
||||
// Type::Character
|
||||
struct {
|
||||
StringBuilder data;
|
||||
} m_comment_or_character;
|
||||
};
|
||||
|
||||
}
|
2665
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
Normal file
2665
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp
Normal file
File diff suppressed because it is too large
Load diff
190
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
Normal file
190
Libraries/LibWeb/HTML/Parser/HTMLTokenizer.h
Normal file
|
@ -0,0 +1,190 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/Queue.h>
|
||||
#include <AK/StringView.h>
|
||||
#include <AK/Types.h>
|
||||
#include <AK/Utf8View.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLToken.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// X-macro listing every tokenizer state exactly once, in declaration order.
// Usage: define __ENUMERATE_TOKENIZER_STATE(state), expand ENUMERATE_TOKENIZER_STATES,
// then #undef __ENUMERATE_TOKENIZER_STATE again.
#define ENUMERATE_TOKENIZER_STATES \
    __ENUMERATE_TOKENIZER_STATE(Data) \
    __ENUMERATE_TOKENIZER_STATE(RCDATA) \
    __ENUMERATE_TOKENIZER_STATE(RAWTEXT) \
    __ENUMERATE_TOKENIZER_STATE(ScriptData) \
    __ENUMERATE_TOKENIZER_STATE(PLAINTEXT) \
    __ENUMERATE_TOKENIZER_STATE(TagOpen) \
    __ENUMERATE_TOKENIZER_STATE(EndTagOpen) \
    __ENUMERATE_TOKENIZER_STATE(TagName) \
    __ENUMERATE_TOKENIZER_STATE(RCDATALessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(RCDATAEndTagOpen) \
    __ENUMERATE_TOKENIZER_STATE(RCDATAEndTagName) \
    __ENUMERATE_TOKENIZER_STATE(RAWTEXTLessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagOpen) \
    __ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagName) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataLessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagOpen) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagName) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStart) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStartDash) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscaped) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDash) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDashDash) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedLessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagOpen) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagName) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeStart) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscaped) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDash) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDashDash) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedLessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeEnd) \
    __ENUMERATE_TOKENIZER_STATE(BeforeAttributeName) \
    __ENUMERATE_TOKENIZER_STATE(AttributeName) \
    __ENUMERATE_TOKENIZER_STATE(AfterAttributeName) \
    __ENUMERATE_TOKENIZER_STATE(BeforeAttributeValue) \
    __ENUMERATE_TOKENIZER_STATE(AttributeValueDoubleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(AttributeValueSingleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(AttributeValueUnquoted) \
    __ENUMERATE_TOKENIZER_STATE(AfterAttributeValueQuoted) \
    __ENUMERATE_TOKENIZER_STATE(SelfClosingStartTag) \
    __ENUMERATE_TOKENIZER_STATE(BogusComment) \
    __ENUMERATE_TOKENIZER_STATE(MarkupDeclarationOpen) \
    __ENUMERATE_TOKENIZER_STATE(CommentStart) \
    __ENUMERATE_TOKENIZER_STATE(CommentStartDash) \
    __ENUMERATE_TOKENIZER_STATE(Comment) \
    __ENUMERATE_TOKENIZER_STATE(CommentLessThanSign) \
    __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBang) \
    __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDash) \
    __ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDashDash) \
    __ENUMERATE_TOKENIZER_STATE(CommentEndDash) \
    __ENUMERATE_TOKENIZER_STATE(CommentEnd) \
    __ENUMERATE_TOKENIZER_STATE(CommentEndBang) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPE) \
    __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEName) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPEName) \
    __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEName) \
    __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicKeyword) \
    __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEPublicIdentifier) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierDoubleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierSingleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicIdentifier) \
    __ENUMERATE_TOKENIZER_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers) \
    __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemKeyword) \
    __ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPESystemIdentifier) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierDoubleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierSingleQuoted) \
    __ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemIdentifier) \
    __ENUMERATE_TOKENIZER_STATE(BogusDOCTYPE) \
    __ENUMERATE_TOKENIZER_STATE(CDATASection) \
    __ENUMERATE_TOKENIZER_STATE(CDATASectionBracket) \
    __ENUMERATE_TOKENIZER_STATE(CDATASectionEnd) \
    __ENUMERATE_TOKENIZER_STATE(CharacterReference) \
    __ENUMERATE_TOKENIZER_STATE(NamedCharacterReference) \
    __ENUMERATE_TOKENIZER_STATE(AmbiguousAmpersand) \
    __ENUMERATE_TOKENIZER_STATE(NumericCharacterReference) \
    __ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReferenceStart) \
    __ENUMERATE_TOKENIZER_STATE(DecimalCharacterReferenceStart) \
    __ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReference) \
    __ENUMERATE_TOKENIZER_STATE(DecimalCharacterReference) \
    __ENUMERATE_TOKENIZER_STATE(NumericCharacterReferenceEnd)
|
||||
|
||||
class HTMLTokenizer {
|
||||
public:
|
||||
explicit HTMLTokenizer(const StringView& input, const String& encoding);
|
||||
|
||||
enum class State {
|
||||
#define __ENUMERATE_TOKENIZER_STATE(state) state,
|
||||
ENUMERATE_TOKENIZER_STATES
|
||||
#undef __ENUMERATE_TOKENIZER_STATE
|
||||
};
|
||||
|
||||
Optional<HTMLToken> next_token();
|
||||
|
||||
void switch_to(Badge<HTMLDocumentParser>, State new_state);
|
||||
|
||||
void set_blocked(bool b) { m_blocked = b; }
|
||||
bool is_blocked() const { return m_blocked; }
|
||||
|
||||
String source() const { return m_decoded_input; }
|
||||
|
||||
private:
|
||||
Optional<u32> next_codepoint();
|
||||
Optional<u32> peek_codepoint(size_t offset) const;
|
||||
bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
|
||||
void create_new_token(HTMLToken::Type);
|
||||
bool current_end_tag_token_is_appropriate() const;
|
||||
|
||||
static const char* state_name(State state)
|
||||
{
|
||||
switch (state) {
|
||||
#define __ENUMERATE_TOKENIZER_STATE(state) \
|
||||
case State::state: \
|
||||
return #state;
|
||||
ENUMERATE_TOKENIZER_STATES
|
||||
#undef __ENUMERATE_TOKENIZER_STATE
|
||||
};
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
void will_emit(HTMLToken&);
|
||||
void will_switch_to(State);
|
||||
void will_reconsume_in(State);
|
||||
|
||||
bool consumed_as_part_of_an_attribute() const;
|
||||
|
||||
State m_state { State::Data };
|
||||
State m_return_state { State::Data };
|
||||
|
||||
Vector<u32> m_temporary_buffer;
|
||||
|
||||
String m_decoded_input;
|
||||
|
||||
StringView m_input;
|
||||
|
||||
Utf8View m_utf8_view;
|
||||
AK::Utf8CodepointIterator m_utf8_iterator;
|
||||
AK::Utf8CodepointIterator m_prev_utf8_iterator;
|
||||
|
||||
HTMLToken m_current_token;
|
||||
|
||||
HTMLToken m_last_emitted_start_tag;
|
||||
|
||||
bool m_has_emitted_eof { false };
|
||||
|
||||
Queue<HTMLToken> m_queued_tokens;
|
||||
|
||||
u32 m_character_reference_code { 0 };
|
||||
|
||||
bool m_blocked { false };
|
||||
};
|
||||
|
||||
}
|
|
@ -0,0 +1,83 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Out-of-line destructor; the body is intentionally empty.
ListOfActiveFormattingElements::~ListOfActiveFormattingElements() { }
|
||||
|
||||
// Appends a new entry referring to `element` at the end of the list.
void ListOfActiveFormattingElements::add(DOM::Element& element)
{
    m_entries.append(Entry { element });
}
|
||||
|
||||
void ListOfActiveFormattingElements::add_marker()
|
||||
{
|
||||
m_entries.append({ nullptr });
|
||||
}
|
||||
|
||||
// Returns true if any entry in the list refers to exactly this element object
// (comparison is by address, not by tag name).
bool ListOfActiveFormattingElements::contains(const DOM::Element& element) const
{
    for (size_t index = 0; index < m_entries.size(); ++index) {
        if (m_entries[index].element == &element)
            return true;
    }
    return false;
}
|
||||
|
||||
// Searches backwards from the end of the list for an element whose local name
// equals `tag_name`. The search gives up (returning nullptr) as soon as a marker
// is encountered, or when the start of the list is reached without a match.
DOM::Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(const FlyString& tag_name)
{
    ssize_t index = m_entries.size();
    while (--index >= 0) {
        auto& candidate = m_entries[index];
        // A marker ends the search; only entries after the last marker are considered.
        if (candidate.is_marker())
            return nullptr;
        if (candidate.element->local_name() == tag_name)
            return candidate.element;
    }
    return nullptr;
}
|
||||
|
||||
// Removes the first entry (in list order) that refers to exactly this element object.
void ListOfActiveFormattingElements::remove(DOM::Element& element)
{
    auto refers_to_element = [&element](auto& candidate) {
        return candidate.element == &element;
    };
    m_entries.remove_first_matching(refers_to_element);
}
|
||||
|
||||
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
|
||||
{
|
||||
while (!m_entries.is_empty()) {
|
||||
auto entry = m_entries.take_last();
|
||||
if (entry.is_marker())
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
|
@ -0,0 +1,65 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullRefPtrVector.h>
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class ListOfActiveFormattingElements {
|
||||
public:
|
||||
ListOfActiveFormattingElements() { }
|
||||
~ListOfActiveFormattingElements();
|
||||
|
||||
struct Entry {
|
||||
bool is_marker() const { return !element; }
|
||||
|
||||
RefPtr<DOM::Element> element;
|
||||
};
|
||||
|
||||
bool is_empty() const { return m_entries.is_empty(); }
|
||||
bool contains(const DOM::Element&) const;
|
||||
|
||||
void add(DOM::Element& element);
|
||||
void add_marker();
|
||||
|
||||
void remove(DOM::Element&);
|
||||
|
||||
const Vector<Entry>& entries() const { return m_entries; }
|
||||
Vector<Entry>& entries() { return m_entries; }
|
||||
|
||||
DOM::Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
|
||||
|
||||
void clear_up_to_the_last_marker();
|
||||
|
||||
private:
|
||||
Vector<Entry> m_entries;
|
||||
};
|
||||
|
||||
}
|
159
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp
Normal file
159
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.cpp
Normal file
|
@ -0,0 +1,159 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
|
||||
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
// Stop list used by has_in_scope(); has_in_button_scope() and has_in_list_item_scope()
// extend a copy of it with additional tag names.
static Vector<FlyString> s_base_list { "applet", "caption", "html", "table", "td", "th", "marquee", "object", "template" };
|
||||
|
||||
// Out-of-line destructor; the body is intentionally empty.
StackOfOpenElements::~StackOfOpenElements() { }
|
||||
|
||||
bool StackOfOpenElements::has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>& list) const
|
||||
{
|
||||
for (ssize_t i = m_elements.size() - 1; i >= 0; --i) {
|
||||
auto& node = m_elements.at(i);
|
||||
if (node.local_name() == tag_name)
|
||||
return true;
|
||||
if (list.contains_slow(node.local_name()))
|
||||
return false;
|
||||
}
|
||||
ASSERT_NOT_REACHED();
|
||||
}
|
||||
|
||||
bool StackOfOpenElements::has_in_scope(const FlyString& tag_name) const
|
||||
{
|
||||
return has_in_scope_impl(tag_name, s_base_list);
|
||||
}
|
||||
|
||||
// Same walk as the tag-name overload, but the match is by object identity:
// returns true when `target_node` itself is reached, and false when an element
// whose local name appears in `list` is reached first. Running off the start of
// the stack trips ASSERT_NOT_REACHED().
bool StackOfOpenElements::has_in_scope_impl(const DOM::Element& target_node, const Vector<FlyString>& list) const
{
    ssize_t index = m_elements.size();
    while (--index >= 0) {
        auto& node = m_elements.at(index);
        if (&node == &target_node)
            return true;
        if (list.contains_slow(node.local_name()))
            return false;
    }
    ASSERT_NOT_REACHED();
}
|
||||
|
||||
// Scope check for a specific element object using s_base_list as the stop list.
bool StackOfOpenElements::has_in_scope(const DOM::Element& target_node) const
{
    const auto& boundaries = s_base_list;
    return has_in_scope_impl(target_node, boundaries);
}
|
||||
|
||||
bool StackOfOpenElements::has_in_button_scope(const FlyString& tag_name) const
|
||||
{
|
||||
auto list = s_base_list;
|
||||
list.append("button");
|
||||
return has_in_scope_impl(tag_name, list);
|
||||
}
|
||||
|
||||
bool StackOfOpenElements::has_in_table_scope(const FlyString& tag_name) const
|
||||
{
|
||||
return has_in_scope_impl(tag_name, { "html", "table", "template" });
|
||||
}
|
||||
|
||||
bool StackOfOpenElements::has_in_list_item_scope(const FlyString& tag_name) const
|
||||
{
|
||||
auto list = s_base_list;
|
||||
list.append("ol");
|
||||
list.append("ul");
|
||||
return has_in_scope_impl(tag_name, list);
|
||||
}
|
||||
|
||||
bool StackOfOpenElements::has_in_select_scope(const FlyString& tag_name) const
|
||||
{
|
||||
return has_in_scope_impl(tag_name, { "option", "optgroup" });
|
||||
}
|
||||
|
||||
// Returns true if exactly this element object is anywhere on the stack
// (comparison is by address).
bool StackOfOpenElements::contains(const DOM::Element& element) const
{
    for (size_t index = 0; index < m_elements.size(); ++index) {
        if (&element == &m_elements[index])
            return true;
    }
    return false;
}
|
||||
|
||||
bool StackOfOpenElements::contains(const FlyString& tag_name) const
|
||||
{
|
||||
for (auto& element_on_stack : m_elements) {
|
||||
if (element_on_stack.local_name() == tag_name)
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
void StackOfOpenElements::pop_until_an_element_with_tag_name_has_been_popped(const FlyString& tag_name)
|
||||
{
|
||||
while (m_elements.last().local_name() != tag_name)
|
||||
pop();
|
||||
pop();
|
||||
}
|
||||
|
||||
// Scans from the end of the stack back towards `formatting_element` and returns
// the element closest to `formatting_element` (among those after it on the stack)
// for which HTMLDocumentParser::is_special_tag() holds, or nullptr if none does.
// If `formatting_element` is not on the stack, the whole stack is scanned.
DOM::Element* StackOfOpenElements::topmost_special_node_below(const DOM::Element& formatting_element)
{
    DOM::Element* topmost_special = nullptr;
    ssize_t index = m_elements.size();
    while (--index >= 0) {
        auto& candidate = m_elements[index];
        if (&candidate == &formatting_element)
            return topmost_special;
        // Later iterations overwrite earlier finds, so the last hit before the
        // formatting element is the one that is kept.
        if (HTMLDocumentParser::is_special_tag(candidate.local_name()))
            topmost_special = &candidate;
    }
    return topmost_special;
}
|
||||
|
||||
// Returns the element nearest the end of the stack whose local name equals
// `tag_name`, or nullptr when no element on the stack matches.
DOM::Element* StackOfOpenElements::last_element_with_tag_name(const FlyString& tag_name)
{
    ssize_t index = m_elements.size();
    while (--index >= 0) {
        auto& candidate = m_elements[index];
        if (candidate.local_name() == tag_name)
            return &candidate;
    }
    return nullptr;
}
|
||||
|
||||
// Returns the element that sits just before `target` on the stack (i.e. at the
// next lower index), or nullptr if `target` is not on the stack or is the first entry.
DOM::Element* StackOfOpenElements::element_before(const DOM::Element& target)
{
    ssize_t index = m_elements.size();

    // Phase 1: walk backwards until the target is located.
    while (--index >= 0) {
        if (&m_elements[index] == &target)
            break;
    }

    // Phase 2: keep walking backwards and return the first entry that is not the
    // target itself. If phase 1 ran off the start, this loop does not execute.
    while (--index >= 0) {
        auto& candidate = m_elements[index];
        if (&candidate != &target)
            return &candidate;
    }
    return nullptr;
}
|
||||
|
||||
}
|
78
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h
Normal file
78
Libraries/LibWeb/HTML/Parser/StackOfOpenElements.h
Normal file
|
@ -0,0 +1,78 @@
|
|||
/*
|
||||
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions are met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright notice, this
|
||||
* list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright notice,
|
||||
* this list of conditions and the following disclaimer in the documentation
|
||||
* and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
||||
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
||||
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <AK/NonnullRefPtrVector.h>
|
||||
#include <LibWeb/DOM/Element.h>
|
||||
#include <LibWeb/Forward.h>
|
||||
|
||||
namespace Web::HTML {
|
||||
|
||||
class StackOfOpenElements {
|
||||
public:
|
||||
StackOfOpenElements() { }
|
||||
~StackOfOpenElements();
|
||||
|
||||
DOM::Element& first() { return m_elements.first(); }
|
||||
DOM::Element& last() { return m_elements.last(); }
|
||||
|
||||
bool is_empty() const { return m_elements.is_empty(); }
|
||||
void push(NonnullRefPtr<DOM::Element> element) { m_elements.append(move(element)); }
|
||||
NonnullRefPtr<DOM::Element> pop() { return m_elements.take_last(); }
|
||||
|
||||
const DOM::Element& current_node() const { return m_elements.last(); }
|
||||
DOM::Element& current_node() { return m_elements.last(); }
|
||||
|
||||
bool has_in_scope(const FlyString& tag_name) const;
|
||||
bool has_in_button_scope(const FlyString& tag_name) const;
|
||||
bool has_in_table_scope(const FlyString& tag_name) const;
|
||||
bool has_in_list_item_scope(const FlyString& tag_name) const;
|
||||
bool has_in_select_scope(const FlyString& tag_name) const;
|
||||
|
||||
bool has_in_scope(const DOM::Element&) const;
|
||||
|
||||
bool contains(const DOM::Element&) const;
|
||||
bool contains(const FlyString& tag_name) const;
|
||||
|
||||
const NonnullRefPtrVector<DOM::Element>& elements() const { return m_elements; }
|
||||
NonnullRefPtrVector<DOM::Element>& elements() { return m_elements; }
|
||||
|
||||
void pop_until_an_element_with_tag_name_has_been_popped(const FlyString&);
|
||||
|
||||
DOM::Element* topmost_special_node_below(const DOM::Element&);
|
||||
|
||||
DOM::Element* last_element_with_tag_name(const FlyString&);
|
||||
DOM::Element* element_before(const DOM::Element&);
|
||||
|
||||
private:
|
||||
bool has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>&) const;
|
||||
bool has_in_scope_impl(const DOM::Element& target_node, const Vector<FlyString>&) const;
|
||||
|
||||
NonnullRefPtrVector<DOM::Element> m_elements;
|
||||
};
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue