1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 09:37:44 +00:00

Libraries: Move to Userland/Libraries/

This commit is contained in:
Andreas Kling 2021-01-12 12:17:30 +01:00
parent dc28c07fa5
commit 13d7c09125
1857 changed files with 266 additions and 274 deletions

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,43 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/StringView.h>
#include <AK/Vector.h>
namespace Web::HTML {

// Value returned by code_points_from_entity(): a short list of code points
// paired with a view of an entity string.
// NOTE(review): presumably `entity` is the matched entity text and
// `code_points` is what it expands to; confirm against the definition.
struct EntityMatch {
    Vector<u32, 2> code_points;
    StringView entity;
};

// Looks up an entity for the given text; an empty Optional means no result.
// Defined outside this header.
Optional<EntityMatch> code_points_from_entity(const StringView&);

}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,193 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Node.h>
#include <LibWeb/HTML/Parser/HTMLTokenizer.h>
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
namespace Web::HTML {
// X-macro listing every parser insertion mode by name.
// To use it, define __ENUMERATE_INSERTION_MODE(mode), expand
// ENUMERATE_INSERTION_MODES, then #undef the helper again;
// HTMLDocumentParser::InsertionMode is generated this way.
#define ENUMERATE_INSERTION_MODES \
__ENUMERATE_INSERTION_MODE(Initial) \
__ENUMERATE_INSERTION_MODE(BeforeHTML) \
__ENUMERATE_INSERTION_MODE(BeforeHead) \
__ENUMERATE_INSERTION_MODE(InHead) \
__ENUMERATE_INSERTION_MODE(InHeadNoscript) \
__ENUMERATE_INSERTION_MODE(AfterHead) \
__ENUMERATE_INSERTION_MODE(InBody) \
__ENUMERATE_INSERTION_MODE(Text) \
__ENUMERATE_INSERTION_MODE(InTable) \
__ENUMERATE_INSERTION_MODE(InTableText) \
__ENUMERATE_INSERTION_MODE(InCaption) \
__ENUMERATE_INSERTION_MODE(InColumnGroup) \
__ENUMERATE_INSERTION_MODE(InTableBody) \
__ENUMERATE_INSERTION_MODE(InRow) \
__ENUMERATE_INSERTION_MODE(InCell) \
__ENUMERATE_INSERTION_MODE(InSelect) \
__ENUMERATE_INSERTION_MODE(InSelectInTable) \
__ENUMERATE_INSERTION_MODE(InTemplate) \
__ENUMERATE_INSERTION_MODE(AfterBody) \
__ENUMERATE_INSERTION_MODE(InFrameset) \
__ENUMERATE_INSERTION_MODE(AfterFrameset) \
__ENUMERATE_INSERTION_MODE(AfterAfterBody) \
__ENUMERATE_INSERTION_MODE(AfterAfterFrameset)
// Free-function entry point taking the input text, a URL and an encoding name.
// Returns a RefPtr, so callers should be prepared for a null result.
// NOTE(review): the definition is not in this header; presumably it wraps
// HTMLDocumentParser::run() -- confirm.
RefPtr<DOM::Document> parse_html_document(const StringView&, const URL&, const String& encoding);
// Tree-construction half of the HTML parser: owns an HTMLTokenizer plus the
// parser state declared below (insertion mode, stack of open elements, list of
// active formatting elements, assorted flags) and a reference to the
// DOM::Document it works on. Only declarations live here; behaviour is
// implemented outside this header.
class HTMLDocumentParser {
public:
// Constructs a parser for `input` in the given encoding, bound to an existing document.
HTMLDocumentParser(DOM::Document&, const StringView& input, const String& encoding);
~HTMLDocumentParser();
// Runs the parser; the URL parameter's use is defined in the implementation.
void run(const URL&);
DOM::Document& document();
// Static fragment-parsing entry point: takes a context element and markup,
// and returns the resulting nodes.
static NonnullRefPtrVector<DOM::Node> parse_html_fragment(DOM::Element& context_element, const StringView&);
// One enumerator per entry of ENUMERATE_INSERTION_MODES, in the same order.
enum class InsertionMode {
#define __ENUMERATE_INSERTION_MODE(mode) mode,
ENUMERATE_INSERTION_MODES
#undef __ENUMERATE_INSERTION_MODE
};
// Current insertion mode (reads m_insertion_mode).
InsertionMode insertion_mode() const { return m_insertion_mode; }
// Classifies a (tag name, namespace) pair as "special"; defined elsewhere.
static bool is_special_tag(const FlyString& tag_name, const FlyString& namespace_);
private:
const char* insertion_mode_name() const;
DOM::QuirksMode which_quirks_mode(const HTMLToken&) const;
// Token handlers; their names mirror the insertion modes listed in
// ENUMERATE_INSERTION_MODES. Definitions are not part of this header.
void handle_initial(HTMLToken&);
void handle_before_html(HTMLToken&);
void handle_before_head(HTMLToken&);
void handle_in_head(HTMLToken&);
void handle_in_head_noscript(HTMLToken&);
void handle_after_head(HTMLToken&);
void handle_in_body(HTMLToken&);
void handle_after_body(HTMLToken&);
void handle_after_after_body(HTMLToken&);
void handle_text(HTMLToken&);
void handle_in_table(HTMLToken&);
void handle_in_table_body(HTMLToken&);
void handle_in_row(HTMLToken&);
void handle_in_cell(HTMLToken&);
void handle_in_table_text(HTMLToken&);
void handle_in_select_in_table(HTMLToken&);
void handle_in_select(HTMLToken&);
void handle_in_caption(HTMLToken&);
void handle_in_column_group(HTMLToken&);
void handle_in_template(HTMLToken&);
void handle_in_frameset(HTMLToken&);
void handle_after_frameset(HTMLToken&);
void handle_after_after_frameset(HTMLToken&);
// Only raises the m_stop_parsing flag; whoever polls the flag does the actual stopping.
void stop_parsing() { m_stop_parsing = true; }
// `exception` defaults to an empty FlyString.
// NOTE(review): presumably the tag name to leave open -- confirm in the implementation.
void generate_implied_end_tags(const FlyString& exception = {});
void generate_all_implied_end_tags_thoroughly();
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
NonnullRefPtr<DOM::Element> create_element_for(const HTMLToken&, const FlyString& namespace_);
// Result of find_appropriate_place_for_inserting_node(): a parent node plus
// an optional sibling to insert before (both are nullable RefPtrs).
struct AdjustedInsertionLocation {
RefPtr<DOM::Node> parent;
RefPtr<DOM::Node> insert_before_sibling;
};
AdjustedInsertionLocation find_appropriate_place_for_inserting_node();
// Returns a raw pointer, so a null result is possible.
DOM::Text* find_character_insertion_node();
void flush_character_insertions();
RefPtr<DOM::Element> insert_foreign_element(const HTMLToken&, const FlyString&);
RefPtr<DOM::Element> insert_html_element(const HTMLToken&);
DOM::Element& current_node();
DOM::Element& adjusted_current_node();
DOM::Element& node_before_current_node();
void insert_character(u32 data);
void insert_comment(HTMLToken&);
void reconstruct_the_active_formatting_elements();
void close_a_p_element();
void process_using_the_rules_for(InsertionMode, HTMLToken&);
void process_using_the_rules_for_foreign_content(HTMLToken&);
void parse_generic_raw_text_element(HTMLToken&);
void increment_script_nesting_level();
void decrement_script_nesting_level();
// Reads m_script_nesting_level.
size_t script_nesting_level() const { return m_script_nesting_level; }
void reset_the_insertion_mode_appropriately();
void adjust_mathml_attributes(HTMLToken&);
void adjust_svg_tag_names(HTMLToken&);
void adjust_svg_attributes(HTMLToken&);
void adjust_foreign_attributes(HTMLToken&);
// Return value of run_the_adoption_agency_algorithm().
// Unscoped enum: the enumerators are visible without qualification inside the class.
enum AdoptionAgencyAlgorithmOutcome {
DoNothing,
RunAnyOtherEndTagSteps,
};
AdoptionAgencyAlgorithmOutcome run_the_adoption_agency_algorithm(HTMLToken&);
void clear_the_stack_back_to_a_table_context();
void clear_the_stack_back_to_a_table_body_context();
void clear_the_stack_back_to_a_table_row_context();
void close_the_cell();
// --- Parser state. Members are initialised in declaration order, so the
// --- order below must stay in sync with the constructor.
InsertionMode m_insertion_mode { InsertionMode::Initial };
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
StackOfOpenElements m_stack_of_open_elements;
Vector<InsertionMode> m_stack_of_template_insertion_modes;
ListOfActiveFormattingElements m_list_of_active_formatting_elements;
HTMLTokenizer m_tokenizer;
// Boolean flags with their initial values.
bool m_foster_parenting { false };
bool m_frameset_ok { true };
bool m_parsing_fragment { false };
bool m_scripting_enabled { true };
bool m_invoked_via_document_write { false };
bool m_aborted { false };
bool m_parser_pause_flag { false };
// Set by stop_parsing().
bool m_stop_parsing { false };
size_t m_script_nesting_level { 0 };
// The document being worked on (never null).
NonnullRefPtr<DOM::Document> m_document;
// Nullable element pointers.
RefPtr<HTMLHeadElement> m_head_element;
RefPtr<HTMLFormElement> m_form_element;
RefPtr<DOM::Element> m_context_element;
Vector<HTMLToken> m_pending_table_character_tokens;
// NOTE(review): presumably character data is buffered in the builder and
// written to this text node by flush_character_insertions() -- confirm.
RefPtr<DOM::Text> m_character_insertion_node;
StringBuilder m_character_insertion_builder;
};
}

View file

@ -0,0 +1,83 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/HTML/Parser/HTMLToken.h>
namespace Web::HTML {
// Renders a human-readable description of the token: the type name followed by
// a brace-delimited summary of the payload for that type (DOCTYPE name, tag
// name and attributes, or comment/character data). EndOfFile has no payload.
// Calling this on an Invalid token trips ASSERT_NOT_REACHED().
String HTMLToken::to_string() const
{
    StringBuilder description;

    // Shared by StartTag and EndTag: tag name plus every attribute as name="value".
    auto append_tag_details = [&] {
        description.append(" { name: '");
        description.append(m_tag.tag_name.to_string());
        description.append("', { ");
        for (auto& attr : m_tag.attributes) {
            description.append(attr.local_name_builder.to_string());
            description.append("=\"");
            description.append(attr.value_builder.to_string());
            description.append("\" ");
        }
        description.append("} }");
    };

    // Shared by Comment and Character: the raw data payload.
    auto append_data_details = [&] {
        description.append(" { data: '");
        description.append(m_comment_or_character.data.to_string());
        description.append("' }");
    };

    switch (m_type) {
    case Type::DOCTYPE:
        description.append("DOCTYPE");
        description.append(" { name: '");
        description.append(m_doctype.name.to_string());
        description.append("' }");
        break;
    case Type::StartTag:
        description.append("StartTag");
        append_tag_details();
        break;
    case Type::EndTag:
        description.append("EndTag");
        append_tag_details();
        break;
    case Type::Comment:
        description.append("Comment");
        append_data_details();
        break;
    case Type::Character:
        description.append("Character");
        append_data_details();
        break;
    case Type::EndOfFile:
        description.append("EndOfFile");
        break;
    case Type::Invalid:
        ASSERT_NOT_REACHED();
    }

    return description.to_string();
}
}

View file

@ -0,0 +1,226 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/FlyString.h>
#include <AK/String.h>
#include <AK/StringBuilder.h>
#include <AK/Types.h>
#include <AK/Utf8View.h>
#include <AK/Vector.h>
namespace Web::HTML {
class HTMLToken {
friend class HTMLDocumentParser;
friend class HTMLTokenizer;
public:
enum class Type {
Invalid,
DOCTYPE,
StartTag,
EndTag,
Comment,
Character,
EndOfFile,
};
static HTMLToken make_character(u32 code_point)
{
HTMLToken token;
token.m_type = Type::Character;
token.m_comment_or_character.data.append(code_point);
return token;
}
static HTMLToken make_start_tag(const FlyString& tag_name)
{
HTMLToken token;
token.m_type = Type::StartTag;
token.m_tag.tag_name.append(tag_name);
return token;
}
bool is_doctype() const { return m_type == Type::DOCTYPE; }
bool is_start_tag() const { return m_type == Type::StartTag; }
bool is_end_tag() const { return m_type == Type::EndTag; }
bool is_comment() const { return m_type == Type::Comment; }
bool is_character() const { return m_type == Type::Character; }
bool is_end_of_file() const { return m_type == Type::EndOfFile; }
u32 code_point() const
{
ASSERT(is_character());
Utf8View view(m_comment_or_character.data.string_view());
ASSERT(view.length() == 1);
return *view.begin();
}
bool is_parser_whitespace() const
{
// NOTE: The parser considers '\r' to be whitespace, while the tokenizer does not.
if (!is_character())
return false;
switch (code_point()) {
case '\t':
case '\n':
case '\f':
case '\r':
case ' ':
return true;
default:
return false;
}
}
String tag_name() const
{
ASSERT(is_start_tag() || is_end_tag());
return m_tag.tag_name.to_string();
}
bool is_self_closing() const
{
ASSERT(is_start_tag() || is_end_tag());
return m_tag.self_closing;
}
bool has_acknowledged_self_closing_flag() const
{
ASSERT(is_self_closing());
return m_tag.self_closing_acknowledged;
}
void acknowledge_self_closing_flag_if_set()
{
if (is_self_closing())
m_tag.self_closing_acknowledged = true;
}
StringView attribute(const FlyString& attribute_name)
{
ASSERT(is_start_tag() || is_end_tag());
for (auto& attribute : m_tag.attributes) {
if (attribute_name == attribute.local_name_builder.string_view())
return attribute.value_builder.string_view();
}
return {};
}
bool has_attribute(const FlyString& attribute_name)
{
return !attribute(attribute_name).is_null();
}
void adjust_tag_name(const FlyString& old_name, const FlyString& new_name)
{
ASSERT(is_start_tag() || is_end_tag());
if (old_name == m_tag.tag_name.string_view()) {
m_tag.tag_name.clear();
m_tag.tag_name.append(new_name);
}
}
void adjust_attribute_name(const FlyString& old_name, const FlyString& new_name)
{
ASSERT(is_start_tag() || is_end_tag());
for (auto& attribute : m_tag.attributes) {
if (old_name == attribute.local_name_builder.string_view()) {
attribute.local_name_builder.clear();
attribute.local_name_builder.append(new_name);
}
}
}
void adjust_foreign_attribute(const FlyString& old_name, const FlyString& prefix, const FlyString& local_name, const FlyString& namespace_)
{
ASSERT(is_start_tag() || is_end_tag());
for (auto& attribute : m_tag.attributes) {
if (old_name == attribute.local_name_builder.string_view()) {
attribute.prefix_builder.clear();
attribute.prefix_builder.append(prefix);
attribute.local_name_builder.clear();
attribute.local_name_builder.append(local_name);
attribute.namespace_builder.clear();
attribute.namespace_builder.append(namespace_);
}
}
}
void drop_attributes()
{
ASSERT(is_start_tag() || is_end_tag());
m_tag.attributes.clear();
}
Type type() const { return m_type; }
String to_string() const;
private:
struct AttributeBuilder {
StringBuilder prefix_builder;
StringBuilder local_name_builder;
StringBuilder namespace_builder;
StringBuilder value_builder;
};
Type m_type { Type::Invalid };
// Type::DOCTYPE
struct {
// NOTE: "Missing" is a distinct state from the empty string.
StringBuilder name;
bool missing_name { true };
StringBuilder public_identifier;
bool missing_public_identifier { true };
StringBuilder system_identifier;
bool missing_system_identifier { true };
bool force_quirks { false };
} m_doctype;
// Type::StartTag
// Type::EndTag
struct {
StringBuilder tag_name;
bool self_closing { false };
bool self_closing_acknowledged { false };
Vector<AttributeBuilder> attributes;
} m_tag;
// Type::Comment
// Type::Character
struct {
StringBuilder data;
} m_comment_or_character;
};
}

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,190 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/Queue.h>
#include <AK/StringView.h>
#include <AK/Types.h>
#include <AK/Utf8View.h>
#include <LibWeb/Forward.h>
#include <LibWeb/HTML/Parser/HTMLToken.h>
namespace Web::HTML {
// X-macro listing every tokenizer state by name.
// To use it, define __ENUMERATE_TOKENIZER_STATE(state), expand
// ENUMERATE_TOKENIZER_STATES, then #undef the helper again. HTMLTokenizer uses
// it to generate both the State enum and the state_name() string table.
#define ENUMERATE_TOKENIZER_STATES \
__ENUMERATE_TOKENIZER_STATE(Data) \
__ENUMERATE_TOKENIZER_STATE(RCDATA) \
__ENUMERATE_TOKENIZER_STATE(RAWTEXT) \
__ENUMERATE_TOKENIZER_STATE(ScriptData) \
__ENUMERATE_TOKENIZER_STATE(PLAINTEXT) \
__ENUMERATE_TOKENIZER_STATE(TagOpen) \
__ENUMERATE_TOKENIZER_STATE(EndTagOpen) \
__ENUMERATE_TOKENIZER_STATE(TagName) \
__ENUMERATE_TOKENIZER_STATE(RCDATALessThanSign) \
__ENUMERATE_TOKENIZER_STATE(RCDATAEndTagOpen) \
__ENUMERATE_TOKENIZER_STATE(RCDATAEndTagName) \
__ENUMERATE_TOKENIZER_STATE(RAWTEXTLessThanSign) \
__ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagOpen) \
__ENUMERATE_TOKENIZER_STATE(RAWTEXTEndTagName) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataLessThanSign) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagOpen) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEndTagName) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStart) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapeStartDash) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscaped) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDash) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedDashDash) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedLessThanSign) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagOpen) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataEscapedEndTagName) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeStart) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscaped) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDash) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedDashDash) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapedLessThanSign) \
__ENUMERATE_TOKENIZER_STATE(ScriptDataDoubleEscapeEnd) \
__ENUMERATE_TOKENIZER_STATE(BeforeAttributeName) \
__ENUMERATE_TOKENIZER_STATE(AttributeName) \
__ENUMERATE_TOKENIZER_STATE(AfterAttributeName) \
__ENUMERATE_TOKENIZER_STATE(BeforeAttributeValue) \
__ENUMERATE_TOKENIZER_STATE(AttributeValueDoubleQuoted) \
__ENUMERATE_TOKENIZER_STATE(AttributeValueSingleQuoted) \
__ENUMERATE_TOKENIZER_STATE(AttributeValueUnquoted) \
__ENUMERATE_TOKENIZER_STATE(AfterAttributeValueQuoted) \
__ENUMERATE_TOKENIZER_STATE(SelfClosingStartTag) \
__ENUMERATE_TOKENIZER_STATE(BogusComment) \
__ENUMERATE_TOKENIZER_STATE(MarkupDeclarationOpen) \
__ENUMERATE_TOKENIZER_STATE(CommentStart) \
__ENUMERATE_TOKENIZER_STATE(CommentStartDash) \
__ENUMERATE_TOKENIZER_STATE(Comment) \
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSign) \
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBang) \
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDash) \
__ENUMERATE_TOKENIZER_STATE(CommentLessThanSignBangDashDash) \
__ENUMERATE_TOKENIZER_STATE(CommentEndDash) \
__ENUMERATE_TOKENIZER_STATE(CommentEnd) \
__ENUMERATE_TOKENIZER_STATE(CommentEndBang) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPE) \
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEName) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPEName) \
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEName) \
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicKeyword) \
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPEPublicIdentifier) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierDoubleQuoted) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPEPublicIdentifierSingleQuoted) \
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPEPublicIdentifier) \
__ENUMERATE_TOKENIZER_STATE(BetweenDOCTYPEPublicAndSystemIdentifiers) \
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemKeyword) \
__ENUMERATE_TOKENIZER_STATE(BeforeDOCTYPESystemIdentifier) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierDoubleQuoted) \
__ENUMERATE_TOKENIZER_STATE(DOCTYPESystemIdentifierSingleQuoted) \
__ENUMERATE_TOKENIZER_STATE(AfterDOCTYPESystemIdentifier) \
__ENUMERATE_TOKENIZER_STATE(BogusDOCTYPE) \
__ENUMERATE_TOKENIZER_STATE(CDATASection) \
__ENUMERATE_TOKENIZER_STATE(CDATASectionBracket) \
__ENUMERATE_TOKENIZER_STATE(CDATASectionEnd) \
__ENUMERATE_TOKENIZER_STATE(CharacterReference) \
__ENUMERATE_TOKENIZER_STATE(NamedCharacterReference) \
__ENUMERATE_TOKENIZER_STATE(AmbiguousAmpersand) \
__ENUMERATE_TOKENIZER_STATE(NumericCharacterReference) \
__ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReferenceStart) \
__ENUMERATE_TOKENIZER_STATE(DecimalCharacterReferenceStart) \
__ENUMERATE_TOKENIZER_STATE(HexadecimalCharacterReference) \
__ENUMERATE_TOKENIZER_STATE(DecimalCharacterReference) \
__ENUMERATE_TOKENIZER_STATE(NumericCharacterReferenceEnd)
// State-machine tokenizer that turns input text into HTMLTokens, one per
// next_token() call. Only declarations (plus a few trivial inline accessors)
// live here; the state machine itself is implemented outside this header.
class HTMLTokenizer {
public:
// Constructs a tokenizer over `input`, interpreted using the named encoding.
explicit HTMLTokenizer(const StringView& input, const String& encoding);
// One enumerator per entry of ENUMERATE_TOKENIZER_STATES, in the same order.
enum class State {
#define __ENUMERATE_TOKENIZER_STATE(state) state,
ENUMERATE_TOKENIZER_STATES
#undef __ENUMERATE_TOKENIZER_STATE
};
// Returns the next token, or an empty Optional when none is produced.
Optional<HTMLToken> next_token();
// State override that only HTMLDocumentParser can call (enforced by the Badge parameter).
void switch_to(Badge<HTMLDocumentParser>, State new_state);
// Plain setter/getter for the m_blocked flag.
void set_blocked(bool b) { m_blocked = b; }
bool is_blocked() const { return m_blocked; }
// Returns a copy of the decoded input string.
String source() const { return m_decoded_input; }
private:
Optional<u32> next_code_point();
Optional<u32> peek_code_point(size_t offset) const;
// Case-sensitive by default.
bool consume_next_if_match(const StringView&, CaseSensitivity = CaseSensitivity::CaseSensitive);
void create_new_token(HTMLToken::Type);
bool current_end_tag_token_is_appropriate() const;
// Maps a State to its enumerator name as a C string. The switch is generated
// from ENUMERATE_TOKENIZER_STATES, so it covers every enumerator; falling out
// of it is treated as unreachable.
static const char* state_name(State state)
{
switch (state) {
#define __ENUMERATE_TOKENIZER_STATE(state) \
case State::state: \
return #state;
ENUMERATE_TOKENIZER_STATES
#undef __ENUMERATE_TOKENIZER_STATE
};
ASSERT_NOT_REACHED();
}
// Hooks named after emit / state-switch / reconsume events; defined elsewhere.
void will_emit(HTMLToken&);
void will_switch_to(State);
void will_reconsume_in(State);
bool consumed_as_part_of_an_attribute() const;
// Both the current state and the return state start in Data.
State m_state { State::Data };
State m_return_state { State::Data };
Vector<u32> m_temporary_buffer;
// NOTE(review): m_input, m_utf8_view and the two iterators presumably all
// refer to m_decoded_input's storage, which would make the declaration order
// significant -- confirm in the constructor.
String m_decoded_input;
StringView m_input;
Utf8View m_utf8_view;
AK::Utf8CodepointIterator m_utf8_iterator;
AK::Utf8CodepointIterator m_prev_utf8_iterator;
HTMLToken m_current_token;
HTMLToken m_last_emitted_start_tag;
bool m_has_emitted_eof { false };
Queue<HTMLToken> m_queued_tokens;
u32 m_character_reference_code { 0 };
// Accessed through set_blocked() / is_blocked().
bool m_blocked { false };
};
}

View file

@ -0,0 +1,84 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/Parser/ListOfActiveFormattingElements.h>
namespace Web::HTML {
// Nothing to release by hand; the members clean up after themselves.
ListOfActiveFormattingElements::~ListOfActiveFormattingElements() = default;
// Appends an entry referring to `element` at the end of the list.
void ListOfActiveFormattingElements::add(DOM::Element& element)
{
    // FIXME: Implement the Noah's Ark clause https://html.spec.whatwg.org/multipage/parsing.html#push-onto-the-list-of-active-formatting-elements
    m_entries.append(Entry { element });
}
void ListOfActiveFormattingElements::add_marker()
{
m_entries.append({ nullptr });
}
// Reports whether any entry refers to exactly this element (compared by address).
bool ListOfActiveFormattingElements::contains(const DOM::Element& element) const
{
    const auto* wanted = &element;
    for (size_t index = 0; index < m_entries.size(); ++index) {
        if (m_entries[index].element == wanted)
            return true;
    }
    return false;
}
// Searches from the end of the list towards the front for an element whose
// local name equals `tag_name`. The search stops at the first marker it meets.
// Returns nullptr when a marker or the start of the list is reached first.
DOM::Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(const FlyString& tag_name)
{
    for (size_t remaining = m_entries.size(); remaining > 0; --remaining) {
        auto& candidate = m_entries[remaining - 1];
        if (candidate.is_marker())
            return nullptr;
        if (candidate.element->local_name() == tag_name)
            return candidate.element;
    }
    return nullptr;
}
// Removes the first entry that refers to exactly this element, if there is one.
void ListOfActiveFormattingElements::remove(DOM::Element& element)
{
    auto refers_to_element = [&](auto& entry) {
        return entry.element == &element;
    };
    m_entries.remove_first_matching(refers_to_element);
}
// Pops entries off the end of the list until a marker has been popped
// (the marker itself is removed too) or the list runs empty.
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
{
    for (;;) {
        if (m_entries.is_empty())
            return;
        auto popped = m_entries.take_last();
        if (popped.is_marker())
            return;
    }
}
}

View file

@ -0,0 +1,65 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/Forward.h>
namespace Web::HTML {
// Ordered list of formatting-element entries kept by the HTML parser.
// An entry either refers to an element or, when its pointer is null, is a marker.
class ListOfActiveFormattingElements {
public:
    struct Entry {
        RefPtr<DOM::Element> element;

        // A marker is an entry with no element.
        bool is_marker() const { return !element; }
    };

    ListOfActiveFormattingElements() = default;
    ~ListOfActiveFormattingElements();

    // Queries.
    bool is_empty() const { return m_entries.is_empty(); }
    bool contains(const DOM::Element&) const;
    DOM::Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);

    // Mutation.
    void add(DOM::Element& element);
    void add_marker();
    void remove(DOM::Element&);
    void clear_up_to_the_last_marker();

    // Direct access to the underlying entries.
    const Vector<Entry>& entries() const { return m_entries; }
    Vector<Entry>& entries() { return m_entries; }

private:
    Vector<Entry> m_entries;
};
}

View file

@ -0,0 +1,159 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <LibWeb/DOM/Element.h>
#include <LibWeb/HTML/Parser/HTMLDocumentParser.h>
#include <LibWeb/HTML/Parser/StackOfOpenElements.h>
namespace Web::HTML {
// Tag names that end an upward scope search in has_in_scope(). The button and
// list-item scope checks copy this list and append their extra tag names.
static Vector<FlyString> s_base_list { "applet", "caption", "html", "table", "td", "th", "marquee", "object", "template" };
// Nothing to release by hand; the members clean up after themselves.
StackOfOpenElements::~StackOfOpenElements() = default;
bool StackOfOpenElements::has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>& list) const
{
for (ssize_t i = m_elements.size() - 1; i >= 0; --i) {
auto& node = m_elements.at(i);
if (node.local_name() == tag_name)
return true;
if (list.contains_slow(node.local_name()))
return false;
}
ASSERT_NOT_REACHED();
}
bool StackOfOpenElements::has_in_scope(const FlyString& tag_name) const
{
return has_in_scope_impl(tag_name, s_base_list);
}
// Identity-based variant of the scope scan: walks the stack from the most recently
// pushed element backwards and returns true when it reaches `target_node` itself
// (compared by address), or false when it first reaches an element whose local name
// is in `list`. Exhausting the stack without either outcome is a programming error.
bool StackOfOpenElements::has_in_scope_impl(const DOM::Element& target_node, const Vector<FlyString>& list) const
{
    for (size_t remaining = m_elements.size(); remaining-- > 0;) {
        const DOM::Element& candidate = m_elements.at(remaining);
        const bool is_target = (&candidate == &target_node);
        if (is_target)
            return true;
        if (list.contains_slow(candidate.local_name()))
            return false;
    }
    ASSERT_NOT_REACHED();
}
// Scope query for one specific element (by identity), bounded by the base list.
bool StackOfOpenElements::has_in_scope(const DOM::Element& target_node) const
{
    const Vector<FlyString>& scope_boundaries = s_base_list;
    return has_in_scope_impl(target_node, scope_boundaries);
}
bool StackOfOpenElements::has_in_button_scope(const FlyString& tag_name) const
{
auto list = s_base_list;
list.append("button");
return has_in_scope_impl(tag_name, list);
}
bool StackOfOpenElements::has_in_table_scope(const FlyString& tag_name) const
{
return has_in_scope_impl(tag_name, { "html", "table", "template" });
}
bool StackOfOpenElements::has_in_list_item_scope(const FlyString& tag_name) const
{
auto list = s_base_list;
list.append("ol");
list.append("ul");
return has_in_scope_impl(tag_name, list);
}
bool StackOfOpenElements::has_in_select_scope(const FlyString& tag_name) const
{
return has_in_scope_impl(tag_name, { "option", "optgroup" });
}
// Returns true if `element` itself (compared by address) is anywhere on the stack.
bool StackOfOpenElements::contains(const DOM::Element& element) const
{
    const size_t count = m_elements.size();
    for (size_t index = 0; index < count; ++index) {
        const DOM::Element& entry = m_elements.at(index);
        if (&entry == &element)
            return true;
    }
    return false;
}
bool StackOfOpenElements::contains(const FlyString& tag_name) const
{
for (auto& element_on_stack : m_elements) {
if (element_on_stack.local_name() == tag_name)
return true;
}
return false;
}
// Pops elements off the stack until one whose local name equals `tag_name` has been
// removed; that matching element is the last one popped.
// The caller must ensure such an element is on the stack: there is no emptiness check here.
void StackOfOpenElements::pop_until_an_element_with_tag_name_has_been_popped(const FlyString& tag_name)
{
    for (;;) {
        auto popped = pop();
        if (popped->local_name() == tag_name)
            return;
    }
}
// Among the elements pushed after `formatting_element`, returns the one closest to it
// for which HTMLDocumentParser::is_special_tag() holds, or nullptr if there is none.
// If `formatting_element` is not on the stack, the whole stack is considered.
DOM::Element* StackOfOpenElements::topmost_special_node_below(const DOM::Element& formatting_element)
{
    DOM::Element* topmost_special = nullptr;
    size_t index = m_elements.size();
    while (index > 0) {
        --index;
        DOM::Element& candidate = m_elements[index];
        if (&candidate == &formatting_element)
            return topmost_special;
        if (!HTMLDocumentParser::is_special_tag(candidate.local_name(), candidate.namespace_()))
            continue;
        // Scanning moves towards formatting_element, so later hits replace earlier ones.
        topmost_special = &candidate;
    }
    return topmost_special;
}
// Finds the most recently pushed element whose local name equals `tag_name`.
// Returns the element together with its index in the stack, or { nullptr, -1 }
// when no element matches.
StackOfOpenElements::LastElementResult StackOfOpenElements::last_element_with_tag_name(const FlyString& tag_name)
{
    ssize_t index = m_elements.size();
    while (--index >= 0) {
        auto& candidate = m_elements[index];
        if (candidate.local_name() != tag_name)
            continue;
        return { &candidate, index };
    }
    return { nullptr, -1 };
}
// Returns the element that was pushed immediately before `target`, i.e. its
// neighbour towards the start of the stack. Returns nullptr if `target` is not on
// the stack or is the first entry.
DOM::Element* StackOfOpenElements::element_before(const DOM::Element& target)
{
    ssize_t index = m_elements.size() - 1;

    // Phase 1: walk back from the most recent entry until we hit the target.
    while (index >= 0 && &m_elements[index] != &target)
        --index;

    // Phase 2: step past the target (and any repeated entries of it).
    while (index >= 0 && &m_elements[index] == &target)
        --index;

    return index >= 0 ? &m_elements[index] : nullptr;
}
}

View file

@ -0,0 +1,82 @@
/*
* Copyright (c) 2020, Andreas Kling <kling@serenityos.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#pragma once
#include <AK/NonnullRefPtrVector.h>
#include <LibWeb/DOM/Element.h>
#include <LibWeb/Forward.h>
namespace Web::HTML {
// The HTML parser's stack of open elements.
// Elements are appended on push(), so first() is the oldest entry and
// last()/current_node() is the most recently pushed one.
class StackOfOpenElements {
public:
    StackOfOpenElements() { }
    ~StackOfOpenElements();

    // --- Stack manipulation ---
    void push(NonnullRefPtr<DOM::Element> element) { m_elements.append(move(element)); }
    NonnullRefPtr<DOM::Element> pop() { return m_elements.take_last(); }
    // Pops repeatedly; the last element removed is the one whose local name matches.
    void pop_until_an_element_with_tag_name_has_been_popped(const FlyString&);

    // --- Direct access ---
    bool is_empty() const { return m_elements.is_empty(); }
    DOM::Element& first() { return m_elements.first(); }
    DOM::Element& last() { return m_elements.last(); }
    DOM::Element& current_node() { return m_elements.last(); }
    const DOM::Element& current_node() const { return m_elements.last(); }
    NonnullRefPtrVector<DOM::Element>& elements() { return m_elements; }
    const NonnullRefPtrVector<DOM::Element>& elements() const { return m_elements; }

    // --- Scope queries ---
    // Each one searches from the most recently pushed element backwards.
    bool has_in_scope(const FlyString& tag_name) const;
    bool has_in_scope(const DOM::Element&) const;
    bool has_in_button_scope(const FlyString& tag_name) const;
    bool has_in_table_scope(const FlyString& tag_name) const;
    bool has_in_list_item_scope(const FlyString& tag_name) const;
    bool has_in_select_scope(const FlyString& tag_name) const;

    // --- Membership tests over the whole stack ---
    bool contains(const DOM::Element&) const; // by identity
    bool contains(const FlyString& tag_name) const; // by local name

    // --- Lookups that return raw, possibly-null pointers into the stack ---
    DOM::Element* topmost_special_node_below(const DOM::Element&);
    DOM::Element* element_before(const DOM::Element&);

    // Result of last_element_with_tag_name(): the element and its index in the stack,
    // or { nullptr, -1 } when nothing matched.
    struct LastElementResult {
        DOM::Element* element;
        ssize_t index;
    };
    LastElementResult last_element_with_tag_name(const FlyString&);

private:
    // Shared scope scans; the Vector lists the tag names that bound the scope.
    bool has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>&) const;
    bool has_in_scope_impl(const DOM::Element& target_node, const Vector<FlyString>&) const;

    NonnullRefPtrVector<DOM::Element> m_elements;
};
}