Patch summary (descriptive preamble; none of this text belongs to a hunk).

What the patch changes, file by file:

  * Libraries/LibWeb/CMakeLists.txt
      Adds Parser/ListOfActiveFormattingElements.cpp to SOURCES.

  * Libraries/LibWeb/Parser/ListOfActiveFormattingElements.{h,cpp} (new files)
      A list of Entry records. Each Entry holds one element pointer; an Entry
      whose pointer is null counts as a marker (Entry::is_marker()).
      add() / add_marker() append at the end. contains() compares by element
      identity. remove() drops only the first matching entry.
      last_element_with_tag_name_before_marker() scans from the last entry
      towards the first and returns nullptr as soon as it meets a marker.

  * Libraries/LibWeb/Parser/HTMLDocumentParser.{h,cpp}
      m_list_of_active_formatting_elements changes from a NonnullRefPtrVector
      to the new class. reconstruct_the_active_formatting_elements() now reads
      and writes through entries() and ASSERTs that each visited entry is
      non-null.
      Adds run_the_adoption_agency_algorithm(), called from handle_in_body()
      for the end tags a, b, big, code, em, font, i, nobr, s, small, strike,
      strong, tt and u. Only the "no furthest block" path is implemented; the
      remaining paths end in TODO(). The "OuterLoop" label is commented out,
      so the loop-counter check runs a single time.
      handle_text() calls children_changed() on the current node when a
      "style" end tag arrives and then continues into the generic end-tag
      handling.

  * Libraries/LibWeb/Parser/StackOfOpenElements.{h,cpp}
      Adds has_in_scope(const Element&) plus a matching has_in_scope_impl()
      overload. The overload walks m_elements from the last entry to the
      first, returns true on an identity match, returns false on the first
      node whose tag name is in the given list, and hits ASSERT_NOT_REACHED()
      if the walk finishes without either.

NOTE(review): the "#include" lines in this copy of the patch carry no header
name. They are left exactly as found - restore them from the upstream commit
before trying to apply the patch.
NOTE(review): template arguments (<Element>, <Entry>, <FlyString>) were
missing in this copy and have been filled in from how each object is used.
"RefPtr<Element> furthest_block" is the least certain one - confirm upstream.
NOTE(review): reconstruct_the_active_formatting_elements() dereferences the
last entry's element before any marker check. add_marker() has no caller in
this patch, so that looks safe for now - verify once markers are pushed.

diff --git a/Libraries/LibWeb/CMakeLists.txt b/Libraries/LibWeb/CMakeLists.txt
index a37702ec04..83f8c85394 100644
--- a/Libraries/LibWeb/CMakeLists.txt
+++ b/Libraries/LibWeb/CMakeLists.txt
@@ -89,6 +89,7 @@ set(SOURCES
     Parser/HTMLParser.cpp
     Parser/HTMLToken.cpp
     Parser/HTMLTokenizer.cpp
+    Parser/ListOfActiveFormattingElements.cpp
     Parser/StackOfOpenElements.cpp
     ResourceLoader.cpp
     StylePropertiesModel.cpp
diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
index 24f7e967c5..6da146dec0 100644
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
@@ -492,11 +492,12 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
     if (m_list_of_active_formatting_elements.is_empty())
         return;
 
-    if (m_stack_of_open_elements.contains(m_list_of_active_formatting_elements.last()))
+    if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
         return;
 
-    ssize_t index = m_list_of_active_formatting_elements.size() - 1;
-    RefPtr<Element> entry = m_list_of_active_formatting_elements.at(index);
+    ssize_t index = m_list_of_active_formatting_elements.entries().size() - 1;
+    RefPtr<Element> entry = m_list_of_active_formatting_elements.entries().at(index).element;
+    ASSERT(entry);
 
 Rewind:
     if (index == 0) {
@@ -504,14 +505,16 @@ Rewind:
     }
 
     --index;
-    entry = m_list_of_active_formatting_elements.at(index);
+    entry = m_list_of_active_formatting_elements.entries().at(index).element;
+    ASSERT(entry);
 
     if (!m_stack_of_open_elements.contains(*entry))
         goto Rewind;
 
 Advance:
     ++index;
-    entry = m_list_of_active_formatting_elements.at(index);
+    entry = m_list_of_active_formatting_elements.entries().at(index).element;
+    ASSERT(entry);
 
 Create:
     // FIXME: Hold on to the real token!
@@ -520,12 +523,74 @@ Create:
     fake_token.m_tag.tag_name.append(entry->tag_name());
     auto new_element = insert_html_element(fake_token);
 
-    m_list_of_active_formatting_elements.ptr_at(index) = *new_element;
+    m_list_of_active_formatting_elements.entries().at(index).element = *new_element;
 
-    if (index != (ssize_t)m_list_of_active_formatting_elements.size() - 1)
+    if (index != (ssize_t)m_list_of_active_formatting_elements.entries().size() - 1)
         goto Advance;
 }
 
+void HTMLDocumentParser::run_the_adoption_agency_algorithm(HTMLToken& token)
+{
+    auto subject = token.tag_name();
+
+    // If the current node is an HTML element whose tag name is subject,
+    // and the current node is not in the list of active formatting elements,
+    // then pop the current node off the stack of open elements, and return.
+    if (current_node().tag_name() == subject && !m_list_of_active_formatting_elements.contains(current_node())) {
+        m_stack_of_open_elements.pop();
+        return;
+    }
+
+    size_t outer_loop_counter = 0;
+
+//OuterLoop:
+    if (outer_loop_counter >= 8)
+        return;
+
+    ++outer_loop_counter;
+
+    auto formatting_element = m_list_of_active_formatting_elements.last_element_with_tag_name_before_marker(subject);
+    if (!formatting_element) {
+        // FIXME: If there is no such element, then return and instead act as
+        // described in the "any other end tag" entry above.
+        TODO();
+    }
+
+    if (!m_stack_of_open_elements.contains(*formatting_element)) {
+        PARSE_ERROR();
+        // FIXME: If formatting element is not in the stack of open elements,
+        // then this is a parse error; remove the element from the list, and return.
+        TODO();
+    }
+
+    if (!m_stack_of_open_elements.has_in_scope(*formatting_element)) {
+        PARSE_ERROR();
+        return;
+    }
+
+    if (formatting_element != &current_node()) {
+        PARSE_ERROR();
+    }
+
+    // FIXME: Let furthest block be the topmost node in the stack of open elements
+    // that is lower in the stack than formatting element, and is an element
+    // in the special category. There might not be one.
+    RefPtr<Element> furthest_block = nullptr;
+
+    if (!furthest_block) {
+        while (&current_node() != formatting_element)
+            m_stack_of_open_elements.pop();
+        m_stack_of_open_elements.pop();
+
+        m_list_of_active_formatting_elements.remove(*formatting_element);
+        return;
+    }
+
+    // FIXME: Implement the rest of the AAA :^)
+
+    TODO();
+}
+
 void HTMLDocumentParser::handle_in_body(HTMLToken& token)
 {
     if (token.is_character()) {
@@ -602,13 +667,16 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token)
         return;
     }
 
-    {
-        if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
-            reconstruct_the_active_formatting_elements();
-            auto element = insert_html_element(token);
-            m_list_of_active_formatting_elements.append(*element);
-            return;
-        }
+    if (token.is_start_tag() && token.tag_name().is_one_of("b", "big", "code", "em", "font", "i", "s", "small", "strike", "strong", "tt", "u")) {
+        reconstruct_the_active_formatting_elements();
+        auto element = insert_html_element(token);
+        m_list_of_active_formatting_elements.add(*element);
+        return;
+    }
+
+    if (token.is_end_tag() && token.tag_name().is_one_of("a", "b", "big", "code", "em", "font", "i", "nobr", "s", "small", "strike", "strong", "tt", "u")) {
+        run_the_adoption_agency_algorithm(token);
+        return;
     }
 
     if (token.is_start_tag() && token.tag_name().is_one_of("address", "article", "aside", "blockquote", "center", "details", "dialog", "dir", "div", "dl", "fieldset", "figcaption", "figure", "footer", "header", "hgroup", "main", "menu", "nav", "ol", "p", "section", "summary", "ul")) {
@@ -746,6 +814,11 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
         return;
     }
 
+    if (token.is_end_tag() && token.tag_name() == "style") {
+        current_node().children_changed();
+        // NOTE: We don't return here, keep going.
+    }
+
     if (token.is_end_tag()) {
         m_stack_of_open_elements.pop();
         m_insertion_mode = m_original_insertion_mode;
diff --git a/Libraries/LibWeb/Parser/HTMLDocumentParser.h b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
index 6eeff69220..6ce358ac4c 100644
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
@@ -29,6 +29,7 @@
 #include
 #include
 #include
+#include
 #include
 
 #define ENUMERATE_INSERTION_MODES \
@@ -106,13 +107,13 @@ private:
     void decrement_script_nesting_level();
     size_t script_nesting_level() const { return m_script_nesting_level; }
     void reset_the_insertion_mode_appropriately();
+    void run_the_adoption_agency_algorithm(HTMLToken&);
 
     InsertionMode m_insertion_mode { InsertionMode::Initial };
     InsertionMode m_original_insertion_mode { InsertionMode::Initial };
 
     StackOfOpenElements m_stack_of_open_elements;
-
-    NonnullRefPtrVector<Element> m_list_of_active_formatting_elements;
+    ListOfActiveFormattingElements m_list_of_active_formatting_elements;
 
     HTMLTokenizer m_tokenizer;
 
diff --git a/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp
new file mode 100644
index 0000000000..c49a94e050
--- /dev/null
+++ b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.cpp
@@ -0,0 +1,74 @@
+/*
+ * Copyright (c) 2020, Andreas Kling
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include
+#include
+
+namespace Web {
+
+ListOfActiveFormattingElements::~ListOfActiveFormattingElements()
+{
+}
+
+void ListOfActiveFormattingElements::add(Element& element)
+{
+    m_entries.append({ element });
+}
+
+void ListOfActiveFormattingElements::add_marker()
+{
+    m_entries.append({ nullptr });
+}
+
+bool ListOfActiveFormattingElements::contains(const Element& element) const
+{
+    for (auto& entry : m_entries) {
+        if (entry.element == &element)
+            return true;
+    }
+    return false;
+}
+
+Element* ListOfActiveFormattingElements::last_element_with_tag_name_before_marker(const FlyString& tag_name)
+{
+    for (ssize_t i = m_entries.size() - 1; i >= 0; --i) {
+        auto& entry = m_entries[i];
+        if (entry.is_marker())
+            return nullptr;
+        if (entry.element->tag_name() == tag_name)
+            return entry.element;
+    }
+    return nullptr;
+}
+
+void ListOfActiveFormattingElements::remove(Element& element)
+{
+    m_entries.remove_first_matching([&](auto& entry) {
+        return entry.element == &element;
+    });
+}
+
+}
diff --git a/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h
new file mode 100644
index 0000000000..4c05f633bd
--- /dev/null
+++ b/Libraries/LibWeb/Parser/ListOfActiveFormattingElements.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2020, Andreas Kling
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright notice, this
+ *    list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright notice,
+ *    this list of conditions and the following disclaimer in the documentation
+ *    and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace Web {
+
+class ListOfActiveFormattingElements {
+public:
+    ListOfActiveFormattingElements() { }
+    ~ListOfActiveFormattingElements();
+
+    struct Entry {
+        bool is_marker() const { return !element; }
+
+        RefPtr<Element> element;
+    };
+
+    bool is_empty() const { return m_entries.is_empty(); }
+    bool contains(const Element&) const;
+
+    void add(Element& element);
+    void add_marker();
+
+    void remove(Element&);
+
+    const Vector<Entry>& entries() const { return m_entries; }
+    Vector<Entry>& entries() { return m_entries; }
+
+    Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
+
+private:
+    Vector<Entry> m_entries;
+};
+
+}
diff --git a/Libraries/LibWeb/Parser/StackOfOpenElements.cpp b/Libraries/LibWeb/Parser/StackOfOpenElements.cpp
index 8834385300..0a57ee344e 100644
--- a/Libraries/LibWeb/Parser/StackOfOpenElements.cpp
+++ b/Libraries/LibWeb/Parser/StackOfOpenElements.cpp
@@ -52,6 +52,23 @@ bool StackOfOpenElements::has_in_scope(const FlyString& tag_name) const
     return has_in_scope_impl(tag_name, s_base_list);
 }
 
+bool StackOfOpenElements::has_in_scope_impl(const Element& target_node, const Vector<FlyString>& list) const
+{
+    for (ssize_t i = m_elements.size() - 1; i >= 0; --i) {
+        auto& node = m_elements.at(i);
+        if (&node == &target_node)
+            return true;
+        if (list.contains_slow(node.tag_name()))
+            return false;
+    }
+    ASSERT_NOT_REACHED();
+}
+
+bool StackOfOpenElements::has_in_scope(const Element& target_node) const
+{
+    return has_in_scope_impl(target_node, s_base_list);
+}
+
 bool StackOfOpenElements::has_in_button_scope(const FlyString& tag_name) const
 {
     auto list = s_base_list;
diff --git a/Libraries/LibWeb/Parser/StackOfOpenElements.h b/Libraries/LibWeb/Parser/StackOfOpenElements.h
index 5e3ed6ae52..4240676aec 100644
--- a/Libraries/LibWeb/Parser/StackOfOpenElements.h
+++ b/Libraries/LibWeb/Parser/StackOfOpenElements.h
@@ -48,12 +48,15 @@ public:
     bool has_in_button_scope(const FlyString& tag_name) const;
     bool has_in_table_scope(const FlyString& tag_name) const;
 
+    bool has_in_scope(const Element&) const;
+
     bool contains(const Element&) const;
 
     const NonnullRefPtrVector<Element>& elements() const { return m_elements; }
 
 private:
     bool has_in_scope_impl(const FlyString& tag_name, const Vector<FlyString>&) const;
+    bool has_in_scope_impl(const Element& target_node, const Vector<FlyString>&) const;
 
     NonnullRefPtrVector<Element> m_elements;
 };