LibWeb: A whole bunch of work towards spec-compliant <script> elements

This is still very unfinished, but there's at least a skeleton of code.
2025-07-26 00:07:43 +00:00 · 2020-05-24 22:00:46 +02:00 · 2020-05-24 22:00:46 +02:00 · 45da08a1e6
commit 45da08a1e6
parent 3a30180e1e
8 changed files with 365 additions and 34 deletions
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.cpp
@ -31,6 +31,7 @@
 #include <LibWeb/DOM/ElementFactory.h>
 #include <LibWeb/DOM/HTMLFormElement.h>
 #include <LibWeb/DOM/HTMLHeadElement.h>
+#include <LibWeb/DOM/HTMLScriptElement.h>
 #include <LibWeb/DOM/Text.h>
 #include <LibWeb/Parser/HTMLDocumentParser.h>
 #include <LibWeb/Parser/HTMLToken.h>
@ -51,9 +52,10 @@ HTMLDocumentParser::~HTMLDocumentParser()
 {
 }

-void HTMLDocumentParser::run()
+void HTMLDocumentParser::run(const URL& url)
 {
    m_document = adopt(*new Document);
+    m_document->set_url(url);

    for (;;) {
        auto optional_token = m_tokenizer.next_token();
@ -212,6 +214,29 @@ void HTMLDocumentParser::handle_in_head(HTMLToken& token)
        return;
    }

+    if (token.is_start_tag() && token.tag_name() == "script") { // <script> start tag seen in the "in head" mode: build the element by hand, then tokenize what follows as script data.
+        auto adjusted_insertion_location = find_appropriate_place_for_inserting_node();
+        auto element = create_element_for(token);
+        auto& script_element = to<HTMLScriptElement>(*element);
+        script_element.set_parser_document({}, document()); // Hand this parser's document to the script element.
+        script_element.set_non_blocking({}, false);
+
+        if (m_parsing_fragment) {
+            TODO(); // The m_parsing_fragment case is not implemented for <script> yet.
+        }
+
+        if (m_invoked_via_document_write) {
+            TODO(); // The m_invoked_via_document_write case is not implemented yet.
+        }
+
+        adjusted_insertion_location->append_child(element, false);
+        m_stack_of_open_elements.push(element);
+        m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData); // From here on the tokenizer runs its ScriptData state.
+        m_original_insertion_mode = m_insertion_mode; // Saved so handle_text() can restore it when </script> arrives.
+        m_insertion_mode = InsertionMode::Text;
+        return;
+    }
+
    if (token.is_start_tag() && token.tag_name() == "meta") {
        auto element = insert_html_element(token);
        m_stack_of_open_elements.pop();
@ -425,6 +450,17 @@ void HTMLDocumentParser::handle_in_body(HTMLToken& token)
    ASSERT_NOT_REACHED();
 }

+// Raises the parser's script nesting level by one.
+void HTMLDocumentParser::increment_script_nesting_level()
+{
+    m_script_nesting_level += 1;
+}
+
+// Lowers the parser's script nesting level by one. The level must be non-zero
+// on entry; the ASSERT catches an unbalanced decrement before the counter
+// (a size_t) could wrap around.
+void HTMLDocumentParser::decrement_script_nesting_level()
+{
+    ASSERT(m_script_nesting_level);
+    m_script_nesting_level -= 1;
+}
+
 void HTMLDocumentParser::handle_text(HTMLToken& token)
 {
    if (token.is_character()) {
@ -432,7 +468,17 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
        return;
    }
    if (token.is_end_tag() && token.tag_name() == "script") { // </script> seen while in Text mode: leave the script element and prepare it.
-        ASSERT_NOT_REACHED();
+        NonnullRefPtr<HTMLScriptElement> script = to<HTMLScriptElement>(current_node()); // Hold the element in a NonnullRefPtr before it is popped off the stack of open elements.
+        m_stack_of_open_elements.pop();
+        m_insertion_mode = m_original_insertion_mode; // Go back to the mode that was saved when the <script> start tag was handled.
+        // FIXME: Handle tokenizer insertion point stuff here.
+        increment_script_nesting_level(); // The nesting level is raised only around prepare_script().
+        script->prepare_script({});
+        decrement_script_nesting_level();
+        if (script_nesting_level() == 0)
+            m_parser_pause_flag = false; // Clear the pause flag once the nesting level is back to zero.
+        // FIXME: Handle tokenizer insertion point stuff here too.
+        return;
    }
    if (token.is_end_tag()) {
        m_stack_of_open_elements.pop();
--- a/Libraries/LibWeb/Parser/HTMLDocumentParser.h
+++ b/Libraries/LibWeb/Parser/HTMLDocumentParser.h
@ -63,7 +63,7 @@ public:
    explicit HTMLDocumentParser(const StringView& input);
    ~HTMLDocumentParser();

-    void run();
+    void run(const URL&);

    Document& document();

@ -100,6 +100,9 @@ private:
    void reconstruct_the_active_formatting_elements();
    void process_using_the_rules_for(InsertionMode, HTMLToken&);
    void parse_generic_raw_text_element(HTMLToken&);
+    void increment_script_nesting_level();
+    void decrement_script_nesting_level();
+    size_t script_nesting_level() const { return m_script_nesting_level; }

    InsertionMode m_insertion_mode { InsertionMode::Initial };
    InsertionMode m_original_insertion_mode { InsertionMode::Initial };
@ -114,6 +117,10 @@ private:
    bool m_frameset_ok { true };
    bool m_parsing_fragment { false };
    bool m_scripting_enabled { true };
+    bool m_invoked_via_document_write { false };
+
+    bool m_parser_pause_flag { false };
+    size_t m_script_nesting_level { 0 };

    RefPtr<Document> m_document;
    RefPtr<HTMLHeadElement> m_head_element;
--- a/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
+++ b/Libraries/LibWeb/Parser/HTMLTokenizer.cpp
@ -38,22 +38,28 @@
        ASSERT_NOT_REACHED();                                                                               \
    } while (0)

-#define SWITCH_TO(new_state)                    \
-    will_switch_to(State::new_state);           \
-    m_state = State::new_state;                 \
-    current_input_character = next_codepoint(); \
-    goto new_state;
+#define SWITCH_TO(new_state)                        \
+    do {                                            \
+        will_switch_to(State::new_state);           \
+        m_state = State::new_state;                 \
+        current_input_character = next_codepoint(); \
+        goto new_state;                             \
+    } while (0) // SWITCH_TO: record new_state, read the next code point and jump to the new_state label. The do/while (0) wrapper makes the expansion one statement, so it can be the body of an unbraced if.

-#define RECONSUME_IN(new_state)          \
-    will_reconsume_in(State::new_state); \
-    m_state = State::new_state;          \
-    goto new_state;
+#define RECONSUME_IN(new_state)              \
+    do {                                     \
+        will_reconsume_in(State::new_state); \
+        m_state = State::new_state;          \
+        goto new_state;                      \
+    } while (0) // RECONSUME_IN: record new_state and jump to its label without reading a new code point, so new_state sees the current one. Wrapped in do/while (0) so the expansion is one statement.

 #define SWITCH_TO_AND_EMIT_CURRENT_TOKEN(new_state) \
-    will_switch_to(State::new_state);               \
-    m_state = State::new_state;                     \
-    will_emit(m_current_token);                     \
-    return m_current_token;
+    do {                                            \
+        will_switch_to(State::new_state);           \
+        m_state = State::new_state;                 \
+        will_emit(m_current_token);                 \
+        return m_current_token;                     \
+    } while (0) // Records new_state in m_state, then returns m_current_token from the enclosing function. Wrapped in do/while (0) so the expansion is one statement.

 #define DONT_CONSUME_NEXT_INPUT_CHARACTER --m_cursor;

@ -77,23 +83,29 @@

 #define ANYTHING_ELSE if (1)

-#define EMIT_EOF                                  \
-    if (m_has_emitted_eof)                        \
-        return {};                                \
-    m_has_emitted_eof = true;                     \
-    create_new_token(HTMLToken::Type::EndOfFile); \
-    will_emit(m_current_token);                   \
-    return m_current_token;
+#define EMIT_EOF                                      \
+    do {                                              \
+        if (m_has_emitted_eof)                        \
+            return {};                                \
+        m_has_emitted_eof = true;                     \
+        create_new_token(HTMLToken::Type::EndOfFile); \
+        will_emit(m_current_token);                   \
+        return m_current_token;                       \
+    } while (0) // EMIT_EOF: the first use creates and returns an EndOfFile token; every later use returns {} because m_has_emitted_eof is already set. Wrapped in do/while (0) so the expansion is one statement.

-#define EMIT_CURRENT_TOKEN      \
-    will_emit(m_current_token); \
-    return m_current_token;
+#define EMIT_CURRENT_TOKEN          \
+    do {                            \
+        will_emit(m_current_token); \
+        return m_current_token;     \
+    } while (0) // EMIT_CURRENT_TOKEN: pass m_current_token to will_emit() and return it from the enclosing function. Wrapped in do/while (0) so the expansion is one statement.

-#define EMIT_CHARACTER(codepoint)                                  \
-    create_new_token(HTMLToken::Type::Character);                  \
-    m_current_token.m_comment_or_character.data.append(codepoint); \
-    will_emit(m_current_token);                                    \
-    return m_current_token;
+#define EMIT_CHARACTER(codepoint)                                      \
+    do {                                                               \
+        create_new_token(HTMLToken::Type::Character);                  \
+        m_current_token.m_comment_or_character.data.append(codepoint); \
+        will_emit(m_current_token);                                    \
+        return m_current_token;                                        \
+    } while (0) // EMIT_CHARACTER: create a Character token, append codepoint to m_current_token's character data and return m_current_token. Because it returns, statements written after a use of this macro never run.

 #define EMIT_CURRENT_CHARACTER \
    EMIT_CHARACTER(current_input_character.value());
@ -915,8 +927,104 @@ Optional<HTMLToken> HTMLTokenizer::next_token()
            }
            END_STATE

+            BEGIN_STATE(ScriptData) // Script data: every code point is emitted as a character token until a '<' is seen.
+            {
+                ON('<')
+                {
+                    SWITCH_TO(ScriptDataLessThanSign);
+                }
+                ON(0)
+                {
+                    TODO(); // Code point 0 inside script data is not handled yet.
+                }
+                ON_EOF
+                {
+                    EMIT_EOF;
+                }
+                ANYTHING_ELSE
+                {
+                    EMIT_CURRENT_CHARACTER;
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(ScriptDataLessThanSign)
+            {
+                // Entered from ScriptData after a '<'. Decides whether that '<' begins an
+                // end tag ("</") or is ordinary script text.
+                ON('/')
+                {
+                    m_temporary_buffer.clear();
+                    SWITCH_TO(ScriptDataEndTagOpen);
+                }
+                ON('!')
+                {
+                    TODO();
+                }
+                ANYTHING_ELSE
+                {
+                    // Emit the pending '<' and reconsume the current code point in ScriptData.
+                    // EMIT_CHARACTER returns from next_token(), so the state change has to be
+                    // made before it: a RECONSUME_IN written after it would never execute,
+                    // leaving the tokenizer in this state and losing the current code point.
+                    will_reconsume_in(State::ScriptData);
+                    m_state = State::ScriptData;
+                    // NOTE(review): assumes next_token() reads a fresh code point on entry and
+                    // that m_cursor advanced by exactly one for the current code point (the same
+                    // assumption DONT_CONSUME_NEXT_INPUT_CHARACTER makes elsewhere) -- confirm.
+                    // Nothing is un-consumed when the current code point is missing (end of input).
+                    if (current_input_character.has_value()) {
+                        DONT_CONSUME_NEXT_INPUT_CHARACTER
+                    }
+                    EMIT_CHARACTER('<');
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(ScriptDataEndTagOpen) // Entered from ScriptDataLessThanSign after "</".
+            {
+                ON_ASCII_ALPHA
+                {
+                    create_new_token(HTMLToken::Type::EndTag); // Start an end tag token; the letter itself is handled again in ScriptDataEndTagName.
+                    RECONSUME_IN(ScriptDataEndTagName);
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // "</" followed by anything but an ASCII letter is not handled yet.
+                }
+            }
+            END_STATE
+
+            BEGIN_STATE(ScriptDataEndTagName) // Collects the end tag's name; the tag is only completed when current_end_tag_token_is_appropriate() says so.
+            {
+                ON_WHITESPACE
+                {
+                    if (current_end_tag_token_is_appropriate())
+                        SWITCH_TO(BeforeAttributeName); // Ends in a goto, so the TODO() below only runs when the tag is not appropriate.
+                    // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                    TODO();
+                }
+                ON('/')
+                {
+                    if (current_end_tag_token_is_appropriate())
+                        SWITCH_TO(SelfClosingStartTag); // Ends in a goto, so the TODO() below only runs when the tag is not appropriate.
+                    // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                    TODO();
+                }
+                ON('>')
+                {
+                    if (current_end_tag_token_is_appropriate())
+                        SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); // Returns the end tag token, so the TODO() below only runs when the tag is not appropriate.
+                    // FIXME: Otherwise, treat it as per the "anything else" entry below.
+                    TODO();
+                }
+                ON_ASCII_UPPER_ALPHA
+                {
+                    m_current_token.m_tag.tag_name.append(tolower(current_input_character.value())); // The tag name stores the lowercased letter...
+                    m_temporary_buffer.append(current_input_character.value()); // ...while the temporary buffer keeps the letter exactly as it appeared.
+                    continue;
+                }
+                ON_ASCII_LOWER_ALPHA
+                {
+                    m_current_token.m_tag.tag_name.append(current_input_character.value());
+                    m_temporary_buffer.append(current_input_character.value());
+                    continue;
+                }
+                ANYTHING_ELSE
+                {
+                    TODO(); // Any other code point inside the end tag name is not handled yet.
+                }
+            }
+            END_STATE
+
        default:
-            ASSERT_NOT_REACHED();
+            TODO();
        }
    }
 }
@ -986,5 +1094,4 @@ bool HTMLTokenizer::current_end_tag_token_is_appropriate() const
        return false;
    return m_current_token.tag_name() == m_last_emitted_start_tag.tag_name();
 }
-
 }