mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 09:38:11 +00:00
LibWeb: Implement aborting the HTML parser
This is roughly on-spec, although I had to invent a simple "aborted" state for the tokenizer.
This commit is contained in:
parent
37ed1b28fa
commit
ab8432783e
4 changed files with 33 additions and 0 deletions
|
@ -3751,4 +3751,25 @@ JS::Realm& HTMLParser::realm()
|
||||||
return m_document->realm();
|
return m_document->realm();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
|
||||||
|
void HTMLParser::abort()
|
||||||
|
{
|
||||||
|
// 1. Throw away any pending content in the input stream, and discard any future content that would have been added to it.
|
||||||
|
m_tokenizer.abort();
|
||||||
|
|
||||||
|
// FIXME: 2. Stop the speculative HTML parser for this HTML parser.
|
||||||
|
|
||||||
|
// 3. Update the current document readiness to "interactive".
|
||||||
|
m_document->update_readiness(DocumentReadyState::Interactive);
|
||||||
|
|
||||||
|
// 4. Pop all the nodes off the stack of open elements.
|
||||||
|
while (!m_stack_of_open_elements.is_empty())
|
||||||
|
m_stack_of_open_elements.pop();
|
||||||
|
|
||||||
|
// 5. Update the current document readiness to "complete".
|
||||||
|
m_document->update_readiness(DocumentReadyState::Complete);
|
||||||
|
|
||||||
|
m_aborted = true;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -68,7 +68,11 @@ public:
|
||||||
|
|
||||||
HTMLTokenizer& tokenizer() { return m_tokenizer; }
|
HTMLTokenizer& tokenizer() { return m_tokenizer; }
|
||||||
|
|
||||||
|
// https://html.spec.whatwg.org/multipage/parsing.html#abort-a-parser
|
||||||
|
void abort();
|
||||||
|
|
||||||
bool aborted() const { return m_aborted; } // True once abort() has been called on this parser.
|
bool aborted() const { return m_aborted; }
|
||||||
|
bool stopped() const { return m_stop_parsing; } // NOTE(review): m_stop_parsing is set outside this view -- presumably at the spec's "stop parsing" step; confirm.
|
||||||
|
|
||||||
size_t script_nesting_level() const { return m_script_nesting_level; }
|
size_t script_nesting_level() const { return m_script_nesting_level; }
|
||||||
|
|
||||||
|
|
|
@ -257,6 +257,9 @@ _StartOfFunction:
|
||||||
if (!m_queued_tokens.is_empty())
|
if (!m_queued_tokens.is_empty())
|
||||||
return m_queued_tokens.dequeue();
|
return m_queued_tokens.dequeue();
|
||||||
|
|
||||||
|
if (m_aborted)
|
||||||
|
return {};
|
||||||
|
|
||||||
for (;;) {
|
for (;;) {
|
||||||
auto current_input_character = next_code_point();
|
auto current_input_character = next_code_point();
|
||||||
switch (m_state) {
|
switch (m_state) {
|
||||||
|
|
|
@ -143,6 +143,9 @@ public:
|
||||||
m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator);
|
m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This permanently cuts off the tokenizer input stream.
|
||||||
|
void abort() { m_aborted = true; }
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void skip(size_t count);
|
void skip(size_t count);
|
||||||
Optional<u32> next_code_point();
|
Optional<u32> next_code_point();
|
||||||
|
@ -207,6 +210,8 @@ private:
|
||||||
|
|
||||||
bool m_blocked { false };
|
bool m_blocked { false };
|
||||||
|
|
||||||
|
bool m_aborted { false }; // Set by abort(); next_token() checks this (after the queue is drained) and returns no further tokens.
|
||||||
|
|
||||||
Vector<HTMLToken::Position> m_source_positions;
|
Vector<HTMLToken::Position> m_source_positions;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue