1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 16:47:36 +00:00

LibWeb: Stop parsing after document.write at the insertion point

If a call to `document.write` inserts an incomplete HTML tag, e.g.:

    document.write("<p");

we would previously continue parsing the document until we reached a
closing angle bracket. However, the spec states we should stop once we
reach the new insertion point.
This commit is contained in:
Timothy Flynn 2024-02-18 12:45:53 -05:00 committed by Andreas Kling
parent 64dcd3f1f4
commit af57bd5cca
7 changed files with 62 additions and 10 deletions

View file

@ -169,14 +169,14 @@ void HTMLParser::visit_edges(Cell::Visitor& visitor)
m_list_of_active_formatting_elements.visit_edges(visitor);
}
void HTMLParser::run()
void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
for (;;) {
// FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
return;
auto optional_token = m_tokenizer.next_token();
auto optional_token = m_tokenizer.next_token(stop_at_insertion_point);
if (!optional_token.has_value())
break;
auto& token = optional_token.value();
@ -216,11 +216,11 @@ void HTMLParser::run()
flush_character_insertions();
}
void HTMLParser::run(const AK::URL& url)
void HTMLParser::run(const AK::URL& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point)
{
m_document->set_url(url);
m_document->set_source(MUST(String::from_byte_string(m_tokenizer.source())));
run();
run(stop_at_insertion_point);
the_end(*m_document, this);
m_document->detach_parser({});
}

View file

@ -53,8 +53,8 @@ public:
static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input);
static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, ByteString const& encoding);
void run();
void run(const AK::URL&);
// Drives the tokenizer loop: repeatedly asks the tokenizer for the next token, forwarding the
// StopAtInsertionPoint flag to HTMLTokenizer::next_token(), and stops once no token is returned.
// The loop also returns early when the insertion point is reached and EOF has not been inserted.
void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
// Sets the document's URL and source text, runs the parser loop with the given flag, then
// invokes the_end() for the document and detaches this parser from it.
void run(const AK::URL&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No);
static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr);

View file

@ -248,7 +248,7 @@ HTMLToken::Position HTMLTokenizer::nth_last_position(size_t n)
return m_source_positions.at(m_source_positions.size() - 1 - n);
}
Optional<HTMLToken> HTMLTokenizer::next_token()
Optional<HTMLToken> HTMLTokenizer::next_token(StopAtInsertionPoint stop_at_insertion_point)
{
if (!m_source_positions.is_empty()) {
auto last_position = m_source_positions.last();
@ -263,6 +263,9 @@ _StartOfFunction:
return {};
for (;;) {
if (stop_at_insertion_point == StopAtInsertionPoint::Yes && is_insertion_point_reached())
return {};
auto current_input_character = next_code_point();
switch (m_state) {
// 13.2.5.1 Data state, https://html.spec.whatwg.org/multipage/parsing.html#data-state

View file

@ -111,7 +111,11 @@ public:
#undef __ENUMERATE_TOKENIZER_STATE
};
Optional<HTMLToken> next_token();
// Selects whether next_token() checks for the insertion point on every iteration of its
// tokenizing loop and bails out when it has been reached.
enum class StopAtInsertionPoint {
// Do not perform the per-iteration insertion point check.
No,
// Return an empty Optional as soon as is_insertion_point_reached() is true, even if the
// token being built is still incomplete (e.g. after document.write("<p")).
Yes,
};
// Produces the next token from the input stream, or an empty Optional when no token is produced.
// When called with StopAtInsertionPoint::Yes, the tokenizing loop returns an empty Optional
// once is_insertion_point_reached() is true, before consuming the next code point.
Optional<HTMLToken> next_token(StopAtInsertionPoint = StopAtInsertionPoint::No);
void set_parser(Badge<HTMLParser>, HTMLParser& parser) { m_parser = &parser; }