mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 07:22:45 +00:00 
			
		
		
		
	LibWeb: Stop parsing after document.write at the insertion point
				
					
				
			If a call to `document.write` inserts an incomplete HTML tag, e.g.:
    document.write("<p");
we would previously continue parsing the document past the insertion point
until we reached a closing angle bracket. However, the spec states that we
should stop once we reach the new insertion point.
			
			
This commit is contained in:
		
							parent
							
								
									64dcd3f1f4
								
							
						
					
					
						commit
						af57bd5cca
					
				
					 7 changed files with 62 additions and 10 deletions
				
			
		|  | @ -0,0 +1,34 @@ | |||
| Viewport <#document> at (0,0) content-size 800x600 children: not-inline | ||||
|   BlockContainer <html> at (0,0) content-size 800x600 [BFC] children: not-inline | ||||
|     BlockContainer <body> at (8,16) content-size 784x83 children: not-inline | ||||
|       BlockContainer <p> at (8,16) content-size 784x17 children: inline | ||||
|         frag 0 from TextNode start: 0, length: 4, rect: [8,16 30.078125x17] baseline: 13.296875 | ||||
|             "Well" | ||||
|         TextNode <#text> | ||||
|       BlockContainer <(anonymous)> at (8,49) content-size 784x0 children: inline | ||||
|         TextNode <#text> | ||||
|       BlockContainer <p> at (8,49) content-size 784x17 children: inline | ||||
|         frag 0 from TextNode start: 0, length: 5, rect: [8,49 36.84375x17] baseline: 13.296875 | ||||
|             "hello" | ||||
|         TextNode <#text> | ||||
|       BlockContainer <(anonymous)> at (8,82) content-size 784x0 children: inline | ||||
|         TextNode <#text> | ||||
|       BlockContainer <p> at (8,82) content-size 784x17 children: inline | ||||
|         frag 0 from TextNode start: 0, length: 8, rect: [8,82 59.21875x17] baseline: 13.296875 | ||||
|             "friends!" | ||||
|         TextNode <#text> | ||||
|       BlockContainer <(anonymous)> at (8,115) content-size 784x0 children: inline | ||||
|         TextNode <#text> | ||||
| 
 | ||||
| ViewportPaintable (Viewport<#document>) [0,0 800x600] | ||||
|   PaintableWithLines (BlockContainer<HTML>) [0,0 800x600] | ||||
|     PaintableWithLines (BlockContainer<BODY>) [8,16 784x83] overflow: [8,16 784x99] | ||||
|       PaintableWithLines (BlockContainer<P>) [8,16 784x17] | ||||
|         TextPaintable (TextNode<#text>) | ||||
|       PaintableWithLines (BlockContainer(anonymous)) [8,49 784x0] | ||||
|       PaintableWithLines (BlockContainer<P>) [8,49 784x17] | ||||
|         TextPaintable (TextNode<#text>) | ||||
|       PaintableWithLines (BlockContainer(anonymous)) [8,82 784x0] | ||||
|       PaintableWithLines (BlockContainer<P>) [8,82 784x17] | ||||
|         TextPaintable (TextNode<#text>) | ||||
|       PaintableWithLines (BlockContainer(anonymous)) [8,115 784x0] | ||||
|  | @ -0,0 +1,8 @@ | |||
| <p>Well</p> | ||||
| 
 | ||||
| <script type="text/javascript"> | ||||
|     document.write("<p"); | ||||
|     document.write(">hello</p>"); | ||||
| </script> | ||||
| 
 | ||||
| <p>friends!</p> | ||||
|  | @ -530,9 +530,12 @@ WebIDL::ExceptionOr<void> Document::run_the_document_write_steps(StringView inpu | |||
|     // 5. Insert input into the input stream just before the insertion point.
 | ||||
|     m_parser->tokenizer().insert_input_at_insertion_point(input); | ||||
| 
 | ||||
|     // 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time, processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script end tag token is emitted by the tokenizer).
 | ||||
|     // 6. If there is no pending parsing-blocking script, have the HTML parser process input, one code point at a time,
 | ||||
|     //    processing resulting tokens as they are emitted, and stopping when the tokenizer reaches the insertion point
 | ||||
|     //    or when the processing of the tokenizer is aborted by the tree construction stage (this can happen if a script
 | ||||
|     //    end tag token is emitted by the tokenizer).
 | ||||
|     if (!pending_parsing_blocking_script()) | ||||
|         m_parser->run(); | ||||
|         m_parser->run(HTML::HTMLTokenizer::StopAtInsertionPoint::Yes); | ||||
| 
 | ||||
|     return {}; | ||||
| } | ||||
|  |  | |||
|  | @ -169,14 +169,14 @@ void HTMLParser::visit_edges(Cell::Visitor& visitor) | |||
|     m_list_of_active_formatting_elements.visit_edges(visitor); | ||||
| } | ||||
| 
 | ||||
| void HTMLParser::run() | ||||
| void HTMLParser::run(HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point) | ||||
| { | ||||
|     for (;;) { | ||||
|         // FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
 | ||||
|         if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached()) | ||||
|             return; | ||||
| 
 | ||||
|         auto optional_token = m_tokenizer.next_token(); | ||||
|         auto optional_token = m_tokenizer.next_token(stop_at_insertion_point); | ||||
|         if (!optional_token.has_value()) | ||||
|             break; | ||||
|         auto& token = optional_token.value(); | ||||
|  | @ -216,11 +216,11 @@ void HTMLParser::run() | |||
|     flush_character_insertions(); | ||||
| } | ||||
| 
 | ||||
| void HTMLParser::run(const AK::URL& url) | ||||
| void HTMLParser::run(const AK::URL& url, HTMLTokenizer::StopAtInsertionPoint stop_at_insertion_point) | ||||
| { | ||||
|     m_document->set_url(url); | ||||
|     m_document->set_source(MUST(String::from_byte_string(m_tokenizer.source()))); | ||||
|     run(); | ||||
|     run(stop_at_insertion_point); | ||||
|     the_end(*m_document, this); | ||||
|     m_document->detach_parser({}); | ||||
| } | ||||
|  |  | |||
|  | @ -53,8 +53,8 @@ public: | |||
|     static JS::NonnullGCPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, ByteBuffer const& input); | ||||
|     static JS::NonnullGCPtr<HTMLParser> create(DOM::Document&, StringView input, ByteString const& encoding); | ||||
| 
 | ||||
|     void run(); | ||||
|     void run(const AK::URL&); | ||||
|     void run(HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No); | ||||
|     void run(const AK::URL&, HTMLTokenizer::StopAtInsertionPoint = HTMLTokenizer::StopAtInsertionPoint::No); | ||||
| 
 | ||||
|     static void the_end(JS::NonnullGCPtr<DOM::Document>, JS::GCPtr<HTMLParser> = nullptr); | ||||
| 
 | ||||
|  |  | |||
|  | @ -248,7 +248,7 @@ HTMLToken::Position HTMLTokenizer::nth_last_position(size_t n) | |||
|     return m_source_positions.at(m_source_positions.size() - 1 - n); | ||||
| } | ||||
| 
 | ||||
| Optional<HTMLToken> HTMLTokenizer::next_token() | ||||
| Optional<HTMLToken> HTMLTokenizer::next_token(StopAtInsertionPoint stop_at_insertion_point) | ||||
| { | ||||
|     if (!m_source_positions.is_empty()) { | ||||
|         auto last_position = m_source_positions.last(); | ||||
|  | @ -263,6 +263,9 @@ _StartOfFunction: | |||
|         return {}; | ||||
| 
 | ||||
|     for (;;) { | ||||
|         if (stop_at_insertion_point == StopAtInsertionPoint::Yes && is_insertion_point_reached()) | ||||
|             return {}; | ||||
| 
 | ||||
|         auto current_input_character = next_code_point(); | ||||
|         switch (m_state) { | ||||
|             // 13.2.5.1 Data state, https://html.spec.whatwg.org/multipage/parsing.html#data-state
 | ||||
|  |  | |||
|  | @ -111,7 +111,11 @@ public: | |||
| #undef __ENUMERATE_TOKENIZER_STATE | ||||
|     }; | ||||
| 
 | ||||
|     Optional<HTMLToken> next_token(); | ||||
|     enum class StopAtInsertionPoint { | ||||
|         No, | ||||
|         Yes, | ||||
|     }; | ||||
|     Optional<HTMLToken> next_token(StopAtInsertionPoint = StopAtInsertionPoint::No); | ||||
| 
 | ||||
|     void set_parser(Badge<HTMLParser>, HTMLParser& parser) { m_parser = &parser; } | ||||
| 
 | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn