1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-25 19:17:44 +00:00

LibWeb: Add basic support for dynamic markup insertion

This implements basic support for dynamic markup insertion, adding
 * Document::open()
 * Document::write(Vector<String> const&)
 * Document::writeln(Vector<String> const&)
 * Document::close()

The HTMLParser is modified to make it possible to create a
script-created parser which initially only contains a HTMLTokenizer
without any data. Additionally the HTMLParser::run method gains an
overload which does not modify the Document and does not run
HTMLParser::the_end() so that we can reenter the parser at a later time.
Furthermore all FIXMEs that concern the insertion point, which is
defined in the HTMLTokenizer, are implemented. Additionally the following
member variables of the HTMLParser are now exposed by getter functions:
 * m_tokenizer
 * m_aborted
 * m_script_nesting_level

The HTMLTokenizer is modified so that it contains an insertion
point which keeps track of where the next input from the Document::write
functions will be inserted. The insertion point is implemented as the
character offset into m_decoded_input and a boolean describing if the
insertion point is defined. Functions to update, check and {re}store the
insertion point are also added.
The function HTMLTokenizer::insert_eof is added to tell a script-created
parser that Document::close was called and HTMLParser::the_end() should
be called.
Lastly an explicit default constructor is added to HTMLTokenizer to
create an empty HTMLTokenizer into which data can be inserted.
This commit is contained in:
Lorenz Steinert 2022-02-19 15:58:21 +01:00 committed by Andreas Kling
parent d29d9462e9
commit db789813c9
7 changed files with 282 additions and 19 deletions

View file

@ -137,17 +137,24 @@ HTMLParser::HTMLParser(DOM::Document& document, StringView input, const String&
m_document->set_encoding(standardized_encoding.value());
}
// Constructs a parser bound to `document` without handing it any input text.
// Only m_document is initialized explicitly here; the tokenizer member is then
// told which parser owns it.
HTMLParser::HTMLParser(DOM::Document& document)
: m_document(document)
{
// Register this parser with the tokenizer.
m_tokenizer.set_parser({}, *this);
}
// On destruction, sets the document's "should invalidate styles on attribute
// changes" flag to true.
// NOTE(review): presumably the flag is turned off while the parser is active —
// confirm against the constructors, which are not fully visible here.
HTMLParser::~HTMLParser()
{
m_document->set_should_invalidate_styles_on_attribute_changes(true);
}
void HTMLParser::run(const AK::URL& url)
void HTMLParser::run()
{
m_document->set_url(url);
m_document->set_source(m_tokenizer.source());
for (;;) {
// FIXME: Find a better way to say that we come from Document::close() and want to process EOF.
if (!m_tokenizer.is_eof_inserted() && m_tokenizer.is_insertion_point_reached())
return;
auto optional_token = m_tokenizer.next_token();
if (!optional_token.has_value())
break;
@ -186,7 +193,13 @@ void HTMLParser::run(const AK::URL& url)
}
flush_character_insertions();
}
// Runs the parser for a document loaded from `url`.
//
// Records the URL and the tokenizer's source text on the document, drives the
// parameterless run() overload, and then performs the end-of-parsing steps via
// the_end().
void HTMLParser::run(const AK::URL& url)
{
    // Tell the document where it came from and what text it is built from.
    m_document->set_url(url);
    auto const source = m_tokenizer.source();
    m_document->set_source(source);

    // Process the input, then finish parsing.
    run();
    the_end();
}
@ -197,7 +210,8 @@ void HTMLParser::the_end()
// FIXME: 1. If the active speculative HTML parser is not null, then stop the speculative HTML parser and return.
// FIXME: 2. Set the insertion point to undefined.
// 2. Set the insertion point to undefined.
m_tokenizer.undefine_insertion_point();
// 3. Update the current document readiness to "interactive".
m_document->update_readiness(HTML::DocumentReadyState::Interactive);
@ -2003,6 +2017,7 @@ void HTMLParser::decrement_script_nesting_level()
--m_script_nesting_level;
}
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-incdata
void HTMLParser::handle_text(HTMLToken& token)
{
if (token.is_character()) {
@ -2025,13 +2040,18 @@ void HTMLParser::handle_text(HTMLToken& token)
NonnullRefPtr<HTMLScriptElement> script = verify_cast<HTMLScriptElement>(current_node());
(void)m_stack_of_open_elements.pop();
m_insertion_mode = m_original_insertion_mode;
// FIXME: Handle tokenizer insertion point stuff here.
// Let the old insertion point have the same value as the current insertion point.
m_tokenizer.store_insertion_point();
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();
increment_script_nesting_level();
// FIXME: Check if active speculative HTML parser is null.
script->prepare_script({});
decrement_script_nesting_level();
if (script_nesting_level() == 0)
m_parser_pause_flag = false;
// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point have the value of the old insertion point.
m_tokenizer.restore_insertion_point();
while (document().pending_parsing_blocking_script()) {
if (script_nesting_level() != 0) {
@ -2065,7 +2085,8 @@ void HTMLParser::handle_text(HTMLToken& token)
m_tokenizer.set_blocked(false);
// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();
VERIFY(script_nesting_level() == 0);
increment_script_nesting_level();
@ -2076,7 +2097,8 @@ void HTMLParser::handle_text(HTMLToken& token)
VERIFY(script_nesting_level() == 0);
m_parser_pause_flag = false;
// FIXME: Handle tokenizer insertion point stuff here too.
// Let the insertion point be undefined again.
m_tokenizer.undefine_insertion_point();
}
}
return;
@ -2986,8 +3008,26 @@ void HTMLParser::process_using_the_rules_for_foreign_content(HTMLToken& token)
if (token.is_end_tag() && current_node().namespace_() == Namespace::SVG && current_node().tag_name() == SVG::TagNames::script) {
ScriptEndTag:
// Pop the current node off the stack of open elements.
(void)m_stack_of_open_elements.pop();
// Let the old insertion point have the same value as the current insertion point.
m_tokenizer.store_insertion_point();
// Let the insertion point be just before the next input character.
m_tokenizer.update_insertion_point();
// Increment the parser's script nesting level by one.
increment_script_nesting_level();
// Set the parser pause flag to true.
m_parser_pause_flag = true;
// FIXME: Implement SVG script parsing.
TODO();
// Decrement the parser's script nesting level by one.
decrement_script_nesting_level();
// If the parser's script nesting level is zero, then set the parser pause flag to false.
if (script_nesting_level() == 0)
m_parser_pause_flag = false;
// Let the insertion point have the value of the old insertion point.
m_tokenizer.restore_insertion_point();
}
if (token.is_end_tag()) {

View file

@ -46,10 +46,12 @@ class HTMLParser {
public:
HTMLParser(DOM::Document&, StringView input, const String& encoding);
HTMLParser(DOM::Document&);
~HTMLParser();
static NonnullOwnPtr<HTMLParser> create_with_uncertain_encoding(DOM::Document&, const ByteBuffer& input);
void run();
void run(const AK::URL&);
DOM::Document& document();
@ -67,6 +69,12 @@ public:
static bool is_special_tag(const FlyString& tag_name, const FlyString& namespace_);
HTMLTokenizer& tokenizer() { return m_tokenizer; }
bool aborted() const { return m_aborted; }
size_t script_nesting_level() const { return m_script_nesting_level; }
private:
const char* insertion_mode_name() const;
@ -127,7 +135,6 @@ private:
void parse_generic_raw_text_element(HTMLToken&);
void increment_script_nesting_level();
void decrement_script_nesting_level();
size_t script_nesting_level() const { return m_script_nesting_level; }
void reset_the_insertion_mode_appropriately();
void adjust_mathml_attributes(HTMLToken&);

View file

@ -2780,6 +2780,15 @@ void HTMLTokenizer::create_new_token(HTMLToken::Type type)
m_current_token.set_start_position({}, nth_last_position(offset));
}
// Creates a tokenizer over an empty decoded input, so that text can be added
// later through insert_input_at_insertion_point().
HTMLTokenizer::HTMLTokenizer()
{
    m_decoded_input = "";
    m_utf8_view = Utf8View(m_decoded_input);

    // The current and the previous iterator both start at the beginning of the view.
    auto const start = m_utf8_view.begin();
    m_utf8_iterator = start;
    m_prev_utf8_iterator = start;

    // Seed the source position list with an initial (0, 0) entry.
    m_source_positions.empend(0u, 0u);
}
HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding)
{
auto* decoder = TextCodec::decoder_for(encoding);
@ -2787,9 +2796,37 @@ HTMLTokenizer::HTMLTokenizer(StringView input, String const& encoding)
m_decoded_input = decoder->to_utf8(input);
m_utf8_view = Utf8View(m_decoded_input);
m_utf8_iterator = m_utf8_view.begin();
m_prev_utf8_iterator = m_utf8_view.begin();
m_source_positions.empend(0u, 0u);
}
// Splices `input` into the decoded input at the current insertion point.
//
// The decoded input is rebuilt as: everything before the insertion point, then
// `input`, then everything from the insertion point onwards. The UTF-8 view is
// recreated over the new string, the current iterator is re-seated at the byte
// offset it had before the splice, and the insertion point is moved past the
// inserted text.
//
// NOTE(review): m_prev_utf8_iterator is not re-seated here, so it still refers
// to the view over the previous string — confirm nothing reads it after an
// insertion.
void HTMLTokenizer::insert_input_at_insertion_point(String const& input)
{
    // Remember where the tokenizer currently is so it can resume from there.
    auto const resume_byte_offset = m_utf8_view.byte_offset_of(m_utf8_iterator);
    auto const split_at = m_insertion_point.position;

    // FIXME: Implement a InputStream to handle insertion_point and iterators.
    StringBuilder spliced {};
    spliced.append(m_decoded_input.substring(0, split_at));
    spliced.append(input);
    spliced.append(m_decoded_input.substring(split_at));
    m_decoded_input = spliced.build();

    // The old view and iterator referred to the previous string; rebuild both.
    m_utf8_view = Utf8View(m_decoded_input);
    m_utf8_iterator = m_utf8_view.iterator_at_byte_offset(resume_byte_offset);

    // Later insertions go after the text that was just inserted.
    m_insertion_point.position = split_at + input.length();
}
// Marks that an explicit end-of-file has been inserted into the input.
// HTMLParser::run() reads this flag (via is_eof_inserted()) to decide whether
// to keep going once the insertion point has been reached.
void HTMLTokenizer::insert_eof()
{
m_explicit_eof_inserted = true;
}
// Returns true once insert_eof() has been called on this tokenizer.
bool HTMLTokenizer::is_eof_inserted()
{
return m_explicit_eof_inserted;
}
void HTMLTokenizer::will_switch_to([[maybe_unused]] State new_state)
{
dbgln_if(TOKENIZER_TRACE_DEBUG, "[{}] Switch to {}", state_name(m_state), state_name(new_state));

View file

@ -101,6 +101,7 @@ namespace Web::HTML {
class HTMLTokenizer {
public:
explicit HTMLTokenizer();
explicit HTMLTokenizer(StringView input, String const& encoding);
enum class State {
@ -124,6 +125,24 @@ public:
String source() const { return m_decoded_input; }
void insert_input_at_insertion_point(String const& input);
void insert_eof();
bool is_eof_inserted();
// Returns whether the insertion point is currently defined.
bool is_insertion_point_defined() const { return m_insertion_point.defined; }
// Returns true when the insertion point is defined and its position is greater
// than or equal to the current iterator's offset in the UTF-8 view.
// NOTE(review): as written this holds whenever the insertion point lies at or
// after the tokenizer's current offset, not only once the tokenizer has
// advanced up to it — confirm this comparison direction is intended.
bool is_insertion_point_reached()
{
return m_insertion_point.defined && m_insertion_point.position >= m_utf8_view.iterator_offset(m_utf8_iterator);
}
// Marks the insertion point as undefined; its stored position is left untouched.
void undefine_insertion_point() { m_insertion_point.defined = false; }
// Saves the current insertion point as the "old" insertion point. There is a
// single slot, so a second store overwrites the first.
void store_insertion_point() { m_old_insertion_point = m_insertion_point; }
// Replaces the current insertion point with the one last saved by
// store_insertion_point().
void restore_insertion_point() { m_insertion_point = m_old_insertion_point; }
// Defines the insertion point and places it at the tokenizer's current
// position, i.e. at the offset of the current iterator in the UTF-8 view.
void update_insertion_point()
{
    m_insertion_point.position = m_utf8_view.iterator_offset(m_utf8_iterator);
    m_insertion_point.defined = true;
}
private:
void skip(size_t count);
Optional<u32> next_code_point();
@ -163,6 +182,13 @@ private:
String m_decoded_input;
// Where text from insert_input_at_insertion_point() is spliced into the input.
struct InsertionPoint {
// Offset into m_decoded_input; only meaningful while `defined` is true.
size_t position { 0 };
// Whether an insertion point currently exists.
bool defined { false };
};
InsertionPoint m_insertion_point {};
InsertionPoint m_old_insertion_point {};
Utf8View m_utf8_view;
Utf8CodePointIterator m_utf8_iterator;
Utf8CodePointIterator m_prev_utf8_iterator;
@ -172,6 +198,7 @@ private:
Optional<String> m_last_emitted_start_tag_name;
bool m_explicit_eof_inserted { false };
bool m_has_emitted_eof { false };
Queue<HTMLToken> m_queued_tokens;