mirror of
https://github.com/RGBCube/serenity
synced 2025-05-20 17:55:08 +00:00
LibWeb: Annotate and simplify the HTML fragment parsing algorithm
This patch adds inline spec comments, and then adjusts the code a bit so it reads more like the spec.
This commit is contained in:
parent
bddc9d6c52
commit
5530040b3c
1 changed file with 56 additions and 13 deletions
|
@ -3388,48 +3388,91 @@ DOM::Document& HTMLParser::document()
|
|||
return *m_document;
|
||||
}
|
||||
|
||||
// https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
|
||||
Vector<JS::Handle<DOM::Node>> HTMLParser::parse_html_fragment(DOM::Element& context_element, StringView markup)
|
||||
{
|
||||
// 1. Create a new Document node, and mark it as being an HTML document.
|
||||
auto temp_document = DOM::Document::create(context_element.realm());
|
||||
temp_document->set_document_type(DOM::Document::Type::HTML);
|
||||
|
||||
// 2. If the node document of the context element is in quirks mode, then let the Document be in quirks mode.
|
||||
// Otherwise, if the node document of the context element is in limited-quirks mode, then let the Document be in limited-quirks mode.
|
||||
// Otherwise, leave the Document in no-quirks mode.
|
||||
temp_document->set_quirks_mode(context_element.document().mode());
|
||||
|
||||
// 3. Create a new HTML parser, and associate it with the just created Document node.
|
||||
auto parser = HTMLParser::create(*temp_document, markup, "utf-8");
|
||||
parser->m_context_element = JS::make_handle(context_element);
|
||||
parser->m_parsing_fragment = true;
|
||||
parser->document().set_quirks_mode(context_element.document().mode());
|
||||
|
||||
// 4. Set the state of the HTML parser's tokenization stage as follows, switching on the context element:
|
||||
// - title
|
||||
// - textarea
|
||||
if (context_element.local_name().is_one_of(HTML::TagNames::title, HTML::TagNames::textarea)) {
|
||||
// Switch the tokenizer to the RCDATA state.
|
||||
parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RCDATA);
|
||||
} else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
|
||||
}
|
||||
// - style
|
||||
// - xmp
|
||||
// - iframe
|
||||
// - noembed
|
||||
// - noframes
|
||||
else if (context_element.local_name().is_one_of(HTML::TagNames::style, HTML::TagNames::xmp, HTML::TagNames::iframe, HTML::TagNames::noembed, HTML::TagNames::noframes)) {
|
||||
// Switch the tokenizer to the RAWTEXT state.
|
||||
parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
|
||||
} else if (context_element.local_name().is_one_of(HTML::TagNames::script)) {
|
||||
}
|
||||
// - script
|
||||
else if (context_element.local_name().is_one_of(HTML::TagNames::script)) {
|
||||
// Switch the tokenizer to the script data state.
|
||||
parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::ScriptData);
|
||||
} else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
|
||||
}
|
||||
// - noscript
|
||||
else if (context_element.local_name().is_one_of(HTML::TagNames::noscript)) {
|
||||
// If the scripting flag is enabled, switch the tokenizer to the RAWTEXT state. Otherwise, leave the tokenizer in the data state.
|
||||
if (context_element.document().is_scripting_enabled())
|
||||
parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::RAWTEXT);
|
||||
} else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
|
||||
}
|
||||
// - plaintext
|
||||
else if (context_element.local_name().is_one_of(HTML::TagNames::plaintext)) {
|
||||
// Switch the tokenizer to the PLAINTEXT state.
|
||||
parser->m_tokenizer.switch_to({}, HTMLTokenizer::State::PLAINTEXT);
|
||||
}
|
||||
// Any other element
|
||||
else {
|
||||
// Leave the tokenizer in the data state.
|
||||
}
|
||||
|
||||
// 5. Let root be a new html element with no attributes.
|
||||
auto root = create_element(context_element.document(), HTML::TagNames::html, Namespace::HTML);
|
||||
parser->document().append_child(root);
|
||||
|
||||
// 6. Append the element root to the Document node created above.
|
||||
temp_document->append_child(root);
|
||||
|
||||
// 7. Set up the parser's stack of open elements so that it contains just the single element root.
|
||||
parser->m_stack_of_open_elements.push(root);
|
||||
|
||||
// 8. If the context element is a template element,
|
||||
if (context_element.local_name() == HTML::TagNames::template_) {
|
||||
// push "in template" onto the stack of template insertion modes so that it is the new current template insertion mode.
|
||||
parser->m_stack_of_template_insertion_modes.append(InsertionMode::InTemplate);
|
||||
}
|
||||
|
||||
// FIXME: Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
|
||||
// FIXME: 9. Create a start tag token whose name is the local name of context and whose attributes are the attributes of context.
|
||||
// Let this start tag token be the start tag token of the context node, e.g. for the purposes of determining if it is an HTML integration point.
|
||||
|
||||
// 10. Reset the parser's insertion mode appropriately.
|
||||
parser->reset_the_insertion_mode_appropriately();
|
||||
|
||||
for (auto* form_candidate = &context_element; form_candidate; form_candidate = form_candidate->parent_element()) {
|
||||
if (is<HTMLFormElement>(*form_candidate)) {
|
||||
parser->m_form_element = JS::make_handle(verify_cast<HTMLFormElement>(*form_candidate));
|
||||
break;
|
||||
}
|
||||
}
|
||||
// 11. Set the parser's form element pointer to the nearest node to the context element that is a form element
|
||||
// (going straight up the ancestor chain, and including the element itself, if it is a form element), if any.
|
||||
// (If there is no such form element, the form element pointer keeps its initial value, null.)
|
||||
parser->m_form_element = context_element.first_ancestor_of_type<HTMLFormElement>();
|
||||
|
||||
// 12. Place the input into the input stream for the HTML parser just created. The encoding confidence is irrelevant.
|
||||
// 13. Start the parser and let it run until it has consumed all the characters just inserted into the input stream.
|
||||
parser->run(context_element.document().url());
|
||||
|
||||
// 14. Return the child nodes of root, in tree order.
|
||||
Vector<JS::Handle<DOM::Node>> children;
|
||||
while (JS::GCPtr<DOM::Node> child = root->first_child()) {
|
||||
root->remove_child(*child);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue