mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 15:48:12 +00:00
LibWeb: Implement more table support in the new HTML parser
This is enough to parse the Google front page! (Note: I did have to hack the tokenizer while parsing Google, in order to avoid named character references screwing everything up. We'll fix that too soon enough!)
This commit is contained in:
parent
7f18c51f4c
commit
ebb1649a52
5 changed files with 142 additions and 1 deletions
|
@ -118,6 +118,15 @@ void HTMLDocumentParser::process_using_the_rules_for(InsertionMode mode, HTMLTok
|
||||||
case InsertionMode::InTable:
|
case InsertionMode::InTable:
|
||||||
handle_in_table(token);
|
handle_in_table(token);
|
||||||
break;
|
break;
|
||||||
|
case InsertionMode::InTableBody:
|
||||||
|
handle_in_table_body(token);
|
||||||
|
break;
|
||||||
|
case InsertionMode::InRow:
|
||||||
|
handle_in_row(token);
|
||||||
|
break;
|
||||||
|
case InsertionMode::InCell:
|
||||||
|
handle_in_cell(token);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
ASSERT_NOT_REACHED();
|
ASSERT_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
@ -507,6 +516,9 @@ void HTMLDocumentParser::reconstruct_the_active_formatting_elements()
|
||||||
if (m_list_of_active_formatting_elements.is_empty())
|
if (m_list_of_active_formatting_elements.is_empty())
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
if (m_list_of_active_formatting_elements.entries().last().is_marker())
|
||||||
|
return;
|
||||||
|
|
||||||
if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
|
if (m_stack_of_open_elements.contains(*m_list_of_active_formatting_elements.entries().last().element))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
@ -853,6 +865,108 @@ void HTMLDocumentParser::handle_text(HTMLToken& token)
|
||||||
ASSERT_NOT_REACHED();
|
ASSERT_NOT_REACHED();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void HTMLDocumentParser::clear_the_stack_back_to_a_table_context()
|
||||||
|
{
|
||||||
|
while (!current_node().tag_name().is_one_of("table", "template", "html"))
|
||||||
|
m_stack_of_open_elements.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLDocumentParser::clear_the_stack_back_to_a_table_row_context()
|
||||||
|
{
|
||||||
|
while (!current_node().tag_name().is_one_of("tr", "template", "html"))
|
||||||
|
m_stack_of_open_elements.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
void HTMLDocumentParser::clear_the_stack_back_to_a_table_body_context()
|
||||||
|
{
|
||||||
|
while (!current_node().tag_name().is_one_of("tbody", "tfoot", "thead", "template", "html"))
|
||||||
|
m_stack_of_open_elements.pop();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Token handler for InsertionMode::InRow.
//
// Handled so far:
//   - <th> / <td> start tags: open a cell and switch to InCell.
//   - </tr> end tag: close the row and switch back to InTableBody.
// Every other token currently hits TODO().
void HTMLDocumentParser::handle_in_row(HTMLToken& token)
{
    // The tag name is only queried after the token kind has been checked.
    if (token.is_start_tag()) {
        if (token.tag_name().is_one_of("th", "td")) {
            clear_the_stack_back_to_a_table_row_context();
            insert_html_element(token);
            m_insertion_mode = InsertionMode::InCell;
            // A marker is added to the active formatting elements when a cell is opened.
            m_list_of_active_formatting_elements.add_marker();
            return;
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name() == "tr") {
            if (m_stack_of_open_elements.has_in_table_scope("tr")) {
                clear_the_stack_back_to_a_table_row_context();
                // Pop the node the row-context clearing stopped at.
                m_stack_of_open_elements.pop();
                m_insertion_mode = InsertionMode::InTableBody;
            } else {
                // No <tr> in table scope: report and ignore the token.
                PARSE_ERROR();
            }
            return;
        }
    }

    TODO();
}
|
||||||
|
|
||||||
|
// Token handler for InsertionMode::InCell.
//
// Handled so far:
//   - </td> / </th>: close the cell and switch back to InRow.
//   - </body>, </caption>, </col>, </colgroup>, </html>: parse error, token ignored.
//   - Table-structure start tags and the remaining table end tags: TODO().
// Anything else is processed using the InBody rules.
void HTMLDocumentParser::handle_in_cell(HTMLToken& token)
{
    if (token.is_end_tag() && token.tag_name().is_one_of("td", "th")) {
        const bool cell_is_in_table_scope = m_stack_of_open_elements.has_in_table_scope(token.tag_name());
        if (!cell_is_in_table_scope) {
            PARSE_ERROR();
            return;
        }

        generate_implied_end_tags();

        // A mismatched current node is reported but still closed below.
        if (current_node().tag_name() != token.tag_name()) {
            PARSE_ERROR();
        }

        // Pop everything above the cell, then the cell itself.
        while (current_node().tag_name() != token.tag_name()) {
            m_stack_of_open_elements.pop();
        }
        m_stack_of_open_elements.pop();

        m_list_of_active_formatting_elements.clear_up_to_the_last_marker();
        m_insertion_mode = InsertionMode::InRow;
        return;
    }

    if (token.is_start_tag()) {
        if (token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr")) {
            TODO();
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("body", "caption", "col", "colgroup", "html")) {
            PARSE_ERROR();
            return;
        }
    }

    if (token.is_end_tag()) {
        if (token.tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
            TODO();
        }
    }

    process_using_the_rules_for(InsertionMode::InBody, token);
}
|
||||||
|
|
||||||
|
// Token handler for InsertionMode::InTableBody.
//
// Handled so far:
//   - <tr> start tag: open a row and switch to InRow.
//   - <caption>, <col>, <colgroup>, <tbody>, <tfoot>, <thead> start tags and
//     the </table> end tag: close the current table section, switch to
//     InTable, and reprocess the token there.
// Every other token currently hits TODO().
void HTMLDocumentParser::handle_in_table_body(HTMLToken& token)
{
    if (token.is_start_tag() && token.tag_name() == "tr") {
        clear_the_stack_back_to_a_table_body_context();
        insert_html_element(token);
        m_insertion_mode = InsertionMode::InRow;
        return;
    }

    if ((token.is_start_tag() && token.tag_name().is_one_of("caption", "col", "colgroup", "tbody", "tfoot", "thead"))
        || (token.is_end_tag() && token.tag_name() == "table")) {
        // If the stack of open elements does not have a tbody, thead, or tfoot
        // element in table scope, this is a parse error; ignore the token.
        // Without this guard, the pop below would remove whatever node the
        // clearing stopped at, which need not be a table section.
        if (!m_stack_of_open_elements.has_in_table_scope("tbody")
            && !m_stack_of_open_elements.has_in_table_scope("thead")
            && !m_stack_of_open_elements.has_in_table_scope("tfoot")) {
            PARSE_ERROR();
            return;
        }

        clear_the_stack_back_to_a_table_body_context();
        // Pop the table section the clearing stopped at, then let the
        // InTable rules handle the token.
        m_stack_of_open_elements.pop();
        m_insertion_mode = InsertionMode::InTable;
        process_using_the_rules_for(InsertionMode::InTable, token);
        return;
    }

    TODO();
}
|
||||||
|
|
||||||
void HTMLDocumentParser::handle_in_table(HTMLToken& token)
|
void HTMLDocumentParser::handle_in_table(HTMLToken& token)
|
||||||
{
|
{
|
||||||
if (token.is_character() && current_node().tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
|
if (token.is_character() && current_node().tag_name().is_one_of("table", "tbody", "tfoot", "thead", "tr")) {
|
||||||
|
@ -879,7 +993,14 @@ void HTMLDocumentParser::handle_in_table(HTMLToken& token)
|
||||||
TODO();
|
TODO();
|
||||||
}
|
}
|
||||||
if (token.is_start_tag() && token.tag_name().is_one_of("td", "th", "tr")) {
|
if (token.is_start_tag() && token.tag_name().is_one_of("td", "th", "tr")) {
|
||||||
TODO();
|
clear_the_stack_back_to_a_table_context();
|
||||||
|
HTMLToken fake_tbody_token;
|
||||||
|
fake_tbody_token.m_type = HTMLToken::Type::StartTag;
|
||||||
|
fake_tbody_token.m_tag.tag_name.append("tbody");
|
||||||
|
insert_html_element(fake_tbody_token);
|
||||||
|
m_insertion_mode = InsertionMode::InTableBody;
|
||||||
|
process_using_the_rules_for(InsertionMode::InTableBody, token);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
if (token.is_start_tag() && token.tag_name() == "table") {
|
if (token.is_start_tag() && token.tag_name() == "table") {
|
||||||
PARSE_ERROR();
|
PARSE_ERROR();
|
||||||
|
|
|
@ -90,6 +90,9 @@ private:
|
||||||
void handle_after_after_body(HTMLToken&);
|
void handle_after_after_body(HTMLToken&);
|
||||||
void handle_text(HTMLToken&);
|
void handle_text(HTMLToken&);
|
||||||
void handle_in_table(HTMLToken&);
|
void handle_in_table(HTMLToken&);
|
||||||
|
void handle_in_table_body(HTMLToken&);
|
||||||
|
void handle_in_row(HTMLToken&);
|
||||||
|
void handle_in_cell(HTMLToken&);
|
||||||
|
|
||||||
void generate_implied_end_tags(const FlyString& exception = {});
|
void generate_implied_end_tags(const FlyString& exception = {});
|
||||||
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
|
bool stack_of_open_elements_has_element_with_tag_name_in_scope(const FlyString& tag_name);
|
||||||
|
@ -108,6 +111,9 @@ private:
|
||||||
size_t script_nesting_level() const { return m_script_nesting_level; }
|
size_t script_nesting_level() const { return m_script_nesting_level; }
|
||||||
void reset_the_insertion_mode_appropriately();
|
void reset_the_insertion_mode_appropriately();
|
||||||
void run_the_adoption_agency_algorithm(HTMLToken&);
|
void run_the_adoption_agency_algorithm(HTMLToken&);
|
||||||
|
void clear_the_stack_back_to_a_table_context();
|
||||||
|
void clear_the_stack_back_to_a_table_body_context();
|
||||||
|
void clear_the_stack_back_to_a_table_row_context();
|
||||||
|
|
||||||
InsertionMode m_insertion_mode { InsertionMode::Initial };
|
InsertionMode m_insertion_mode { InsertionMode::Initial };
|
||||||
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
|
InsertionMode m_original_insertion_mode { InsertionMode::Initial };
|
||||||
|
|
|
@ -71,4 +71,13 @@ void ListOfActiveFormattingElements::remove(Element& element)
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void ListOfActiveFormattingElements::clear_up_to_the_last_marker()
|
||||||
|
{
|
||||||
|
while (!m_entries.is_empty()) {
|
||||||
|
auto entry = m_entries.take_last();
|
||||||
|
if (entry.is_marker())
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -56,6 +56,8 @@ public:
|
||||||
|
|
||||||
Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
|
Element* last_element_with_tag_name_before_marker(const FlyString& tag_name);
|
||||||
|
|
||||||
|
void clear_up_to_the_last_marker();
|
||||||
|
|
||||||
private:
|
private:
|
||||||
Vector<Entry> m_entries;
|
Vector<Entry> m_entries;
|
||||||
};
|
};
|
||||||
|
|
|
@ -37,6 +37,9 @@ public:
|
||||||
StackOfOpenElements() { }
|
StackOfOpenElements() { }
|
||||||
~StackOfOpenElements();
|
~StackOfOpenElements();
|
||||||
|
|
||||||
|
// Returns the first entry of m_elements, i.e. the earliest-pushed element still on the stack (push() appends).
Element& first() { return m_elements.first(); }
|
||||||
|
// Returns the last entry of m_elements, i.e. the most recently pushed element (the one pop() would remove).
Element& last() { return m_elements.last(); }
|
||||||
|
|
||||||
bool is_empty() const { return m_elements.is_empty(); }
|
bool is_empty() const { return m_elements.is_empty(); }
|
||||||
void push(NonnullRefPtr<Element> element) { m_elements.append(move(element)); }
|
void push(NonnullRefPtr<Element> element) { m_elements.append(move(element)); }
|
||||||
NonnullRefPtr<Element> pop() { return m_elements.take_last(); }
|
NonnullRefPtr<Element> pop() { return m_elements.take_last(); }
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue