diff --git a/Tests/LibWeb/TestHTMLTokenizer.cpp b/Tests/LibWeb/TestHTMLTokenizer.cpp index 47965d2697..6d51c315ea 100644 --- a/Tests/LibWeb/TestHTMLTokenizer.cpp +++ b/Tests/LibWeb/TestHTMLTokenizer.cpp @@ -23,14 +23,18 @@ using Token = Web::HTML::HTMLToken; last_token = &*current_token; \ ++current_token; -#define EXPECT_START_TAG_TOKEN(_tag_name) \ - EXPECT_EQ(current_token->type(), Token::Type::StartTag); \ - EXPECT_EQ(current_token->tag_name(), #_tag_name); \ +#define EXPECT_START_TAG_TOKEN(_tag_name, start_column, end_column) \ + EXPECT_EQ(current_token->type(), Token::Type::StartTag); \ + EXPECT_EQ(current_token->tag_name(), #_tag_name); \ + EXPECT_EQ(current_token->start_position().column, start_column); \ + EXPECT_EQ(current_token->end_position().column, end_column); \ NEXT_TOKEN(); -#define EXPECT_END_TAG_TOKEN(_tag_name) \ - EXPECT_EQ(current_token->type(), Token::Type::EndTag); \ - EXPECT_EQ(current_token->tag_name(), #_tag_name); \ +#define EXPECT_END_TAG_TOKEN(_tag_name, start_column, end_column) \ + EXPECT_EQ(current_token->type(), Token::Type::EndTag); \ + EXPECT_EQ(current_token->tag_name(), #_tag_name); \ + EXPECT_EQ(current_token->start_position().column, start_column); \ + EXPECT_EQ(current_token->end_position().column, end_column); \ NEXT_TOKEN(); #define EXPECT_END_OF_FILE_TOKEN() \ @@ -97,12 +101,12 @@ TEST_CASE(basic) { auto tokens = run_tokenizer(""sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(html); - EXPECT_START_TAG_TOKEN(head); - EXPECT_END_TAG_TOKEN(head); - EXPECT_START_TAG_TOKEN(body); - EXPECT_END_TAG_TOKEN(body); - EXPECT_END_TAG_TOKEN(html); + EXPECT_START_TAG_TOKEN(html, 1u, 5u); + EXPECT_START_TAG_TOKEN(head, 7u, 11u); + EXPECT_END_TAG_TOKEN(head, 14u, 18u); + EXPECT_START_TAG_TOKEN(body, 20u, 24u); + EXPECT_END_TAG_TOKEN(body, 27u, 31u); + EXPECT_END_TAG_TOKEN(html, 34u, 38u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -111,9 +115,9 @@ TEST_CASE(basic_with_text) { auto tokens = run_tokenizer("

This is some text.

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 2u); EXPECT_CHARACTER_TOKENS(This is some text.); - EXPECT_END_TAG_TOKEN(p); + EXPECT_END_TAG_TOKEN(p, 23u, 24u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -122,7 +126,7 @@ TEST_CASE(unquoted_attributes) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 10u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); EXPECT_END_OF_FILE_TOKEN(); @@ -133,7 +137,7 @@ TEST_CASE(single_quoted_attributes) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 12u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); EXPECT_END_OF_FILE_TOKEN(); @@ -144,7 +148,7 @@ TEST_CASE(double_quoted_attributes) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 12u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); EXPECT_END_OF_FILE_TOKEN(); @@ -155,7 +159,7 @@ TEST_CASE(multiple_attributes) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 35u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3); EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "foobar"); @@ -168,7 +172,7 @@ TEST_CASE(character_reference_in_attribute) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 43u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3); EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "a&b"); EXPECT_TAG_TOKEN_ATTRIBUTE(bar, "a&b"); @@ -181,9 +185,9 @@ TEST_CASE(comment) { auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p); + EXPECT_START_TAG_TOKEN(p, 1u, 2u); EXPECT_COMMENT_TOKEN(); - EXPECT_END_TAG_TOKEN(p); + EXPECT_END_TAG_TOKEN(p, 31u, 32u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -193,8 +197,8 @@ TEST_CASE(doctype) auto tokens = run_tokenizer(""sv); BEGIN_ENUMERATION(tokens); EXPECT_DOCTYPE_TOKEN(); - EXPECT_START_TAG_TOKEN(html); - EXPECT_END_TAG_TOKEN(html); + EXPECT_START_TAG_TOKEN(html, 16u, 20u); + EXPECT_END_TAG_TOKEN(html, 23u, 27u); } // NOTE: This relies on the format of HTMLToken::to_string() staying the same. @@ -215,5 +219,5 @@ TEST_CASE(regression) DeprecatedString file_contents { content.bytes() }; auto tokens = run_tokenizer(file_contents); u32 hash = hash_tokens(tokens); - EXPECT_EQ(hash, 710375345u); + EXPECT_EQ(hash, 3657343287u); } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index aa6681f575..d99534de0f 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -347,7 +347,6 @@ _StartOfFunction: ON('>') { m_current_token.set_tag_name(consume_current_builder()); - m_current_token.set_end_position({}, nth_last_position(1)); SWITCH_TO_AND_EMIT_CURRENT_TOKEN(Data); } ON_ASCII_UPPER_ALPHA @@ -366,7 +365,6 @@ _StartOfFunction: ON_EOF { log_parse_error(); - m_current_token.set_end_position({}, nth_last_position(0)); EMIT_EOF; } ANYTHING_ELSE @@ -2773,19 +2771,9 @@ bool HTMLTokenizer::consume_next_if_match(StringView string, CaseSensitivity cas void HTMLTokenizer::create_new_token(HTMLToken::Type type) { m_current_token = { type }; - size_t offset = 0; - switch (type) { - case HTMLToken::Type::StartTag: - offset = 1; - break; - case HTMLToken::Type::EndTag: - offset = 2; - break; - default: - break; - } - m_current_token.set_start_position({}, nth_last_position(offset)); + auto is_start_or_end_tag = type == HTMLToken::Type::StartTag || type == HTMLToken::Type::EndTag; + m_current_token.set_start_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0)); } HTMLTokenizer::HTMLTokenizer() @@ -2855,7 +2843,9 @@ void HTMLTokenizer::will_emit(HTMLToken& token) { if (token.is_start_tag()) m_last_emitted_start_tag_name = token.tag_name(); - token.set_end_position({}, nth_last_position(0)); + + auto is_start_or_end_tag = token.type() == HTMLToken::Type::StartTag || token.type() == HTMLToken::Type::EndTag; + token.set_end_position({}, nth_last_position(is_start_or_end_tag ? 1 : 0)); } bool HTMLTokenizer::current_end_tag_token_is_appropriate() const diff --git a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp index 782750ff46..0fb3663922 100644 --- a/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp +++ b/Userland/Libraries/LibWeb/HTML/SyntaxHighlighter/SyntaxHighlighter.cpp @@ -132,8 +132,6 @@ void SyntaxHighlighter::rehighlight(Palette const& palette) continue; } - size_t token_start_offset = token->is_end_tag() ? 1 : 0; - if (token->is_comment()) { highlight( token->start_position().line, @@ -150,25 +148,25 @@ void SyntaxHighlighter::rehighlight(Palette const& palette) } else if (token->is_start_tag() || token->is_end_tag()) { highlight( token->start_position().line, - token->start_position().column + token_start_offset, + token->start_position().column, token->start_position().line, - token->start_position().column + token_start_offset + token->tag_name().length(), + token->start_position().column + token->tag_name().length(), { palette.syntax_keyword(), {}, true }, token->is_start_tag() ? AugmentedTokenKind::OpenTag : AugmentedTokenKind::CloseTag); token->for_each_attribute([&](auto& attribute) { highlight( attribute.name_start_position.line, - attribute.name_start_position.column + token_start_offset, + attribute.name_start_position.column, attribute.name_end_position.line, - attribute.name_end_position.column + token_start_offset, + attribute.name_end_position.column, { palette.syntax_identifier(), {} }, AugmentedTokenKind::AttributeName); highlight( attribute.value_start_position.line, - attribute.value_start_position.column + token_start_offset, + attribute.value_start_position.column, attribute.value_end_position.line, - attribute.value_end_position.column + token_start_offset, + attribute.value_end_position.column, { palette.syntax_string(), {} }, AugmentedTokenKind::AttributeValue); return IterationDecision::Continue;