diff --git a/Tests/LibWeb/TestHTMLTokenizer.cpp b/Tests/LibWeb/TestHTMLTokenizer.cpp index 6d51c315ea..39f4fe4dec 100644 --- a/Tests/LibWeb/TestHTMLTokenizer.cpp +++ b/Tests/LibWeb/TestHTMLTokenizer.cpp @@ -59,9 +59,15 @@ using Token = Web::HTML::HTMLToken; EXPECT_EQ(current_token->type(), Token::Type::DOCTYPE); \ NEXT_TOKEN(); -#define EXPECT_TAG_TOKEN_ATTRIBUTE(name, value) \ - VERIFY(last_token); \ - EXPECT_EQ(last_token->attribute(#name), value); +#define EXPECT_TAG_TOKEN_ATTRIBUTE(name, attribute_value, name_start_column, name_end_column, value_start_column, value_end_column) \ + VERIFY(last_token); \ + auto name##_attr = last_token->raw_attribute(#name); \ + VERIFY(name##_attr.has_value()); \ + EXPECT_EQ(name##_attr->value, attribute_value); \ + EXPECT_EQ(name##_attr->name_start_position.column, name_start_column); \ + EXPECT_EQ(name##_attr->name_end_position.column, name_end_column); \ + EXPECT_EQ(name##_attr->value_start_position.column, value_start_column); \ + EXPECT_EQ(name##_attr->value_end_position.column, value_end_column); #define EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(count) \ VERIFY(last_token); \ @@ -128,7 +134,7 @@ TEST_CASE(unquoted_attributes) BEGIN_ENUMERATION(tokens); EXPECT_START_TAG_TOKEN(p, 1u, 10u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 10u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -139,7 +145,7 @@ TEST_CASE(single_quoted_attributes) BEGIN_ENUMERATION(tokens); EXPECT_START_TAG_TOKEN(p, 1u, 12u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -150,20 +156,32 @@ TEST_CASE(double_quoted_attributes) BEGIN_ENUMERATION(tokens); EXPECT_START_TAG_TOKEN(p, 1u, 12u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u); + EXPECT_END_OF_FILE_TOKEN(); + END_ENUMERATION(); +} + +TEST_CASE(valueless_attribute) +{ + auto tokens = run_tokenizer("

"sv); + BEGIN_ENUMERATION(tokens); + EXPECT_START_TAG_TOKEN(p, 1u, 6u); + EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(1); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "", 3u, 6u, 0u, 0u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } TEST_CASE(multiple_attributes) { - auto tokens = run_tokenizer("

"sv); + auto tokens = run_tokenizer("

"sv); BEGIN_ENUMERATION(tokens); - EXPECT_START_TAG_TOKEN(p, 1u, 35u); - EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar"); - EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "foobar"); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo2, "bar2"); + EXPECT_START_TAG_TOKEN(p, 1u, 39u); + EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(4); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "bar", 3u, 6u, 7u, 12u); + EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "foobar", 13u, 16u, 17u, 23u); + EXPECT_TAG_TOKEN_ATTRIBUTE(biz, "", 24u, 27u, 0u, 0u); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo2, "bar2", 28u, 32u, 33u, 39u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } @@ -174,9 +192,9 @@ TEST_CASE(character_reference_in_attribute) BEGIN_ENUMERATION(tokens); EXPECT_START_TAG_TOKEN(p, 1u, 43u); EXPECT_TAG_TOKEN_ATTRIBUTE_COUNT(3); - EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "a&b"); - EXPECT_TAG_TOKEN_ATTRIBUTE(bar, "a&b"); - EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "a&b"); + EXPECT_TAG_TOKEN_ATTRIBUTE(foo, "a&b", 3u, 6u, 7u, 14u); + EXPECT_TAG_TOKEN_ATTRIBUTE(bar, "a&b", 15u, 18u, 19u, 28u); + EXPECT_TAG_TOKEN_ATTRIBUTE(baz, "a&b", 29u, 32u, 33u, 43u); EXPECT_END_OF_FILE_TOKEN(); END_ENUMERATION(); } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h index 07c1cb7511..548e3193a3 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLToken.h @@ -246,6 +246,13 @@ public: } StringView attribute(DeprecatedFlyString const& attribute_name) const + { + if (auto result = raw_attribute(attribute_name); result.has_value()) + return result->value; + return {}; + } + + Optional raw_attribute(DeprecatedFlyString const& attribute_name) const { VERIFY(is_start_tag() || is_end_tag()); @@ -254,7 +261,7 @@ public: return {}; for (auto& attribute : *ptr) { if (attribute_name == attribute.local_name) - return attribute.value; + return attribute; } return {}; } diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp index d99534de0f..c38a78102e 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLTokenizer.cpp @@ -1051,8 +1051,6 @@ _StartOfFunction: } ON('/') { - if (m_current_token.has_attributes()) - m_current_token.last_attribute().name_end_position = nth_last_position(1); RECONSUME_IN(AfterAttributeName); } ON('>') @@ -1108,21 +1106,25 @@ _StartOfFunction: { ON_WHITESPACE { + m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON('/') { + m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON('>') { + m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } ON_EOF { + m_current_token.last_attribute().name_end_position = nth_last_position(1); m_current_token.last_attribute().local_name = consume_current_builder(); RECONSUME_IN(AfterAttributeName); } @@ -1196,7 +1198,7 @@ _StartOfFunction: { m_current_token.add_attribute({}); if (!m_source_positions.is_empty()) - m_current_token.last_attribute().name_start_position = m_source_positions.last(); + m_current_token.last_attribute().name_start_position = nth_last_position(1); RECONSUME_IN(AttributeName); } }