mirror of
https://github.com/RGBCube/serenity
synced 2025-07-27 11:07:45 +00:00
AK: Improve the parsing of data urls
Improve the parsing of data urls in URLParser to bring it more up-to- spec. At the moment, we cannot parse the components of the MIME type since it is represented as a string, but the spec requires it to be parsed as a "MIME type record".
This commit is contained in:
parent
f5c3225286
commit
4e8e1b7b3a
2 changed files with 55 additions and 11 deletions
|
@ -116,7 +116,7 @@ constexpr bool is_double_dot_path_segment(StringView const& input)
|
||||||
}
|
}
|
||||||
|
|
||||||
// https://fetch.spec.whatwg.org/#data-urls
|
// https://fetch.spec.whatwg.org/#data-urls
|
||||||
// FIXME: This only loosely follow the spec, as we use the same class for "regular" and data URLs, unlike the spec.
|
// FIXME: This only loosely follows the spec, as we use the same class for "regular" and data URLs, unlike the spec.
|
||||||
Optional<URL> URLParser::parse_data_url(StringView const& raw_input)
|
Optional<URL> URLParser::parse_data_url(StringView const& raw_input)
|
||||||
{
|
{
|
||||||
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsing '{}'.", raw_input);
|
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsing '{}'.", raw_input);
|
||||||
|
@ -125,24 +125,28 @@ Optional<URL> URLParser::parse_data_url(StringView const& raw_input)
|
||||||
auto comma_offset = input.find(',');
|
auto comma_offset = input.find(',');
|
||||||
if (!comma_offset.has_value())
|
if (!comma_offset.has_value())
|
||||||
return {};
|
return {};
|
||||||
auto mime_type = input.substring_view(0, comma_offset.value());
|
auto mime_type = StringUtils::trim(input.substring_view(0, comma_offset.value()), "\t\n\f\r ", TrimMode::Both);
|
||||||
// FIXME: Strip leading and trailing ASCII whitespace from mimeType
|
|
||||||
auto encoded_body = input.substring_view(comma_offset.value() + 1);
|
auto encoded_body = input.substring_view(comma_offset.value() + 1);
|
||||||
auto body = URL::percent_decode(encoded_body);
|
auto body = URL::percent_decode(encoded_body);
|
||||||
bool is_base_64_encoded = false;
|
bool is_base64_encoded = false;
|
||||||
if (mime_type.ends_with(";base64", CaseSensitivity::CaseInsensitive)) {
|
if (mime_type.ends_with("base64", CaseSensitivity::CaseInsensitive)) {
|
||||||
is_base_64_encoded = true;
|
auto substring_view = mime_type.substring_view(0, mime_type.length() - 6);
|
||||||
mime_type = mime_type.substring_view(0, mime_type.length() - 7);
|
auto trimmed_substring_view = StringUtils::trim(substring_view, " ", TrimMode::Right);
|
||||||
|
if (trimmed_substring_view.ends_with(';')) {
|
||||||
|
is_base64_encoded = true;
|
||||||
|
mime_type = trimmed_substring_view.substring_view(0, trimmed_substring_view.length() - 1);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (mime_type.starts_with(";")) {
|
|
||||||
StringBuilder builder;
|
StringBuilder builder;
|
||||||
|
if (mime_type.starts_with(";") || mime_type.is_empty()) {
|
||||||
builder.append("text/plain");
|
builder.append("text/plain");
|
||||||
builder.append(mime_type);
|
builder.append(mime_type);
|
||||||
mime_type = builder.to_string();
|
mime_type = builder.string_view();
|
||||||
}
|
}
|
||||||
|
|
||||||
URL url { mime_type, move(body), is_base_64_encoded };
|
// FIXME: Parse the MIME type's components according to https://mimesniff.spec.whatwg.org/#parse-a-mime-type
|
||||||
|
URL url { StringUtils::trim(mime_type, "\n\r\t ", TrimMode::Both), move(body), is_base64_encoded };
|
||||||
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsed data URL to be '{}'.", url.serialize());
|
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse_data_url: Parsed data URL to be '{}'.", url.serialize());
|
||||||
return url;
|
return url;
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
* Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
|
||||||
|
* Copyright (c) 2021, the SerenityOS developers.
|
||||||
*
|
*
|
||||||
* SPDX-License-Identifier: BSD-2-Clause
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
*/
|
*/
|
||||||
|
@ -215,9 +216,22 @@ TEST_CASE(data_url)
|
||||||
EXPECT(url.host().is_null());
|
EXPECT(url.host().is_null());
|
||||||
EXPECT_EQ(url.data_mime_type(), "text/html");
|
EXPECT_EQ(url.data_mime_type(), "text/html");
|
||||||
EXPECT_EQ(url.data_payload(), "test");
|
EXPECT_EQ(url.data_payload(), "test");
|
||||||
|
EXPECT(!url.data_payload_is_base64());
|
||||||
EXPECT_EQ(url.serialize(), "data:text/html,test");
|
EXPECT_EQ(url.serialize(), "data:text/html,test");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(data_url_default_mime_type)
|
||||||
|
{
|
||||||
|
URL url("data:,test");
|
||||||
|
EXPECT(url.is_valid());
|
||||||
|
EXPECT_EQ(url.scheme(), "data");
|
||||||
|
EXPECT(url.host().is_null());
|
||||||
|
EXPECT_EQ(url.data_mime_type(), "text/plain");
|
||||||
|
EXPECT_EQ(url.data_payload(), "test");
|
||||||
|
EXPECT(!url.data_payload_is_base64());
|
||||||
|
EXPECT_EQ(url.serialize(), "data:text/plain,test");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(data_url_encoded)
|
TEST_CASE(data_url_encoded)
|
||||||
{
|
{
|
||||||
URL url("data:text/html,Hello%20friends%2C%0X%X0");
|
URL url("data:text/html,Hello%20friends%2C%0X%X0");
|
||||||
|
@ -226,6 +240,7 @@ TEST_CASE(data_url_encoded)
|
||||||
EXPECT(url.host().is_null());
|
EXPECT(url.host().is_null());
|
||||||
EXPECT_EQ(url.data_mime_type(), "text/html");
|
EXPECT_EQ(url.data_mime_type(), "text/html");
|
||||||
EXPECT_EQ(url.data_payload(), "Hello friends,%0X%X0");
|
EXPECT_EQ(url.data_payload(), "Hello friends,%0X%X0");
|
||||||
|
EXPECT(!url.data_payload_is_base64());
|
||||||
EXPECT_EQ(url.serialize(), "data:text/html,Hello friends,%0X%X0");
|
EXPECT_EQ(url.serialize(), "data:text/html,Hello friends,%0X%X0");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -237,9 +252,34 @@ TEST_CASE(data_url_base64_encoded)
|
||||||
EXPECT(url.host().is_null());
|
EXPECT(url.host().is_null());
|
||||||
EXPECT_EQ(url.data_mime_type(), "text/html");
|
EXPECT_EQ(url.data_mime_type(), "text/html");
|
||||||
EXPECT_EQ(url.data_payload(), "test");
|
EXPECT_EQ(url.data_payload(), "test");
|
||||||
|
EXPECT(url.data_payload_is_base64());
|
||||||
EXPECT_EQ(url.serialize(), "data:text/html;base64,test");
|
EXPECT_EQ(url.serialize(), "data:text/html;base64,test");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
TEST_CASE(data_url_base64_encoded_default_mime_type)
|
||||||
|
{
|
||||||
|
URL url("data:;base64,test");
|
||||||
|
EXPECT(url.is_valid());
|
||||||
|
EXPECT_EQ(url.scheme(), "data");
|
||||||
|
EXPECT(url.host().is_null());
|
||||||
|
EXPECT_EQ(url.data_mime_type(), "text/plain");
|
||||||
|
EXPECT_EQ(url.data_payload(), "test");
|
||||||
|
EXPECT(url.data_payload_is_base64());
|
||||||
|
EXPECT_EQ(url.serialize(), "data:text/plain;base64,test");
|
||||||
|
}
|
||||||
|
|
||||||
|
TEST_CASE(data_url_base64_encoded_with_whitespace)
|
||||||
|
{
|
||||||
|
URL url("data: text/html ; bAsE64 , test with whitespace ");
|
||||||
|
EXPECT(url.is_valid());
|
||||||
|
EXPECT_EQ(url.scheme(), "data");
|
||||||
|
EXPECT(url.host().is_null());
|
||||||
|
EXPECT_EQ(url.data_mime_type(), "text/html");
|
||||||
|
EXPECT_EQ(url.data_payload(), " test with whitespace ");
|
||||||
|
EXPECT(url.data_payload_is_base64());
|
||||||
|
EXPECT_EQ(url.serialize(), "data:text/html;base64, test with whitespace ");
|
||||||
|
}
|
||||||
|
|
||||||
TEST_CASE(trailing_slash_with_complete_url)
|
TEST_CASE(trailing_slash_with_complete_url)
|
||||||
{
|
{
|
||||||
EXPECT_EQ(URL("http://a/b/").complete_url("c/").serialize(), "http://a/b/c/");
|
EXPECT_EQ(URL("http://a/b/").complete_url("c/").serialize(), "http://a/b/c/");
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue