1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-26 07:47:37 +00:00

AK: Don't store parts of URLs percent decoded

As noted in serval comments doing this goes against the WC3 spec,
and breaks parsing then re-serializing URLs that contain percent
encoded data, that was not encoded using the same character set as
the serializer.

For example, previously if you had a URL like:

https:://foo.com/what%2F%2F (the path is what + '//' percent encoded)

Creating URL("https:://foo.com/what%2F%2F").serialize() would return:

https://foo.com/what//

Which is incorrect and not the same as the URL we passed. This is
because the re-serializing uses the PercentEncodeSet::Path which
does not include '/'.

Only doing the percent encoding in the setters fixes this, which
is required to navigate to Google Street View (which includes a
percent encoded URL in its URL).

Seems to fix #13477 too
This commit is contained in:
MacDue 2023-04-09 14:21:00 +01:00 committed by Andreas Kling
parent d2fc8efd9e
commit 8283e8b88c
5 changed files with 85 additions and 51 deletions

View file

@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input)
// future for validation of URLs, which would then lead to infinite recursion.
// The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
// variables' values here, not what the URL class presents to its users.
// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting
// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode
// everything before setting the member variables.
URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
{
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
++iterator;
} else {
url->m_cannot_be_a_base_url = true;
url->append_path("");
url->append_slash();
state = State::CannotBeABaseUrlPath;
}
} else {
@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (password_token_seen) {
builder.append(url->password());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
url->m_password = URL::percent_decode(builder.string_view());
url->m_password = builder.string_view();
} else {
builder.append(url->username());
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
url->m_username = URL::percent_decode(builder.string_view());
url->m_username = builder.string_view();
}
}
buffer.clear();
@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
url->m_host = base_url->m_host;
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
url->append_path(base_url->m_paths[0]);
url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
state = State::Path;
continue;
}
@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
url->m_paths.remove(url->m_paths.size() - 1);
if (code_point != '/' && !(url->is_special() && code_point == '\\'))
url->append_path("");
url->append_slash();
} else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
url->append_path("");
url->append_slash();
} else if (!is_single_dot_path_segment(buffer.string_view())) {
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
auto drive_letter = buffer.string_view()[0];
@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
buffer.append(drive_letter);
buffer.append(':');
}
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->append_path(URL::percent_decode(buffer.string_view()));
url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
}
buffer.clear();
if (code_point == '?') {
@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// NOTE: Verify that the assumptions required for this simplification are correct.
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
if (code_point == '?') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
url->m_query = "";
state = State::Query;
} else if (code_point == '#') {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
url->m_fragment = "";
state = State::Fragment;
} else {
@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
if (code_point != end_of_file) {
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
} else {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_paths[0] = URL::percent_decode(buffer.string_view());
url->m_paths[0] = buffer.string_view();
}
}
break;
@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
buffer.append_code_point(code_point);
} else {
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
url->m_fragment = URL::percent_decode(buffer.string_view());
url->m_fragment = buffer.string_view();
buffer.clear();
}
break;