mirror of
https://github.com/RGBCube/serenity
synced 2025-07-26 07:47:37 +00:00
AK: Don't store parts of URLs percent decoded
As noted in serval comments doing this goes against the WC3 spec, and breaks parsing then re-serializing URLs that contain percent encoded data, that was not encoded using the same character set as the serializer. For example, previously if you had a URL like: https:://foo.com/what%2F%2F (the path is what + '//' percent encoded) Creating URL("https:://foo.com/what%2F%2F").serialize() would return: https://foo.com/what// Which is incorrect and not the same as the URL we passed. This is because the re-serializing uses the PercentEncodeSet::Path which does not include '/'. Only doing the percent encoding in the setters fixes this, which is required to navigate to Google Street View (which includes a percent encoded URL in its URL). Seems to fix #13477 too
This commit is contained in:
parent
d2fc8efd9e
commit
8283e8b88c
5 changed files with 85 additions and 51 deletions
|
@ -194,9 +194,6 @@ Optional<URL> URLParser::parse_data_url(StringView raw_input)
|
|||
// future for validation of URLs, which would then lead to infinite recursion.
|
||||
// The same goes for base_url, because e.g. the port() getter does not always return m_port, and we are interested in the underlying member
|
||||
// variables' values here, not what the URL class presents to its users.
|
||||
// NOTE: Since the URL class's member variables contain percent decoded data, we have to deviate from the URL parser specification when setting
|
||||
// some of those values. Because the specification leaves all values percent encoded in their URL data structure, we have to percent decode
|
||||
// everything before setting the member variables.
|
||||
URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Optional<URL> url, Optional<State> state_override)
|
||||
{
|
||||
dbgln_if(URL_PARSER_DEBUG, "URLParser::parse: Parsing '{}'", raw_input);
|
||||
|
@ -310,7 +307,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
++iterator;
|
||||
} else {
|
||||
url->m_cannot_be_a_base_url = true;
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
state = State::CannotBeABaseUrlPath;
|
||||
}
|
||||
} else {
|
||||
|
@ -441,13 +438,11 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (password_token_seen) {
|
||||
builder.append(url->password());
|
||||
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
url->m_password = URL::percent_decode(builder.string_view());
|
||||
url->m_password = builder.string_view();
|
||||
} else {
|
||||
builder.append(url->username());
|
||||
URL::append_percent_encoded_if_necessary(builder, c, URL::PercentEncodeSet::Userinfo);
|
||||
// NOTE: This is has to be encoded and then decoded because the original sequence could contain already percent-encoded sequences.
|
||||
url->m_username = URL::percent_decode(builder.string_view());
|
||||
url->m_username = builder.string_view();
|
||||
}
|
||||
}
|
||||
buffer.clear();
|
||||
|
@ -561,7 +556,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
url->m_host = base_url->m_host;
|
||||
auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string();
|
||||
if (!starts_with_windows_drive_letter(substring_from_pointer) && is_normalized_windows_drive_letter(base_url->m_paths[0]))
|
||||
url->append_path(base_url->m_paths[0]);
|
||||
url->append_path(base_url->m_paths[0], URL::ApplyPercentEncoding::No);
|
||||
state = State::Path;
|
||||
continue;
|
||||
}
|
||||
|
@ -616,9 +611,9 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (!url->m_paths.is_empty() && !(url->m_scheme == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0])))
|
||||
url->m_paths.remove(url->m_paths.size() - 1);
|
||||
if (code_point != '/' && !(url->is_special() && code_point == '\\'))
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
} else if (is_single_dot_path_segment(buffer.string_view()) && code_point != '/' && !(url->is_special() && code_point == '\\')) {
|
||||
url->append_path("");
|
||||
url->append_slash();
|
||||
} else if (!is_single_dot_path_segment(buffer.string_view())) {
|
||||
if (url->m_scheme == "file" && url->m_paths.is_empty() && is_windows_drive_letter(buffer.string_view())) {
|
||||
auto drive_letter = buffer.string_view()[0];
|
||||
|
@ -626,8 +621,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
buffer.append(drive_letter);
|
||||
buffer.append(':');
|
||||
}
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->append_path(URL::percent_decode(buffer.string_view()));
|
||||
url->append_path(buffer.string_view(), URL::ApplyPercentEncoding::No);
|
||||
}
|
||||
buffer.clear();
|
||||
if (code_point == '?') {
|
||||
|
@ -649,13 +643,12 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
// NOTE: Verify that the assumptions required for this simplification are correct.
|
||||
VERIFY(url->m_paths.size() == 1 && url->m_paths[0].is_empty());
|
||||
if (code_point == '?') {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
url->m_query = "";
|
||||
state = State::Query;
|
||||
} else if (code_point == '#') {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
url->m_fragment = "";
|
||||
state = State::Fragment;
|
||||
} else {
|
||||
|
@ -665,8 +658,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
if (code_point != end_of_file) {
|
||||
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::C0Control);
|
||||
} else {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_paths[0] = URL::percent_decode(buffer.string_view());
|
||||
url->m_paths[0] = buffer.string_view();
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -696,8 +688,7 @@ URL URLParser::parse(StringView raw_input, Optional<URL> const& base_url, Option
|
|||
// FIXME: If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error.
|
||||
buffer.append_code_point(code_point);
|
||||
} else {
|
||||
// NOTE: This needs to be percent decoded since the member variables contain decoded data.
|
||||
url->m_fragment = URL::percent_decode(buffer.string_view());
|
||||
url->m_fragment = buffer.string_view();
|
||||
buffer.clear();
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue