1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-30 01:37:36 +00:00

AK: Serialize URL hosts with 'concept-host-serializer'

In order to follow the spec text to achieve this, we need to change the
underlying representation of a host in AK::URL to a deserialized format.
Before this, we were parsing the host and then immediately serializing
it again.

Making that change resulted in a whole bunch of fallout.

After this change, callers can access the serialized data through
this concept-host-serializer. The functional end result of this
change is that IPv6 hosts are now correctly serialized to be
surrounded with '[' and ']'.
This commit is contained in:
Shannon Booth 2023-07-27 21:40:41 +12:00 committed by Andreas Kling
parent 768f070b86
commit 8751be09f9
36 changed files with 175 additions and 143 deletions

View file

@ -101,12 +101,18 @@ void URL::set_password(DeprecatedString password, ApplyPercentEncoding apply_per
m_valid = compute_validity();
}
void URL::set_host(DeprecatedString host)
void URL::set_host(Host host)
{
// Store the new host by moving it into m_host, then recompute m_valid,
// because compute_validity() inspects m_host.
m_host = move(host);
m_valid = compute_validity();
}
// https://url.spec.whatwg.org/#concept-host-serializer
// Returns the serialized form of this URL's host by delegating to
// URLParser::serialize_host() on m_host. The result of that call is returned
// as-is, so any error it reports reaches the caller through ErrorOr.
ErrorOr<String> URL::serialized_host() const
{
return URLParser::serialize_host(m_host);
}
void URL::set_port(Optional<u16> port)
{
if (port == default_port_for_scheme(m_scheme)) {
@ -157,7 +163,7 @@ bool URL::cannot_have_a_username_or_password_or_port() const
{
// A URL cannot have a username/password/port if its host is null or the empty string, or its scheme is "file".
// FIXME: The spec does not mention anything to do with 'cannot be a base URL'.
return m_host.is_null() || m_host.is_empty() || m_cannot_be_a_base_url || m_scheme == "file"sv;
return m_host.has<Empty>() || m_host == String {} || m_cannot_be_a_base_url || m_scheme == "file"sv;
}
// FIXME: This is by no means complete.
@ -192,7 +198,7 @@ bool URL::compute_validity() const
}
// NOTE: A file URL's host should be the empty string for localhost, not null.
if (m_scheme == "file" && m_host.is_null())
if (m_scheme == "file" && m_host.has<Empty>())
return false;
return true;
@ -227,7 +233,7 @@ URL URL::create_with_file_scheme(DeprecatedString const& path, DeprecatedString
url.set_scheme("file");
// NOTE: If the hostname is localhost (or null, which implies localhost), it should be set to the empty string.
// This is because a file URL always needs a non-null hostname.
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
url.set_host(hostname.is_null() || hostname == "localhost" ? String {} : String::from_deprecated_string(hostname).release_value_but_fixme_should_propagate_errors());
url.set_paths(lexical_path.parts());
if (path.ends_with('/'))
url.append_slash();
@ -243,7 +249,8 @@ URL URL::create_with_help_scheme(DeprecatedString const& path, DeprecatedString
url.set_scheme("help");
// NOTE: If the hostname is localhost (or null, which implies localhost), it should be set to the empty string.
// This is because a file URL always needs a non-null hostname.
url.set_host(hostname.is_null() || hostname == "localhost" ? DeprecatedString::empty() : hostname);
url.set_host(hostname.is_null() || hostname == "localhost" ? String {} : String::from_deprecated_string(hostname).release_value_but_fixme_should_propagate_errors());
url.set_paths(lexical_path.parts());
if (path.ends_with('/'))
url.append_slash();
@ -309,7 +316,7 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
output.append(':');
// 2. If url's host is non-null:
if (!m_host.is_null()) {
if (!m_host.has<Empty>()) {
// 1. Append "//" to output.
output.append("//"sv);
@ -329,7 +336,7 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
}
// 3. Append url's host, serialized, to output.
output.append(m_host);
output.append(serialized_host().release_value_but_fixme_should_propagate_errors());
// 4. If url's port is non-null, append U+003A (:) followed by url's port, serialized, to output.
if (m_port.has_value())
@ -342,7 +349,7 @@ DeprecatedString URL::serialize(ExcludeFragment exclude_fragment) const
if (cannot_be_a_base_url()) {
output.append(m_paths[0]);
} else {
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
if (m_host.has<Empty>() && m_paths.size() > 1 && m_paths[0].is_empty())
output.append("/."sv);
for (auto& segment : m_paths) {
output.append('/');
@ -379,9 +386,9 @@ DeprecatedString URL::serialize_for_display() const
builder.append(m_scheme);
builder.append(':');
if (!m_host.is_null()) {
if (!m_host.has<Empty>()) {
builder.append("//"sv);
builder.append(m_host);
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
if (m_port.has_value())
builder.appendff(":{}", *m_port);
}
@ -389,7 +396,7 @@ DeprecatedString URL::serialize_for_display() const
if (cannot_be_a_base_url()) {
builder.append(m_paths[0]);
} else {
if (m_host.is_null() && m_paths.size() > 1 && m_paths[0].is_empty())
if (m_host.has<Empty>() && m_paths.size() > 1 && m_paths[0].is_empty())
builder.append("/."sv);
for (auto& segment : m_paths) {
builder.append('/');
@ -437,7 +444,7 @@ DeprecatedString URL::serialize_origin() const
StringBuilder builder;
builder.append(m_scheme);
builder.append("://"sv);
builder.append(m_host);
builder.append(serialized_host().release_value_but_fixme_should_propagate_errors());
if (m_port.has_value())
builder.appendff(":{}", *m_port);
return builder.to_deprecated_string();

View file

@ -79,7 +79,8 @@ public:
DeprecatedString const& scheme() const { return m_scheme; }
DeprecatedString username(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
DeprecatedString password(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
DeprecatedString const& host() const { return m_host; }
Host const& host() const { return m_host; }
ErrorOr<String> serialized_host() const;
DeprecatedString basename(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
DeprecatedString query(ApplyPercentDecoding = ApplyPercentDecoding::No) const;
DeprecatedString fragment(ApplyPercentDecoding = ApplyPercentDecoding::Yes) const;
@ -101,7 +102,7 @@ public:
void set_scheme(DeprecatedString);
void set_username(DeprecatedString, ApplyPercentEncoding = ApplyPercentEncoding::Yes);
void set_password(DeprecatedString, ApplyPercentEncoding = ApplyPercentEncoding::Yes);
void set_host(DeprecatedString);
void set_host(Host);
void set_port(Optional<u16>);
void set_paths(Vector<DeprecatedString>, ApplyPercentEncoding = ApplyPercentEncoding::Yes);
void set_query(DeprecatedString, ApplyPercentEncoding = ApplyPercentEncoding::Yes);
@ -178,7 +179,7 @@ private:
DeprecatedString m_password;
// A URL's host is null or a host. It is initially null.
DeprecatedString m_host;
Host m_host;
// A URL's port is either null or a 16-bit unsigned integer that identifies a networking port. It is initially null.
Optional<u16> m_port;

View file

@ -32,7 +32,7 @@ static void report_validation_error(SourceLocation const& location = SourceLocat
dbgln_if(URL_PARSER_DEBUG, "URLParser::basic_parse: Validation error! {}", location);
}
static Optional<DeprecatedString> parse_opaque_host(StringView input)
static Optional<URL::Host> parse_opaque_host(StringView input)
{
auto forbidden_host_characters_excluding_percent = "\0\t\n\r #/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters_excluding_percent) {
@ -43,7 +43,7 @@ static Optional<DeprecatedString> parse_opaque_host(StringView input)
}
// FIXME: If input contains a code point that is not a URL code point and not U+0025 (%), validation error.
// FIXME: If input contains a U+0025 (%) and the two code points following it are not ASCII hex digits, validation error.
return URL::percent_encode(input, URL::PercentEncodeSet::C0Control);
return String::from_deprecated_string(URL::percent_encode(input, URL::PercentEncodeSet::C0Control)).release_value_but_fixme_should_propagate_errors();
}
struct ParsedIPv4Number {
@ -549,7 +549,7 @@ static bool ends_in_a_number_checker(StringView input)
// https://url.spec.whatwg.org/#concept-host-parser
// NOTE: This is a very bare-bones implementation.
static Optional<DeprecatedString> parse_host(StringView input, bool is_not_special = false)
static Optional<URL::Host> parse_host(StringView input, bool is_not_special = false)
{
// 1. If input starts with U+005B ([), then:
if (input.starts_with('[')) {
@ -563,10 +563,7 @@ static Optional<DeprecatedString> parse_host(StringView input, bool is_not_speci
auto address = parse_ipv6_address(input.substring_view(1, input.length() - 2));
if (!address.has_value())
return {};
StringBuilder output;
serialize_ipv6_address(*address, output);
return output.to_deprecated_string();
return address.release_value();
}
// 2. If isNotSpecial is true, then return the result of opaque-host parsing input.
@ -581,12 +578,16 @@ static Optional<DeprecatedString> parse_host(StringView input, bool is_not_speci
// FIXME: 5. Let asciiDomain be the result of running domain to ASCII on domain.
// FIXME: 6. If asciiDomain is failure, then return failure.
auto& ascii_domain = domain;
auto ascii_domain_or_error = String::from_deprecated_string(domain);
if (ascii_domain_or_error.is_error())
return {};
auto ascii_domain = ascii_domain_or_error.release_value();
// 7. If asciiDomain contains a forbidden domain code point, domain-invalid-code-point validation error, return failure.
auto forbidden_host_characters = "\0\t\n\r #%/:<>?@[\\]^|"sv;
for (auto character : forbidden_host_characters) {
if (ascii_domain.view().contains(character)) {
if (ascii_domain.bytes_as_string_view().contains(character)) {
report_validation_error();
return {};
}
@ -598,11 +599,7 @@ static Optional<DeprecatedString> parse_host(StringView input, bool is_not_speci
if (!ipv4_host.has_value())
return {};
auto result = serialize_ipv4_address(*ipv4_host);
if (result.is_error())
return {};
return result.release_value().to_deprecated_string();
return ipv4_host.release_value();
}
// 9. Return asciiDomain.
@ -880,7 +877,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
return *url;
// 4. If url's scheme is "file" and its host is an empty host, then return.
if (url->scheme() == "file"sv && url->host().is_empty())
if (url->scheme() == "file"sv && url->host() == String {})
return *url;
}
@ -1319,7 +1316,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
url->m_scheme = "file";
// 2. Set url's host to the empty string.
url->m_host = "";
url->m_host = String {};
// 3. If c is U+002F (/) or U+005C (\), then:
if (code_point == '/' || code_point == '\\') {
@ -1422,7 +1419,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
// 2. Otherwise, if buffer is the empty string, then:
else if (buffer.is_empty()) {
// 1. Set url's host to the empty string.
url->m_host = "";
url->m_host = String {};
// 2. If state override is given, then return.
if (state_override.has_value())
@ -1442,8 +1439,8 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
return {};
// 3. If host is "localhost", then set host to the empty string.
if (host.value() == "localhost")
host = "";
if (host.value().has<String>() && host.value().get<String>() == "localhost"sv)
host = String {};
// 4. Set url's host to host.
url->m_host = host.release_value();
@ -1498,7 +1495,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url,
continue;
}
// 5. Otherwise, if state override is given and url's host is null, append the empty string to url's path.
else if (state_override.has_value() && url->host().is_empty()) {
else if (state_override.has_value() && url->host().has<Empty>()) {
url->append_slash();
}
break;