From 453dd0cf442a63c630b1e626fae5539506020ba9 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sun, 17 Sep 2023 13:15:52 +1200 Subject: [PATCH] AK: Properly implement steps for shortening a URL's path Instead of implementing this inline, put it into a function. Use this new function to correctly implement shortening paths for some places where this logic was previously missing. Before these changes, the pathname for the included test was incorrectly being set to '/' as we were not considering the Windows drive letter. --- AK/URLParser.cpp | 27 ++++++++++++++++++++------ AK/URLParser.h | 4 ++++ Tests/LibWeb/Text/expected/URL/url.txt | 10 ++++++++++ Tests/LibWeb/Text/input/URL/url.html | 1 + 4 files changed, 36 insertions(+), 6 deletions(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 9e3849739b..bbd80f04bc 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -683,6 +683,24 @@ constexpr bool is_double_dot_path_segment(StringView input) return input == ".."sv || input.equals_ignoring_ascii_case(".%2e"sv) || input.equals_ignoring_ascii_case("%2e."sv) || input.equals_ignoring_ascii_case("%2e%2e"sv); } +// https://url.spec.whatwg.org/#shorten-a-urls-path +void URLParser::shorten_urls_path(URL& url) +{ + // 1. Assert: url does not have an opaque path. + VERIFY(!url.cannot_be_a_base_url()); + + // 2. Let path be url’s path. + auto& path = url.m_paths; + + // 3. If url’s scheme is "file", path’s size is 1, and path[0] is a normalized Windows drive letter, then return. + if (url.scheme() == "file" && path.size() == 1 && is_normalized_windows_drive_letter(path[0])) + return; + + // 4. Remove path’s last item, if any. 
+ if (!path.is_empty()) + path.take_last(); +} + // https://url.spec.whatwg.org/#string-percent-encode-after-encoding ErrorOr URLParser::percent_encode_after_encoding(StringView input, URL::PercentEncodeSet percent_encode_set, bool space_as_plus) { @@ -1025,8 +1043,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, url->m_query = {}; // 2. Shorten url’s path. - if (url->m_paths.size()) - url->m_paths.remove(url->m_paths.size() - 1); + shorten_urls_path(*url); // 3. Set state to path state and decrease pointer by 1. state = State::Path; @@ -1337,8 +1354,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // 2. If the code point substring from pointer to the end of input does not start with a Windows drive letter, then shorten url’s path. auto substring_from_pointer = input.substring_view(iterator - input.begin()).as_string(); if (!starts_with_windows_drive_letter(substring_from_pointer)) { - if (!url->m_paths.is_empty() && !(url->scheme() == "file" && url->m_paths.size() == 1 && is_normalized_windows_drive_letter(url->m_paths[0]))) - url->m_paths.remove(url->m_paths.size() - 1); + shorten_urls_path(*url); } // 3. Otherwise: else { @@ -1505,8 +1521,7 @@ URL URLParser::basic_parse(StringView raw_input, Optional const& base_url, // 2. If buffer is a double-dot URL path segment, then: if (is_double_dot_path_segment(buffer.string_view())) { // 1. Shorten url’s path. - if (!url->m_paths.is_empty()) - url->m_paths.remove(url->m_paths.size() - 1); + shorten_urls_path(*url); // 2. If neither c is U+002F (/), nor url is special and c is U+005C (\), append the empty string to url’s path. 
if (code_point != '/' && !(url->is_special() && code_point == '\\')) diff --git a/AK/URLParser.h b/AK/URLParser.h index de6f945761..e43ed9a52e 100644 --- a/AK/URLParser.h +++ b/AK/URLParser.h @@ -1,5 +1,6 @@ /* * Copyright (c) 2021, Max Wipfli + * Copyright (c) 2023, Shannon Booth * * SPDX-License-Identifier: BSD-2-Clause */ @@ -63,6 +64,9 @@ public: // https://url.spec.whatwg.org/#concept-host-serializer static ErrorOr serialize_host(URL::Host const&); + + // https://url.spec.whatwg.org/#shorten-a-urls-path + static void shorten_urls_path(URL&); }; #undef ENUMERATE_STATES diff --git a/Tests/LibWeb/Text/expected/URL/url.txt b/Tests/LibWeb/Text/expected/URL/url.txt index e2a1dd214d..1129f80750 100644 --- a/Tests/LibWeb/Text/expected/URL/url.txt +++ b/Tests/LibWeb/Text/expected/URL/url.txt @@ -68,3 +68,13 @@ port => '' pathname => '/hello' search => '' hash => '' +new URL('//d:/..', 'file:///C:/a/b') +protocol => 'file:' +username => '' +password => '' +host => '' +hostname => '' +port => '' +pathname => '/d:/' +search => '' +hash => '' diff --git a/Tests/LibWeb/Text/input/URL/url.html b/Tests/LibWeb/Text/input/URL/url.html index 17fc181aeb..3994ab61c2 100644 --- a/Tests/LibWeb/Text/input/URL/url.html +++ b/Tests/LibWeb/Text/input/URL/url.html @@ -27,6 +27,7 @@ { input: 'unknown://serenityos.org:0' }, { input: 'http://serenityos.org/cat?dog#meow"woof' }, { input: '/hello', base: 'file://friends/' }, + { input: '//d:/..', base: 'file:///C:/a/b' }, ]) { printURL(url.input, url.base); }