From 177b04dcfc064933a269bc05be64631ac90954d7 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Tue, 25 Jul 2023 19:43:00 +1200 Subject: [PATCH] AK: Fix url host parsing check for 'ends in a number' I misunderstood the spec step for checking whether the host 'ends with a number'. We can't simply check whether it ends with a digit; this check is actually an algorithm, which is required to avoid treating hosts that merely end with a digit as IPv4 hosts. Implement this missing step, and add a test to cover this. --- AK/URLParser.cpp | 33 ++++++++++++++++++++++++++++++++- Tests/AK/TestURL.cpp | 10 ++++++++++ 2 files changed, 42 insertions(+), 1 deletion(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 4204cbcd1f..9e818aaaae 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -518,6 +518,37 @@ static Optional> parse_ipv6_address(StringView input) return address; } +// https://url.spec.whatwg.org/#ends-in-a-number-checker +static bool ends_in_a_number_checker(StringView input) +{ + // 1. Let parts be the result of strictly splitting input on U+002E (.). + auto parts = input.split_view("."sv, SplitBehavior::KeepEmpty); + + // 2. If the last item in parts is the empty string, then: + if (parts.last().is_empty()) { + // 1. If parts’s size is 1, then return false. + if (parts.size() == 1) + return false; + + // 2. Remove the last item from parts. + parts.take_last(); + } + + // 3. Let last be the last item in parts. + auto last = parts.last(); + + // 4. If last is non-empty and contains only ASCII digits, then return true. + if (!last.is_empty() && all_of(last, is_ascii_digit)) + return true; + + // 5. If parsing last as an IPv4 number does not return failure, then return true. + if (parse_ipv4_number(last).has_value()) + return true; + + // 6. Return false. + return false; +} + // https://url.spec.whatwg.org/#concept-host-parser // NOTE: This is a very bare-bones implementation. 
static Optional parse_host(StringView input, bool is_not_special = false) @@ -565,7 +596,7 @@ static Optional parse_host(StringView input, bool is_not_speci } // 8. If asciiDomain ends in a number, then return the result of IPv4 parsing asciiDomain. - if (is_ascii_digit(ascii_domain[ascii_domain.length() - 1])) { + if (ends_in_a_number_checker(ascii_domain)) { auto ipv4_host = parse_ipv4_address(ascii_domain); if (!ipv4_host.has_value()) return {}; diff --git a/Tests/AK/TestURL.cpp b/Tests/AK/TestURL.cpp index cca71b31b8..a97abe28a6 100644 --- a/Tests/AK/TestURL.cpp +++ b/Tests/AK/TestURL.cpp @@ -38,6 +38,16 @@ TEST_CASE(basic) EXPECT(url.query().is_null()); EXPECT(url.fragment().is_null()); } + { + URL url("https://www.serenityos.org1/index.html"sv); + EXPECT_EQ(url.is_valid(), true); + EXPECT_EQ(url.scheme(), "https"); + EXPECT_EQ(url.host(), "www.serenityos.org1"); + EXPECT_EQ(url.port_or_default(), 443); + EXPECT_EQ(url.serialize_path(), "/index.html"); + EXPECT(url.query().is_null()); + EXPECT(url.fragment().is_null()); + } { URL url("https://localhost:1234/~anon/test/page.html"sv); EXPECT_EQ(url.is_valid(), true);