From 99d5555134180d5f8795c31f1c3afbc012d8ec65 Mon Sep 17 00:00:00 2001 From: Max Wipfli Date: Thu, 3 Jun 2021 12:36:02 +0200 Subject: [PATCH] AK: Do not trim away non-ASCII bytes when parsing URL Because non-ASCII code points have negative byte values, trimming away control characters requires checking for negative byte values. This also adds a test case with a URL containing non-ASCII code points. --- AK/URLParser.cpp | 4 ++-- Tests/AK/TestURL.cpp | 9 +++++++++ 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 06e3723b18..daedbc50c9 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -174,7 +174,7 @@ URL URLParser::parse(Badge, StringView const& raw_input, URL const* base_ur size_t start_index = 0; size_t end_index = raw_input.length(); for (size_t i = 0; i < raw_input.length(); ++i) { - if (raw_input[i] <= 0x20) { + if (0 <= raw_input[i] && raw_input[i] <= 0x20) { ++start_index; has_validation_error = true; } else { @@ -182,7 +182,7 @@ URL URLParser::parse(Badge, StringView const& raw_input, URL const* base_ur } } for (ssize_t i = raw_input.length() - 1; i >= 0; --i) { - if (raw_input[i] <= 0x20) { + if (0 <= raw_input[i] && raw_input[i] <= 0x20) { --end_index; has_validation_error = true; } else { diff --git a/Tests/AK/TestURL.cpp b/Tests/AK/TestURL.cpp index 9fff681525..1fc2a0679a 100644 --- a/Tests/AK/TestURL.cpp +++ b/Tests/AK/TestURL.cpp @@ -328,3 +328,12 @@ TEST_CASE(leading_and_trailing_whitespace) EXPECT(url.is_valid()); EXPECT_EQ(url.to_string(), "https://foo.com/"); } + +TEST_CASE(unicode) +{ + URL url { "http://example.com/_ünicöde_téxt_©" }; + EXPECT(url.is_valid()); + EXPECT_EQ(url.path(), "/_ünicöde_téxt_©"); + EXPECT(url.query().is_null()); + EXPECT(url.fragment().is_null()); +}