1
Fork 0
mirror of https://github.com/RGBCube/serenity synced 2025-07-27 12:47:35 +00:00

AK: Do not trim away non-ASCII bytes when parsing URL

Because the bytes of non-ASCII code points are negative as signed chars, the check used to
trim leading and trailing control characters must also exclude negative byte values.

This also adds a test case with a URL containing non-ASCII code points.
This commit is contained in:
Max Wipfli 2021-06-03 12:36:02 +02:00 committed by Andreas Kling
parent 44937e2dfc
commit 99d5555134
2 changed files with 11 additions and 2 deletions

View file

@ -174,7 +174,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
size_t start_index = 0; size_t start_index = 0;
size_t end_index = raw_input.length(); size_t end_index = raw_input.length();
for (size_t i = 0; i < raw_input.length(); ++i) { for (size_t i = 0; i < raw_input.length(); ++i) {
if (raw_input[i] <= 0x20) { if (0 <= raw_input[i] && raw_input[i] <= 0x20) {
++start_index; ++start_index;
has_validation_error = true; has_validation_error = true;
} else { } else {
@ -182,7 +182,7 @@ URL URLParser::parse(Badge<URL>, StringView const& raw_input, URL const* base_ur
} }
} }
for (ssize_t i = raw_input.length() - 1; i >= 0; --i) { for (ssize_t i = raw_input.length() - 1; i >= 0; --i) {
if (raw_input[i] <= 0x20) { if (0 <= raw_input[i] && raw_input[i] <= 0x20) {
--end_index; --end_index;
has_validation_error = true; has_validation_error = true;
} else { } else {

View file

@ -328,3 +328,12 @@ TEST_CASE(leading_and_trailing_whitespace)
EXPECT(url.is_valid()); EXPECT(url.is_valid());
EXPECT_EQ(url.to_string(), "https://foo.com/"); EXPECT_EQ(url.to_string(), "https://foo.com/");
} }
// Checks that a URL with non-ASCII characters in its path is accepted as
// valid, that the path is reported back unchanged, and that neither a query
// nor a fragment is present.
TEST_CASE(unicode)
{
    URL parsed_url("http://example.com/_ünicöde_téxt_©");

    // The URL as a whole must be accepted.
    EXPECT(parsed_url.is_valid());

    // The path must come back exactly as written, non-ASCII characters included.
    EXPECT_EQ(parsed_url.path(), "/_ünicöde_téxt_©");

    // The input has no '?' or '#' part, so both components must be null.
    EXPECT(parsed_url.query().is_null());
    EXPECT(parsed_url.fragment().is_null());
}