From e233de1e48d00197d761d31636c4fc3f35d912f0 Mon Sep 17 00:00:00 2001 From: Shannon Booth Date: Sun, 13 Aug 2023 11:17:02 +1200 Subject: [PATCH] AK: Report validation error for URLs with invalid percent encodings This part of the spec is mostly useful for our debugging for now, but could eventually be hooked up so that the user can see any reported validation errors. --- AK/URLParser.cpp | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/AK/URLParser.cpp b/AK/URLParser.cpp index 95188ff7c5..c908672f93 100644 --- a/AK/URLParser.cpp +++ b/AK/URLParser.cpp @@ -786,6 +786,10 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, return input.substring_view(iterator - input.begin() + iterator.underlying_code_point_length_in_bytes()).as_string(); }; + auto remaining_starts_with_two_ascii_hex_digits = [&]() { + return is_ascii_hex_digit(iterator.peek(1).value_or(end_of_file)) && is_ascii_hex_digit(iterator.peek(2).value_or(end_of_file)); + }; + // 9. Keep running the following state machine by switching on state. If after a run pointer points to the EOF code point, go to the next step. Otherwise, increase pointer by 1 and continue with the state machine. // NOTE: "continue" should only be used to prevent incrementing the iterator, as this is done at the end of the loop. // ++iterator : "increase pointer by 1" @@ -1528,7 +1532,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); - // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) + report_validation_error(); // 3. UTF-8 percent-encode c using the path percent-encode set and append the result to buffer. 
URL::append_percent_encoded_if_necessary(buffer, code_point, URL::PercentEncodeSet::Path); @@ -1560,7 +1566,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, if (code_point != end_of_file && !is_url_code_point(code_point) && code_point != '%') report_validation_error(); - // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) + report_validation_error(); // 3. If c is not the EOF code point, UTF-8 percent-encode c using the C0 control percent-encode set and append the result to url’s path. if (code_point != end_of_file) { @@ -1606,7 +1614,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); - // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) + report_validation_error(); // 3. Append c to buffer. buffer.append_code_point(code_point); @@ -1621,7 +1631,9 @@ URL URLParser::basic_parse(StringView raw_input, Optional<URL> const& base_url, if (!is_url_code_point(code_point) && code_point != '%') report_validation_error(); - // FIXME: 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + // 2. If c is U+0025 (%) and remaining does not start with two ASCII hex digits, validation error. + if (code_point == '%' && !remaining_starts_with_two_ascii_hex_digits()) + report_validation_error(); // FIXME: 3. UTF-8 percent-encode c using the fragment percent-encode set and append the result to url’s fragment. buffer.append_code_point(code_point);