diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp
index f6cac4a5ed..244db6f262 100644
--- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp
+++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.cpp
@@ -425,8 +425,16 @@ bool is_cors_safelisted_request_header(Header const& header)
     }
     // `range`
     else if (name.equals_ignoring_case("range"sv)) {
-        // If value is not a simple range header value, then return false.
-        if (!is_simple_range_header_value(value))
+        // 1. Let rangeValue be the result of parsing a single range header value given value.
+        auto range_value = parse_single_range_header_value(value);
+
+        // 2. If rangeValue is failure, then return false.
+        if (!range_value.has_value())
+            return false;
+
+        // 3. If rangeValue[0] is null, then return false.
+        // NOTE: As web browsers have historically not emitted ranges such as `bytes=-500` this algorithm does not safelist them.
+        if (!range_value->start.has_value())
             return false;
     }
     // Otherwise
@@ -616,16 +624,14 @@ bool is_request_body_header_name(ReadonlyBytes header_name)
 // TODO: https://fetch.spec.whatwg.org/#extract-header-list-values
 
 // https://fetch.spec.whatwg.org/#simple-range-header-value
-bool is_simple_range_header_value(ReadonlyBytes value)
+Optional<RangeHeaderValue> parse_single_range_header_value(ReadonlyBytes value)
 {
-    // To determine if a byte sequence value is a simple range header value, perform the following steps. They return a boolean.
-
     // 1. Let data be the isomorphic decoding of value.
     auto data = StringView { value };
 
-    // 2. If data does not start with "bytes=", then return false.
+    // 2. If data does not start with "bytes=", then return failure.
     if (!data.starts_with("bytes="sv))
-        return false;
+        return {};
 
     // 3. Let position be a position variable for data, initially pointing at the 6th code point of data.
     auto lexer = GenericLexer { data };
@@ -634,34 +640,36 @@ bool is_simple_range_header_value(ReadonlyBytes value)
     // 4. Let rangeStart be the result of collecting a sequence of code points that are ASCII digits, from data given position.
     auto range_start = lexer.consume_while(is_ascii_digit);
 
-    // FIXME: I believe the case of an empty rangeStart string (`bytes=-`) is not handled,
-    //        and would later fail 'interpreted as decimal number'.
-    if (range_start.is_empty())
-        return false;
+    // 5. Let rangeStartValue be rangeStart, interpreted as decimal number, if rangeStart is not the empty string; otherwise null.
+    // NOTE: Parsed as u64 so that an offset of 4 GiB or more is not mistaken for an omitted (null) bound.
+    auto range_start_value = range_start.to_uint<u64>();
 
-    // 5. If the code point at position within data is not U+002D (-), then return false.
-    // 6. Advance position by 1.
+    // 6. If the code point at position within data is not U+002D (-), then return failure.
+    // 7. Advance position by 1.
     if (!lexer.consume_specific('-'))
-        return false;
+        return {};
 
-    // 7. Let rangeEnd be the result of collecting a sequence of code points that are ASCII digits, from data given position.
+    // 8. Let rangeEnd be the result of collecting a sequence of code points that are ASCII digits, from data given position.
     auto range_end = lexer.consume_while(is_ascii_digit);
 
-    // 8. If position is not past the end of data, then return false.
+    // 9. Let rangeEndValue be rangeEnd, interpreted as decimal number, if rangeEnd is not the empty string; otherwise null.
+    auto range_end_value = range_end.to_uint<u64>();
+
+    // 10. If position is not past the end of data, then return failure.
     if (!lexer.is_eof())
-        return false;
+        return {};
 
-    // 9. If rangeEnd’s length is 0, then return true.
-    // NOTE: The range end can be omitted, e.g., `bytes=0-` is valid.
-    if (range_end.is_empty())
-        return true;
+    // 11. If rangeEndValue and rangeStartValue are null, then return failure.
+    if (!range_end_value.has_value() && !range_start_value.has_value())
+        return {};
 
-    // 10. If rangeStart, interpreted as decimal number, is greater than rangeEnd, interpreted as decimal number, then return false.
-    if (*range_start.to_uint() > *range_end.to_uint())
-        return false;
+    // 12. If rangeStartValue and rangeEndValue are numbers, and rangeStartValue is greater than rangeEndValue, then return failure.
+    if (range_start_value.has_value() && range_end_value.has_value() && *range_start_value > *range_end_value)
+        return {};
 
-    // 11. Return true.
-    return true;
+    // 13. Return (rangeStartValue, rangeEndValue).
+    // NOTE: The range end or start can be omitted, e.g., `bytes=0-` or `bytes=-500` are valid ranges.
+    return RangeHeaderValue { move(range_start_value), move(range_end_value) };
 }
 
 }
diff --git a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h
index dda8bcbc95..4c4dfbecc6 100644
--- a/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h
+++ b/Userland/Libraries/LibWeb/Fetch/Infrastructure/HTTP/Headers.h
@@ -45,6 +45,12 @@ public:
     [[nodiscard]] Optional extract_mime_type() const;
 };
 
+// Result of parsing a single range header value; an empty bound means that side of the range was omitted.
+struct RangeHeaderValue {
+    Optional<u64> start;
+    Optional<u64> end;
+};
+
 [[nodiscard]] ErrorOr> convert_header_names_to_a_sorted_lowercase_set(Span);
 [[nodiscard]] bool is_header_name(ReadonlyBytes);
 [[nodiscard]] bool is_header_value(ReadonlyBytes);
@@ -60,6 +66,6 @@ public:
 [[nodiscard]] bool is_forbidden_header_name(ReadonlyBytes);
 [[nodiscard]] bool is_forbidden_response_header_name(ReadonlyBytes);
 [[nodiscard]] bool is_request_body_header_name(ReadonlyBytes);
-[[nodiscard]] bool is_simple_range_header_value(ReadonlyBytes);
+[[nodiscard]] Optional<RangeHeaderValue> parse_single_range_header_value(ReadonlyBytes);
 
 }