From 80238f29d20f51713153576dc8280538e0f3657e Mon Sep 17 00:00:00 2001 From: Tim Ledbetter Date: Thu, 5 Oct 2023 18:42:27 +0100 Subject: [PATCH] LibIMAP: Handle invalid escape sequences in Quoted-Printable parser When an invalid escape sequence is encountered, we now output the characters unaltered instead of crashing. --- Tests/LibIMAP/TestQuotedPrintable.cpp | 13 +++++++ .../Libraries/LibIMAP/QuotedPrintable.cpp | 36 ++++++++++++------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/Tests/LibIMAP/TestQuotedPrintable.cpp b/Tests/LibIMAP/TestQuotedPrintable.cpp index 16feb08e44..9a97ce61ca 100644 --- a/Tests/LibIMAP/TestQuotedPrintable.cpp +++ b/Tests/LibIMAP/TestQuotedPrintable.cpp @@ -46,4 +46,17 @@ TEST_CASE(test_decode) auto illegal_character_decode = MUST(IMAP::decode_quoted_printable(illegal_character_builder.to_deprecated_string())); EXPECT(illegal_character_decode.is_empty()); + + // If an escape sequence is invalid the characters are output unaltered. Illegal characters are ignored as usual. + decode_equal("="sv, "="sv); + decode_equal("=Z"sv, "=Z"sv); + decode_equal("=\x7F"sv, "="sv); + decode_equal("=\x7F\x7F"sv, "="sv); + decode_equal("=A\x7F"sv, "=A"sv); + decode_equal("=A"sv, "=A"sv); + decode_equal("=AZ"sv, "=AZ"sv); + decode_equal("=\r"sv, "=\r"sv); + decode_equal("=\r\r"sv, "=\r\r"sv); + decode_equal("=\n\r"sv, "=\n\r"sv); + decode_equal("=\rA"sv, "=\rA"sv); } diff --git a/Userland/Libraries/LibIMAP/QuotedPrintable.cpp b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp index 7e0fd50939..27a3fc7cdf 100644 --- a/Userland/Libraries/LibIMAP/QuotedPrintable.cpp +++ b/Userland/Libraries/LibIMAP/QuotedPrintable.cpp @@ -22,6 +22,18 @@ ErrorOr decode_quoted_printable(StringView input) GenericLexer lexer(input); StringBuilder output; + // For any invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the unaltered escape characters. + auto append_invalid_escape_sequence = [&](Optional first = {}, Optional second = {}) -> ErrorOr { + TRY(output.try_append('=')); + if (first.has_value() && !is_illegal_character(first.value())) + TRY(output.try_append(first.value())); + + if (second.has_value() && !is_illegal_character(second.value())) + TRY(output.try_append(second.value())); + + return {}; + }; + // NOTE: The RFC says that encoded lines must not be longer than 76 characters. // However, the RFC says implementations can ignore this and parse as is, // which is the approach we're taking. @@ -34,7 +46,8 @@ ErrorOr decode_quoted_printable(StringView input) if (potential_character == '=') { if (lexer.is_eof()) { - TODO(); + TRY(append_invalid_escape_sequence()); + continue; } char first_escape_character = lexer.consume(); @@ -43,7 +56,8 @@ ErrorOr decode_quoted_printable(StringView input) // Thus we can use is_ascii_hex_digit. if (is_ascii_hex_digit(first_escape_character)) { if (lexer.is_eof()) { - TODO(); + TRY(append_invalid_escape_sequence(first_escape_character)); + continue; } char second_escape_character = lexer.consume(); @@ -52,11 +66,13 @@ ErrorOr decode_quoted_printable(StringView input) u8 actual_character = (parse_ascii_hex_digit(first_escape_character) << 4) | parse_ascii_hex_digit(second_escape_character); TRY(output.try_append(actual_character)); } else { - TODO(); + TRY(append_invalid_escape_sequence(first_escape_character, second_escape_character)); + continue; } } else if (first_escape_character == '\r') { if (lexer.is_eof()) { - TODO(); + TRY(append_invalid_escape_sequence(first_escape_character)); + continue; } char second_escape_character = lexer.consume(); @@ -64,16 +80,12 @@ ErrorOr decode_quoted_printable(StringView input) if (second_escape_character == '\n') { // This is a soft line break. Don't append anything to the output. } else { - TODO(); + TRY(append_invalid_escape_sequence(first_escape_character, second_escape_character)); + continue; } } else { - if (is_illegal_character(first_escape_character)) { - TODO(); - } - - // Invalid escape sequence. RFC 2045 says a reasonable solution is just to append '=' followed by the character. - TRY(output.try_append('=')); - TRY(output.try_append(first_escape_character)); + TRY(append_invalid_escape_sequence(first_escape_character)); + continue; } } else { TRY(output.try_append(potential_character));