mirror of
https://github.com/RGBCube/serenity
synced 2025-05-28 22:05:06 +00:00
LibIMAP+Mail: Implement RFC2047 message header encoding
This enables us to display email subject fields with non-ASCII characters in Mail :^)
This commit is contained in:
parent
34adf9eeae
commit
077a8058c3
7 changed files with 176 additions and 1 deletions
97
Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
Normal file
97
Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
Normal file
|
@ -0,0 +1,97 @@
|
|||
/*
|
||||
* Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
|
||||
*
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*/
|
||||
|
||||
#include "QuotedPrintable.h"
|
||||
#include <AK/Base64.h>
|
||||
#include <AK/GenericLexer.h>
|
||||
#include <AK/StringBuilder.h>
|
||||
#include <LibIMAP/MessageHeaderEncoding.h>
|
||||
#include <LibTextCodec/Decoder.h>
|
||||
|
||||
namespace IMAP {
|
||||
|
||||
ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input)
|
||||
{
|
||||
GenericLexer lexer(input);
|
||||
StringBuilder output;
|
||||
|
||||
while (!lexer.is_eof()) {
|
||||
auto ascii_view = lexer.consume_until("=?"sv);
|
||||
DeprecatedString ascii = ascii_view.replace("\r"sv, " "sv, ReplaceMode::All);
|
||||
ascii = ascii.replace("\n"sv, " "sv, ReplaceMode::All);
|
||||
TRY(output.try_append(ascii));
|
||||
if (lexer.is_eof())
|
||||
break;
|
||||
lexer.consume_specific("=?"sv);
|
||||
auto charset = lexer.consume_until('?');
|
||||
lexer.consume();
|
||||
auto encoding = lexer.consume_until('?');
|
||||
lexer.consume();
|
||||
auto encoded_text = lexer.consume_until("?=");
|
||||
lexer.consume_specific("?="sv);
|
||||
|
||||
// RFC 2047 Section 6.2, "...any 'linear-white-space' that separates a pair of adjacent 'encoded-word's is ignored."
|
||||
// https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
|
||||
bool found_next_start = false;
|
||||
int spaces = 0;
|
||||
for (size_t i = 0; i < lexer.tell_remaining(); ++i) {
|
||||
if (lexer.peek(i) == ' ' || lexer.peek(i) == '\r' || lexer.peek(i) == '\n') {
|
||||
spaces++;
|
||||
if (lexer.peek(i + 1) == '=' && lexer.peek(i + 2) == '?') {
|
||||
found_next_start = true;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (found_next_start) {
|
||||
for (int i = 0; i < spaces; i++) {
|
||||
lexer.consume();
|
||||
}
|
||||
}
|
||||
|
||||
ByteBuffer first_pass_decoded;
|
||||
if (encoding == 'Q' || encoding == 'q') {
|
||||
auto maybe_decoded_data = decode_quoted_printable(encoded_text);
|
||||
if (maybe_decoded_data.is_error()) {
|
||||
dbgln("Failed to decode quoted-printable rfc2047 text, skipping.");
|
||||
continue;
|
||||
}
|
||||
// RFC 2047 Section 4.2.2, https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
|
||||
auto decoded_data = maybe_decoded_data.release_value();
|
||||
for (auto character : decoded_data.bytes()) {
|
||||
if (character == '_')
|
||||
first_pass_decoded.append(' ');
|
||||
else
|
||||
first_pass_decoded.append(character);
|
||||
}
|
||||
} else if (encoding == 'B' || encoding == 'b') {
|
||||
auto maybe_decoded_data = AK::decode_base64(encoded_text);
|
||||
if (maybe_decoded_data.is_error()) {
|
||||
dbgln("Failed to decode base64-encoded rfc2047 text, skipping.");
|
||||
continue;
|
||||
}
|
||||
first_pass_decoded = maybe_decoded_data.release_value();
|
||||
} else {
|
||||
dbgln("Unknown encoding \"{}\" found, skipping, original string: \"{}\"", encoding, input);
|
||||
continue;
|
||||
}
|
||||
if (first_pass_decoded.is_empty())
|
||||
continue;
|
||||
auto maybe_decoder = TextCodec::decoder_for(charset);
|
||||
if (!maybe_decoder.has_value()) {
|
||||
dbgln("No decoder found for charset \"{}\", skipping.", charset);
|
||||
continue;
|
||||
}
|
||||
auto decoded_text = TRY(maybe_decoder->to_utf8(first_pass_decoded));
|
||||
TRY(output.try_append(decoded_text));
|
||||
}
|
||||
|
||||
return output.to_byte_buffer();
|
||||
}
|
||||
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue