mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 05:08:13 +00:00
LibIMAP+Mail: Implement RFC2047 message header encoding
This enables us to display email subject fields with non-ASCII characters in Mail :^)
This commit is contained in:
parent
34adf9eeae
commit
077a8058c3
7 changed files with 176 additions and 1 deletions
|
@ -658,6 +658,7 @@ if (BUILD_LAGOM)
|
||||||
LibCompress
|
LibCompress
|
||||||
LibGL
|
LibGL
|
||||||
LibGfx
|
LibGfx
|
||||||
|
LibIMAP
|
||||||
LibLocale
|
LibLocale
|
||||||
LibMarkdown
|
LibMarkdown
|
||||||
LibPDF
|
LibPDF
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
set(TEST_SOURCES
|
set(TEST_SOURCES
|
||||||
TestQuotedPrintable.cpp
|
TestQuotedPrintable.cpp
|
||||||
|
TestMessageHeaderEncoding.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
foreach(source IN LISTS TEST_SOURCES)
|
foreach(source IN LISTS TEST_SOURCES)
|
||||||
|
|
55
Tests/LibIMAP/TestMessageHeaderEncoding.cpp
Normal file
55
Tests/LibIMAP/TestMessageHeaderEncoding.cpp
Normal file
|
@ -0,0 +1,55 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <AK/CharacterTypes.h>
|
||||||
|
#include <LibIMAP/MessageHeaderEncoding.h>
|
||||||
|
#include <LibTest/TestCase.h>
|
||||||
|
|
||||||
|
TEST_CASE(test_decode)
{
    // Runs the decoder on `encoded` and requires the produced bytes to equal `plain`.
    auto expect_decodes_to = [](StringView encoded, StringView plain) {
        auto result = MUST(IMAP::decode_rfc2047_encoded_words(encoded));
        EXPECT_EQ(StringView(result), StringView(plain));
    };

    struct Sample {
        StringView encoded;
        StringView plain;
    };

    // Most samples follow RFC 2047 Section 8 "Examples",
    // https://datatracker.ietf.org/doc/html/rfc2047#section-8
    Sample const samples[] = {
        // An underscore inside a Q-encoded word is shown as a space.
        { "=?utf-8?Q?Spaces_should_be_spaces_!?="sv, "Spaces should be spaces !"sv },

        // A lone encoded-word, and one followed by ordinary text.
        { "=?ISO-8859-1?Q?a?="sv, "a"sv },
        { "=?ISO-8859-1?Q?a?= b"sv, "a b"sv },

        // White space separating two adjacent encoded-words is not displayed.
        { "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="sv, "ab"sv },

        // The same holds for several SPACEs, and for longer chains of encoded-words.
        { "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?="sv, "ab"sv },
        { "=?ISO-8859-1?Q?a?= =?ISO-8859-1?Q?b?= =?ISO-8859-1?Q?c?==?ISO-8859-1?Q?d?="sv, "abcd"sv },

        // Linear white space that contains CRLF (optionally followed by SPACEs) is ignored as well.
        { "=?utf-8?Q?a?=\r\n=?utf-8?Q?b?= \r\n=?utf-8?Q?c?=\r\n =?utf-8?Q?d?="sv, "abcd"sv },

        // A SPACE that should be displayed inside encoded text has to be part of the encoded-word.
        { "=?ISO-8859-1?Q?a_b?="sv, "a b"sv },

        // A SPACE between two encoded strings may be carried by either of the encoded-words.
        { "=?ISO-8859-1?Q?a?= =?ISO-8859-2?Q?_b?="sv, "a b"sv },

        // Header block from the RFC mixing several charsets with both the Q and B encodings.
        {
            "From: =?US-ASCII?Q?Keith_Moore?= <moore@cs.utk.edu>"
            "To: =?ISO-8859-1?Q?Keld_J=F8rn_Simonsen?= <keld@dkuug.dk>"
            "CC: =?ISO-8859-1?Q?Andr=E9?= Pirard <PIRARD@vm1.ulg.ac.be>"
            "Subject: =?ISO-8859-1?B?SWYgeW91IGNhbiByZWFkIHRoaXMgeW8=?="
            "=?ISO-8859-2?B?dSB1bmRlcnN0YW5kIHRoZSBleGFtcGxlLg==?="sv,

            "From: Keith Moore <moore@cs.utk.edu>"
            "To: Keld Jørn Simonsen <keld@dkuug.dk>"
            "CC: André Pirard <PIRARD@vm1.ulg.ac.be>"
            "Subject: If you can read this you understand the example."sv,
        },
    };

    for (auto const& sample : samples)
        expect_decodes_to(sample.encoded, sample.plain);
}
|
|
@ -20,6 +20,7 @@
|
||||||
#include <LibGUI/Statusbar.h>
|
#include <LibGUI/Statusbar.h>
|
||||||
#include <LibGUI/TableView.h>
|
#include <LibGUI/TableView.h>
|
||||||
#include <LibGUI/TreeView.h>
|
#include <LibGUI/TreeView.h>
|
||||||
|
#include <LibIMAP/MessageHeaderEncoding.h>
|
||||||
#include <LibIMAP/QuotedPrintable.h>
|
#include <LibIMAP/QuotedPrintable.h>
|
||||||
|
|
||||||
MailWidget::MailWidget()
|
MailWidget::MailWidget()
|
||||||
|
@ -396,6 +397,10 @@ void MailWidget::selected_mailbox()
|
||||||
if (subject.is_empty())
|
if (subject.is_empty())
|
||||||
subject = "(No subject)";
|
subject = "(No subject)";
|
||||||
|
|
||||||
|
if (subject.contains("=?"sv) && subject.contains("?="sv)) {
|
||||||
|
subject = MUST(IMAP::decode_rfc2047_encoded_words(subject));
|
||||||
|
}
|
||||||
|
|
||||||
auto& from_iterator_value = from_iterator->get<1>().value();
|
auto& from_iterator_value = from_iterator->get<1>().value();
|
||||||
auto from_index = from_iterator_value.find("From:"sv);
|
auto from_index = from_iterator_value.find("From:"sv);
|
||||||
if (!from_index.has_value())
|
if (!from_index.has_value())
|
||||||
|
|
|
@ -3,9 +3,10 @@ set(SOURCES
|
||||||
Objects.cpp
|
Objects.cpp
|
||||||
Parser.cpp
|
Parser.cpp
|
||||||
QuotedPrintable.cpp
|
QuotedPrintable.cpp
|
||||||
|
MessageHeaderEncoding.cpp
|
||||||
)
|
)
|
||||||
|
|
||||||
set(GENERATED_SOURCES)
|
set(GENERATED_SOURCES)
|
||||||
|
|
||||||
serenity_lib(LibIMAP imap)
|
serenity_lib(LibIMAP imap)
|
||||||
target_link_libraries(LibIMAP PRIVATE LibCore LibCrypto LibTLS)
|
target_link_libraries(LibIMAP PRIVATE LibCore LibCrypto LibTextCodec LibTLS)
|
||||||
|
|
97
Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
Normal file
97
Userland/Libraries/LibIMAP/MessageHeaderEncoding.cpp
Normal file
|
@ -0,0 +1,97 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include "QuotedPrintable.h"
|
||||||
|
#include <AK/Base64.h>
|
||||||
|
#include <AK/GenericLexer.h>
|
||||||
|
#include <AK/StringBuilder.h>
|
||||||
|
#include <LibIMAP/MessageHeaderEncoding.h>
|
||||||
|
#include <LibTextCodec/Decoder.h>
|
||||||
|
|
||||||
|
namespace IMAP {
|
||||||
|
|
||||||
|
// Appends `text` to `output` as literal header text, replacing every CR and every LF with a space.
static ErrorOr<void> append_literal_header_text(StringBuilder& output, StringView text)
{
    DeprecatedString literal = text.replace("\r"sv, " "sv, ReplaceMode::All);
    literal = literal.replace("\n"sv, " "sv, ReplaceMode::All);
    TRY(output.try_append(literal));
    return {};
}

// Returns true for the characters that may form linear white space between two 'encoded-word's.
static bool is_linear_whitespace(char character)
{
    return character == ' ' || character == '\t' || character == '\r' || character == '\n';
}

// Decodes all RFC 2047 'encoded-word's ("=?charset?encoding?encoded-text?=") found in `input`.
//
// Text outside of encoded-words is copied through with CR and LF replaced by spaces.
// Each encoded-word is first decoded according to its encoding ("Q" or "B", case-insensitive)
// and the resulting bytes are then converted to UTF-8 with the TextCodec decoder for its charset.
// White space that only separates two adjacent encoded-words is dropped (RFC 2047 Section 6.2).
//
// Encoded-words that cannot be decoded (unknown encoding, unknown charset, invalid payload, or an
// empty payload) are skipped and a debug message is logged. An encoded-word that is cut off before
// its closing "?=" is copied through as literal text.
//
// Returns the decoded text as a ByteBuffer, or an error propagated from building the output or
// from the charset conversion.
ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input)
{
    GenericLexer lexer(input);
    StringBuilder output;

    while (!lexer.is_eof()) {
        TRY(append_literal_header_text(output, lexer.consume_until("=?"sv)));
        if (lexer.is_eof())
            break;

        // Every delimiter is checked explicitly: header fields are untrusted input, and a
        // truncated word must not make us consume past the end of the input.
        auto const word_start = lexer.tell();
        lexer.consume_specific("=?"sv);
        auto charset = lexer.consume_until('?');
        bool is_well_formed = lexer.consume_specific('?');
        StringView encoding;
        StringView encoded_text;
        if (is_well_formed) {
            encoding = lexer.consume_until('?');
            is_well_formed = lexer.consume_specific('?');
        }
        if (is_well_formed) {
            encoded_text = lexer.consume_until("?="sv);
            is_well_formed = lexer.consume_specific("?="sv);
        }
        if (!is_well_formed) {
            // The input ended in the middle of the word, show what we have as ordinary text.
            TRY(append_literal_header_text(output, input.substring_view(word_start, lexer.tell() - word_start)));
            continue;
        }

        // RFC 2047 Section 6.2, "...any 'linear-white-space' that separates a pair of adjacent 'encoded-word's is ignored."
        // https://datatracker.ietf.org/doc/html/rfc2047#section-6.2
        // The white space is only dropped when another encoded-word starts right after it.
        size_t whitespace_count = 0;
        while (is_linear_whitespace(lexer.peek(whitespace_count)))
            ++whitespace_count;
        if (lexer.peek(whitespace_count) == '=' && lexer.peek(whitespace_count + 1) == '?')
            lexer.ignore(whitespace_count);

        ByteBuffer first_pass_decoded;
        if (encoding == 'Q' || encoding == 'q') {
            // RFC 2047 Section 4.2 (2), https://datatracker.ietf.org/doc/html/rfc2047#section-4.2
            // '_' always stands for 0x20. Expand it before the quoted-printable pass, so that a
            // literal underscore written as "=5F" is not turned into a space afterwards.
            DeprecatedString const quoted_printable_text = encoded_text.replace("_"sv, "=20"sv, ReplaceMode::All);
            auto maybe_decoded_data = decode_quoted_printable(quoted_printable_text.view());
            if (maybe_decoded_data.is_error()) {
                dbgln("Failed to decode quoted-printable rfc2047 text, skipping.");
                continue;
            }
            first_pass_decoded = maybe_decoded_data.release_value();
        } else if (encoding == 'B' || encoding == 'b') {
            auto maybe_decoded_data = AK::decode_base64(encoded_text);
            if (maybe_decoded_data.is_error()) {
                dbgln("Failed to decode base64-encoded rfc2047 text, skipping.");
                continue;
            }
            first_pass_decoded = maybe_decoded_data.release_value();
        } else {
            dbgln("Unknown encoding \"{}\" found, skipping, original string: \"{}\"", encoding, input);
            continue;
        }

        if (first_pass_decoded.is_empty())
            continue;

        auto maybe_decoder = TextCodec::decoder_for(charset);
        if (!maybe_decoder.has_value()) {
            dbgln("No decoder found for charset \"{}\", skipping.", charset);
            continue;
        }

        auto decoded_text = TRY(maybe_decoder->to_utf8(first_pass_decoded));
        TRY(output.try_append(decoded_text));
    }

    return output.to_byte_buffer();
}
|
||||||
|
|
||||||
|
}
|
15
Userland/Libraries/LibIMAP/MessageHeaderEncoding.h
Normal file
15
Userland/Libraries/LibIMAP/MessageHeaderEncoding.h
Normal file
|
@ -0,0 +1,15 @@
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2023, Valtteri Koskivuori <vkoskiv@gmail.com>
|
||||||
|
*
|
||||||
|
* SPDX-License-Identifier: BSD-2-Clause
|
||||||
|
*/
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <AK/ByteBuffer.h>
|
||||||
|
|
||||||
|
namespace IMAP {
|
||||||
|
|
||||||
|
// Decodes the RFC 2047 'encoded-word's ("=?charset?encoding?encoded-text?=") contained in `input`,
// e.g. a message header field such as the subject, and returns the resulting text as a ByteBuffer.
// Text outside of encoded-words is kept. An error is returned if producing the output fails.
ErrorOr<ByteBuffer> decode_rfc2047_encoded_words(StringView input);
|
||||||
|
|
||||||
|
}
|
Loading…
Add table
Add a link
Reference in a new issue