mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 14:18:12 +00:00
LibTextCodec: Make utf-16be and utf-16le codecs actually work
There were two problems: (1) they didn't handle surrogates, and (2) they used signed chars, leading to e.g. 0x00e4 being treated as 0xffe4. Also add a basic test that catches both issues. There's some code duplication with Utf16CodePointIterator::operator*(), but let's get things working first.
This commit is contained in:
parent
aa9037eed4
commit
3423b54eb9
2 changed files with 101 additions and 4 deletions
|
@ -23,3 +23,37 @@ TEST_CASE(test_utf8_decode)
|
|||
|
||||
EXPECT(decoder.to_utf8(test_string) == test_string);
|
||||
}
|
||||
|
||||
// Feeds a big-endian UTF-16 byte string through UTF16BEDecoder::process() and checks the
// code points passed to the callback. The input covers a code unit whose low byte has the
// high bit set (0x00E4) and a code point stored as a surrogate pair (U+1F600).
TEST_CASE(test_utf16be_decode)
{
    auto decoder = TextCodec::UTF16BEDecoder();
    // This is the output of `python3 -c "print('säk😀'.encode('utf-16be'))"`.
    // The trailing "\xd8=\xde\x00" is the surrogate pair D83D DE00 ('=' is byte 0x3D).
    auto test_string = "\x00s\x00\xe4\x00k\xd8=\xde\x00"sv;

    Vector<u32> processed_code_points;
    decoder.process(test_string, [&](u32 code_point) {
        processed_code_points.append(code_point);
    });

    EXPECT(processed_code_points.size() == 4);
    // The expectation above already flags a wrong count; bail out here so the element
    // checks below never index past the end of the vector.
    if (processed_code_points.size() != 4)
        return;

    EXPECT(processed_code_points[0] == 0x73);    // 's'
    EXPECT(processed_code_points[1] == 0xE4);    // 'ä'
    EXPECT(processed_code_points[2] == 0x6B);    // 'k'
    EXPECT(processed_code_points[3] == 0x1F600); // '😀', decoded from the surrogate pair
}
|
||||
|
||||
// Feeds a little-endian UTF-16 byte string through UTF16LEDecoder::process() and checks the
// code points passed to the callback. The input covers a code unit whose low byte has the
// high bit set (0x00E4) and a code point stored as a surrogate pair (U+1F600).
TEST_CASE(test_utf16le_decode)
{
    auto decoder = TextCodec::UTF16LEDecoder();
    // This is the output of `python3 -c "print('säk😀'.encode('utf-16le'))"`.
    // The trailing "=\xd8\x00\xde" is the surrogate pair D83D DE00 with each code unit
    // byte-swapped ('=' is byte 0x3D).
    auto test_string = "s\x00\xe4\x00k\x00=\xd8\x00\xde"sv;

    Vector<u32> processed_code_points;
    decoder.process(test_string, [&](u32 code_point) {
        processed_code_points.append(code_point);
    });

    EXPECT(processed_code_points.size() == 4);
    // The expectation above already flags a wrong count; bail out here so the element
    // checks below never index past the end of the vector.
    if (processed_code_points.size() != 4)
        return;

    EXPECT(processed_code_points[0] == 0x73);    // 's'
    EXPECT(processed_code_points[1] == 0xE4);    // 'ä'
    EXPECT(processed_code_points[2] == 0x6B);    // 'k'
    EXPECT(processed_code_points[3] == 0x1F600); // '😀', decoded from the surrogate pair
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue