mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 11:08:11 +00:00
LibC: Implement wcrtomb
This function converts a single wide character into its multibyte representation (UTF-8 in our case). It is called from libc++'s `std::basic_ostream<wchar_t>::flush`, which gets called at program exit from a global destructor in order to flush `std::wcout`.
This commit is contained in:
parent
9c29e6cde7
commit
c8367df746
3 changed files with 72 additions and 3 deletions
|
@ -17,4 +17,29 @@ constexpr bool is_unicode_control_code_point(u32 code_point)
|
||||||
|
|
||||||
Optional<StringView> get_unicode_control_code_point_alias(u32);
|
Optional<StringView> get_unicode_control_code_point_alias(u32);
|
||||||
|
|
||||||
|
template<typename Callback>
|
||||||
|
[[nodiscard]] constexpr int code_point_to_utf8(u32 code_point, Callback callback)
|
||||||
|
{
|
||||||
|
if (code_point <= 0x7f) {
|
||||||
|
callback((char)code_point);
|
||||||
|
return 1;
|
||||||
|
} else if (code_point <= 0x07ff) {
|
||||||
|
callback((char)(((code_point >> 6) & 0x1f) | 0xc0));
|
||||||
|
callback((char)(((code_point >> 0) & 0x3f) | 0x80));
|
||||||
|
return 2;
|
||||||
|
} else if (code_point <= 0xffff) {
|
||||||
|
callback((char)(((code_point >> 12) & 0x0f) | 0xe0));
|
||||||
|
callback((char)(((code_point >> 6) & 0x3f) | 0x80));
|
||||||
|
callback((char)(((code_point >> 0) & 0x3f) | 0x80));
|
||||||
|
return 3;
|
||||||
|
} else if (code_point <= 0x10ffff) {
|
||||||
|
callback((char)(((code_point >> 18) & 0x07) | 0xf0));
|
||||||
|
callback((char)(((code_point >> 12) & 0x3f) | 0x80));
|
||||||
|
callback((char)(((code_point >> 6) & 0x3f) | 0x80));
|
||||||
|
callback((char)(((code_point >> 0) & 0x3f) | 0x80));
|
||||||
|
return 4;
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -285,3 +285,34 @@ TEST_CASE(mbrtowc)
|
||||||
EXPECT_EQ(ret, -1ul);
|
EXPECT_EQ(ret, -1ul);
|
||||||
EXPECT_EQ(errno, EILSEQ);
|
EXPECT_EQ(errno, EILSEQ);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Exercises wcrtomb(): null-buffer handling, ASCII and four-byte conversions,
// error reporting for an out-of-range wide character, and U+FFFD not being mistaken for an error.
TEST_CASE(wcrtomb)
{
    char buf[MB_LEN_MAX];
    size_t ret = 0;

    // Ensure that `wc` is ignored when buf is a nullptr.
    ret = wcrtomb(nullptr, L'a', nullptr);
    EXPECT_EQ(ret, 1ul);

    ret = wcrtomb(nullptr, L'\U0001F41E', nullptr);
    EXPECT_EQ(ret, 1ul);

    // When the buffer is non-null, the multibyte representation is written into it.
    ret = wcrtomb(buf, L'a', nullptr);
    EXPECT_EQ(ret, 1ul);
    EXPECT_EQ(memcmp(buf, "a", ret), 0);

    ret = wcrtomb(buf, L'\U0001F41E', nullptr);
    EXPECT_EQ(ret, 4ul);
    EXPECT_EQ(memcmp(buf, "\xf0\x9f\x90\x9e", ret), 0);

    // When the wide character is invalid, -1 is returned and errno is set to EILSEQ.
    // Clear errno first: an EILSEQ left over from an earlier call (the mbrtowc test ends with one)
    // would otherwise satisfy the check even if wcrtomb never touched errno.
    errno = 0;
    ret = wcrtomb(buf, 0x110000, nullptr);
    EXPECT_EQ(ret, (size_t)-1);
    EXPECT_EQ(errno, EILSEQ);

    // Replacement characters and conversion errors are not confused.
    ret = wcrtomb(buf, L'\uFFFD', nullptr);
    EXPECT_NE(ret, (size_t)-1);
}
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
|
|
||||||
#include <AK/Assertions.h>
|
#include <AK/Assertions.h>
|
||||||
#include <AK/Format.h>
|
#include <AK/Format.h>
|
||||||
|
#include <AK/UnicodeUtils.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <wchar.h>
|
#include <wchar.h>
|
||||||
|
|
||||||
|
@ -292,10 +293,22 @@ size_t mbrlen(const char*, size_t, mbstate_t*)
|
||||||
TODO();
|
TODO();
|
||||||
}
|
}
|
||||||
|
|
||||||
size_t wcrtomb(char*, wchar_t, mbstate_t*)
|
size_t wcrtomb(char* s, wchar_t wc, mbstate_t*)
|
||||||
{
|
{
|
||||||
dbgln("FIXME: Implement wcrtomb()");
|
if (s == nullptr)
|
||||||
TODO();
|
wc = L'\0';
|
||||||
|
|
||||||
|
auto nwritten = AK::UnicodeUtils::code_point_to_utf8(wc, [&s](char byte) {
|
||||||
|
if (s != nullptr)
|
||||||
|
*s++ = byte;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (nwritten < 0) {
|
||||||
|
errno = EILSEQ;
|
||||||
|
return (size_t)-1;
|
||||||
|
} else {
|
||||||
|
return nwritten;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int wcscoll(const wchar_t* ws1, const wchar_t* ws2)
|
int wcscoll(const wchar_t* ws1, const wchar_t* ws2)
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue