diff --git a/Tests/LibC/TestWchar.cpp b/Tests/LibC/TestWchar.cpp
index c8b6f71f30..0cc11732ff 100644
--- a/Tests/LibC/TestWchar.cpp
+++ b/Tests/LibC/TestWchar.cpp
@@ -539,3 +539,47 @@ TEST_CASE(mbrlen)
     EXPECT_EQ(errno, EILSEQ);
     state = {};
 }
+
+// Exercises mbtowc() with ASCII, multibyte, null, incomplete and invalid input, plus the state reset call.
+TEST_CASE(mbtowc)
+{
+    int ret = 0;
+    wchar_t wc = 0;
+
+    // Ensure that we can parse normal ASCII characters.
+    ret = mbtowc(&wc, "Hello", 5);
+    EXPECT_EQ(ret, 1);
+    EXPECT_EQ(wc, 'H');
+
+    // Try two three-byte codepoints (™™), only one of which should be consumed.
+    ret = mbtowc(&wc, "\xe2\x84\xa2\xe2\x84\xa2", 6);
+    EXPECT_EQ(ret, 3);
+    EXPECT_EQ(wc, 0x2122);
+
+    // Try a null character, which should return 0.
+    ret = mbtowc(&wc, "\x00\x00", 2);
+    EXPECT_EQ(ret, 0);
+    EXPECT_EQ(wc, 0);
+
+    // Try an incomplete multibyte character. Clear errno first so that a stale value cannot satisfy the check.
+    errno = 0;
+    ret = mbtowc(&wc, "\xe2\x84", 2);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ);
+
+    // Ask if we support shift states and reset the internal state in the process.
+    ret = mbtowc(nullptr, nullptr, 2);
+    EXPECT_EQ(ret, 0); // We don't support shift states.
+    ret = mbtowc(nullptr, "\x00", 1);
+    EXPECT_EQ(ret, 0); // No error likely means that the state is working again.
+
+    // Try an invalid multibyte sequence. Clear errno again so that the previous EILSEQ is not what we observe.
+    errno = 0;
+    ret = mbtowc(&wc, "\xff", 1);
+    EXPECT_EQ(ret, -1);
+    EXPECT_EQ(errno, EILSEQ);
+
+    // Try a successful conversion, but without target address.
+    ret = mbtowc(nullptr, "\xe2\x84\xa2\xe2\x84\xa2", 6);
+    EXPECT_EQ(ret, 3);
+}
diff --git a/Userland/Libraries/LibC/stdlib.cpp b/Userland/Libraries/LibC/stdlib.cpp
index 395de13bbf..38ae888cd9 100644
--- a/Userland/Libraries/LibC/stdlib.cpp
+++ b/Userland/Libraries/LibC/stdlib.cpp
@@ -884,19 +884,32 @@ size_t mbstowcs(wchar_t* pwcs, const char* s, size_t n)
     return mbsrtowcs(pwcs, &s, n, &state);
 }
 
+// Converts the multibyte character at the start of `s` (inspecting at most `n` bytes) into a wide
+// character by delegating to mbrtowc() with a function-local conversion state.
+// Returns the number of bytes consumed, 0 for a null character, or -1 with errno set to EILSEQ
+// for invalid or incomplete input. A null `s` resets the state and returns 0 (no shift states).
-int mbtowc(wchar_t* wch, const char* data, [[maybe_unused]] size_t data_size)
+int mbtowc(wchar_t* pwc, const char* s, size_t n)
 {
-    // FIXME: This needs a real implementation.
-    if (wch && data) {
-        *wch = *data;
-        return 1;
+    static mbstate_t internal_state = {};
+
+    // Reset the internal state and ask whether we have shift states.
+    if (s == nullptr) {
+        internal_state = {};
+        return 0;
     }
 
-    if (!wch && data) {
-        return 1;
+    size_t ret = mbrtowc(pwc, s, n, &internal_state);
+
+    // mbrtowc signals invalid input with -1 and incomplete input with -2. mbtowc cannot report
+    // "incomplete", so both are returned as an illegal sequence. The internal state is cleared
+    // so that partially consumed bytes cannot affect the next, unrelated call.
+    if (ret == static_cast<size_t>(-1) || ret == static_cast<size_t>(-2)) {
+        internal_state = {};
+        errno = EILSEQ;
+        return -1;
     }
 
-    return 0;
+    return static_cast<int>(ret);
 }
 
 int wctomb(char* s, wchar_t wc)