mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 16:42:44 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			347 lines
		
	
	
	
		
			7.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			347 lines
		
	
	
	
		
			7.3 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2018-2020, Andreas Kling <kling@serenityos.org>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #include <AK/Assertions.h>
 | |
| #include <AK/Format.h>
 | |
| #include <errno.h>
 | |
| #include <wchar.h>
 | |
| 
 | |
| static void mbstate_reset(mbstate_t* state)
 | |
| {
 | |
|     *state = { 0 };
 | |
| }
 | |
| 
 | |
| static unsigned int mbstate_stored_bytes(mbstate_t* state)
 | |
| {
 | |
|     for (unsigned int i = 0; i < sizeof(state->bytes); i++) {
 | |
|         if (!state->bytes[i]) {
 | |
|             return i;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return sizeof(state->bytes);
 | |
| }
 | |
| 
 | |
| static unsigned int mbstate_expected_bytes(mbstate_t* state)
 | |
| {
 | |
|     unsigned char first = state->bytes[0];
 | |
| 
 | |
|     // Single-byte sequences have their first bit unset
 | |
|     if ((first & 0b10000000) == 0) {
 | |
|         return 1;
 | |
|     }
 | |
| 
 | |
|     // Two-byte sequences start with 0b110xxxxx
 | |
|     if ((first & 0b11100000) == 0b11000000) {
 | |
|         return 2;
 | |
|     }
 | |
| 
 | |
|     // Three-byte sequences start with 0b1110xxxx
 | |
|     if ((first & 0b11110000) == 0b11100000) {
 | |
|         return 3;
 | |
|     }
 | |
| 
 | |
|     // Four-byte sequences start with 0b11110xxx
 | |
|     if ((first & 0b11111000) == 0b11110000) {
 | |
|         return 4;
 | |
|     }
 | |
| 
 | |
|     // Everything else is invalid
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| extern "C" {
 | |
| 
 | |
| size_t wcslen(const wchar_t* str)
 | |
| {
 | |
|     size_t len = 0;
 | |
|     while (*(str++))
 | |
|         ++len;
 | |
|     return len;
 | |
| }
 | |
| 
 | |
| wchar_t* wcscpy(wchar_t* dest, const wchar_t* src)
 | |
| {
 | |
|     wchar_t* original_dest = dest;
 | |
|     while ((*dest++ = *src++) != '\0')
 | |
|         ;
 | |
|     return original_dest;
 | |
| }
 | |
| 
 | |
| wchar_t* wcsncpy(wchar_t* dest, const wchar_t* src, size_t num)
 | |
| {
 | |
|     wchar_t* original_dest = dest;
 | |
|     while (((*dest++ = *src++) != '\0') && ((size_t)(dest - original_dest) < num))
 | |
|         ;
 | |
|     return original_dest;
 | |
| }
 | |
| 
 | |
| int wcscmp(const wchar_t* s1, const wchar_t* s2)
 | |
| {
 | |
|     while (*s1 == *s2++)
 | |
|         if (*s1++ == 0)
 | |
|             return 0;
 | |
|     return *(const wchar_t*)s1 - *(const wchar_t*)--s2;
 | |
| }
 | |
| 
 | |
| int wcsncmp(const wchar_t* s1, const wchar_t* s2, size_t n)
 | |
| {
 | |
|     if (!n)
 | |
|         return 0;
 | |
|     do {
 | |
|         if (*s1 != *s2++)
 | |
|             return *(const wchar_t*)s1 - *(const wchar_t*)--s2;
 | |
|         if (*s1++ == 0)
 | |
|             break;
 | |
|     } while (--n);
 | |
|     return 0;
 | |
| }
 | |
| 
 | |
| wchar_t* wcschr(const wchar_t* str, int c)
 | |
| {
 | |
|     wchar_t ch = c;
 | |
|     for (;; ++str) {
 | |
|         if (*str == ch)
 | |
|             return const_cast<wchar_t*>(str);
 | |
|         if (!*str)
 | |
|             return nullptr;
 | |
|     }
 | |
| }
 | |
| 
 | |
| const wchar_t* wcsrchr(const wchar_t* str, wchar_t wc)
 | |
| {
 | |
|     wchar_t* last = nullptr;
 | |
|     wchar_t c;
 | |
|     for (; (c = *str); ++str) {
 | |
|         if (c == wc)
 | |
|             last = const_cast<wchar_t*>(str);
 | |
|     }
 | |
|     return last;
 | |
| }
 | |
| 
 | |
| wchar_t* wcscat(wchar_t* dest, const wchar_t* src)
 | |
| {
 | |
|     size_t dest_length = wcslen(dest);
 | |
|     size_t i;
 | |
|     for (i = 0; src[i] != '\0'; i++)
 | |
|         dest[dest_length + i] = src[i];
 | |
|     dest[dest_length + i] = '\0';
 | |
|     return dest;
 | |
| }
 | |
| 
 | |
| wchar_t* wcsncat(wchar_t* dest, const wchar_t* src, size_t n)
 | |
| {
 | |
|     size_t dest_length = wcslen(dest);
 | |
|     size_t i;
 | |
|     for (i = 0; i < n && src[i] != '\0'; i++)
 | |
|         dest[dest_length + i] = src[i];
 | |
|     dest[dest_length + i] = '\0';
 | |
|     return dest;
 | |
| }
 | |
| 
 | |
| wchar_t* wcstok(wchar_t* str, const wchar_t* delim, wchar_t** ptr)
 | |
| {
 | |
|     wchar_t* used_str = str;
 | |
|     if (!used_str) {
 | |
|         used_str = *ptr;
 | |
|     }
 | |
| 
 | |
|     size_t token_start = 0;
 | |
|     size_t token_end = 0;
 | |
|     size_t str_len = wcslen(used_str);
 | |
|     size_t delim_len = wcslen(delim);
 | |
| 
 | |
|     for (size_t i = 0; i < str_len; ++i) {
 | |
|         bool is_proper_delim = false;
 | |
| 
 | |
|         for (size_t j = 0; j < delim_len; ++j) {
 | |
|             if (used_str[i] == delim[j]) {
 | |
|                 // Skip beginning delimiters
 | |
|                 if (token_end - token_start == 0) {
 | |
|                     ++token_start;
 | |
|                     break;
 | |
|                 }
 | |
| 
 | |
|                 is_proper_delim = true;
 | |
|             }
 | |
|         }
 | |
| 
 | |
|         ++token_end;
 | |
|         if (is_proper_delim && token_end > 0) {
 | |
|             --token_end;
 | |
|             break;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     if (used_str[token_start] == '\0')
 | |
|         return nullptr;
 | |
| 
 | |
|     if (token_end == 0) {
 | |
|         return &used_str[token_start];
 | |
|     }
 | |
| 
 | |
|     used_str[token_end] = '\0';
 | |
|     return &used_str[token_start];
 | |
| }
 | |
| 
 | |
| long wcstol(const wchar_t*, wchar_t**, int)
 | |
| {
 | |
|     dbgln("FIXME: Implement wcstol()");
 | |
|     TODO();
 | |
| }
 | |
| 
 | |
| long long wcstoll(const wchar_t*, wchar_t**, int)
 | |
| {
 | |
|     dbgln("FIXME: Implement wcstoll()");
 | |
|     TODO();
 | |
| }
 | |
| 
 | |
| wint_t btowc(int c)
 | |
| {
 | |
|     if (c == EOF) {
 | |
|         return WEOF;
 | |
|     }
 | |
| 
 | |
|     // Multi-byte sequences in UTF-8 have their highest bit set
 | |
|     if (c & (1 << 7)) {
 | |
|         return WEOF;
 | |
|     }
 | |
| 
 | |
|     return c;
 | |
| }
 | |
| 
 | |
| size_t mbrtowc(wchar_t* pwc, const char* s, size_t n, mbstate_t* state)
 | |
| {
 | |
|     static mbstate_t _anonymous_state = { 0 };
 | |
| 
 | |
|     if (state == nullptr) {
 | |
|         state = &_anonymous_state;
 | |
|     }
 | |
| 
 | |
|     // If s is nullptr, check if the state contains a complete multibyte character
 | |
|     if (s == nullptr) {
 | |
|         if (mbstate_expected_bytes(state) == mbstate_stored_bytes(state)) {
 | |
|             mbstate_reset(state);
 | |
|             return 0;
 | |
|         } else {
 | |
|             mbstate_reset(state);
 | |
|             errno = EILSEQ;
 | |
|             return -1;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     // Stop early if we can't read anything
 | |
|     if (n == 0) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     size_t consumed_bytes = 0;
 | |
|     size_t stored_bytes = mbstate_stored_bytes(state);
 | |
| 
 | |
|     // Fill the first byte if we haven't done that yet
 | |
|     if (state->bytes[0] == 0) {
 | |
|         state->bytes[0] = s[0];
 | |
|         consumed_bytes++;
 | |
|     }
 | |
| 
 | |
|     size_t expected_bytes = mbstate_expected_bytes(state);
 | |
| 
 | |
|     // Check if the first byte is invalid
 | |
|     if (expected_bytes == 0) {
 | |
|         mbstate_reset(state);
 | |
|         errno = EILSEQ;
 | |
|         return -1;
 | |
|     }
 | |
| 
 | |
|     size_t needed_bytes = expected_bytes - stored_bytes;
 | |
| 
 | |
|     while (consumed_bytes < needed_bytes) {
 | |
|         if (consumed_bytes == n) {
 | |
|             // No complete multibyte character
 | |
|             return -2;
 | |
|         }
 | |
| 
 | |
|         unsigned char c = s[consumed_bytes];
 | |
| 
 | |
|         // Continuation bytes have to start with 0b10xxxxxx
 | |
|         if ((c & 0b11000000) != 0b10000000) {
 | |
|             // Invalid multibyte character
 | |
|             mbstate_reset(state);
 | |
|             errno = EILSEQ;
 | |
|             return -1;
 | |
|         }
 | |
| 
 | |
|         state->bytes[mbstate_stored_bytes(state)] = c;
 | |
|         consumed_bytes++;
 | |
|     }
 | |
| 
 | |
|     wchar_t codepoint = state->bytes[0];
 | |
| 
 | |
|     // Mask out the "length" bits if necessary
 | |
|     if (expected_bytes > 1) {
 | |
|         codepoint &= (1 << (7 - expected_bytes)) - 1;
 | |
|     }
 | |
| 
 | |
|     for (unsigned int i = 1; i < expected_bytes; i++) {
 | |
|         // Each continuation byte contains 6 bits of data
 | |
|         codepoint = codepoint << 6;
 | |
|         codepoint |= state->bytes[i] & 0b111111;
 | |
|     }
 | |
| 
 | |
|     if (pwc) {
 | |
|         *pwc = codepoint;
 | |
|     }
 | |
| 
 | |
|     // We don't have a shift state that we need to keep, so just clear the entire state
 | |
|     mbstate_reset(state);
 | |
| 
 | |
|     if (codepoint == 0) {
 | |
|         return 0;
 | |
|     }
 | |
| 
 | |
|     return consumed_bytes;
 | |
| }
 | |
| 
 | |
| size_t mbrlen(const char*, size_t, mbstate_t*)
 | |
| {
 | |
|     dbgln("FIXME: Implement mbrlen()");
 | |
|     TODO();
 | |
| }
 | |
| 
 | |
| size_t wcrtomb(char*, wchar_t, mbstate_t*)
 | |
| {
 | |
|     dbgln("FIXME: Implement wcrtomb()");
 | |
|     TODO();
 | |
| }
 | |
| 
 | |
| int wcscoll(const wchar_t* ws1, const wchar_t* ws2)
 | |
| {
 | |
|     // TODO: Actually implement a sensible sort order for this,
 | |
|     // because right now we are doing what LC_COLLATE=C would do.
 | |
|     return wcscmp(ws1, ws2);
 | |
| }
 | |
| 
 | |
| int wctob(wint_t)
 | |
| {
 | |
|     dbgln("FIXME: Implement wctob()");
 | |
|     TODO();
 | |
| }
 | |
| 
 | |
| int mbsinit(const mbstate_t* state)
 | |
| {
 | |
|     if (!state) {
 | |
|         return 1;
 | |
|     }
 | |
| 
 | |
|     for (unsigned char byte : state->bytes) {
 | |
|         if (byte) {
 | |
|             return 0;
 | |
|         }
 | |
|     }
 | |
| 
 | |
|     return 1;
 | |
| }
 | |
| }
 | 
