From e864444fe3b1fcbb2839f05df20463a99efb3850 Mon Sep 17 00:00:00 2001 From: Luke Wilde Date: Tue, 28 Feb 2023 03:47:40 +0000 Subject: [PATCH] LibTextCodec/Latin1: Iterate over input string with u8 instead of char Using char causes bytes greater than or equal to 0x80 to be treated as negative values, which produces incorrect results when they are implicitly converted to u32. For example, `atob` in LibWeb uses this decoder to convert non-ASCII values to UTF-8, but non-ASCII values are >= 0x80, so the decoder produces incorrect results in such cases: ```js Uint8Array.from(atob("u660"), c => c.charCodeAt(0)); ``` This used to produce [253, 253, 253] instead of [187, 174, 180]. Required by Cloudflare's IUAM challenges. --- Userland/Libraries/LibTextCodec/Decoder.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Userland/Libraries/LibTextCodec/Decoder.cpp b/Userland/Libraries/LibTextCodec/Decoder.cpp index 79c3d8f760..958e7f0839 100644 --- a/Userland/Libraries/LibTextCodec/Decoder.cpp +++ b/Userland/Libraries/LibTextCodec/Decoder.cpp @@ -353,7 +353,7 @@ ErrorOr UTF16LEDecoder::to_utf8(StringView input) ErrorOr<void> Latin1Decoder::process(StringView input, Function<ErrorOr<void>(u32)> on_code_point) { - for (auto ch : input) { + for (u8 ch : input) { // Latin1 is the same as the first 256 Unicode code_points, so no mapping is needed, just utf-8 encoding. TRY(on_code_point(ch)); }