From fc41c282eca1fddba222a6f37208d71de4f7324a Mon Sep 17 00:00:00 2001 From: MacDue Date: Fri, 5 Jan 2024 00:49:08 +0000 Subject: [PATCH] LibWeb: Fix utf16-be check in HTMLEncodingDetection The UTF-16BE check mistakenly skipped index 3, so it was not checking the correct bytes. This meant UTF-16BE files could fail to decode. --- ...-be-xhtml-file-should-decode-correctly.txt | 20 ++++++++++++++++++ ...be-xhtml-file-should-decode-correctly.html | Bin 0 -> 470 bytes .../HTML/Parser/HTMLEncodingDetection.cpp | 6 ++++-- 3 files changed, 24 insertions(+), 2 deletions(-) create mode 100644 Tests/LibWeb/Layout/expected/utf-16-be-xhtml-file-should-decode-correctly.txt create mode 100644 Tests/LibWeb/Layout/input/utf-16-be-xhtml-file-should-decode-correctly.html diff --git a/Tests/LibWeb/Layout/expected/utf-16-be-xhtml-file-should-decode-correctly.txt b/Tests/LibWeb/Layout/expected/utf-16-be-xhtml-file-should-decode-correctly.txt new file mode 100644 index 0000000000..fbcb69e25f --- /dev/null +++ b/Tests/LibWeb/Layout/expected/utf-16-be-xhtml-file-should-decode-correctly.txt @@ -0,0 +1,20 @@ +Viewport <#document> at (0,0) content-size 800x600 children: not-inline + BlockContainer at (0,0) content-size 800x41.46875 [BFC] children: not-inline + BlockContainer at (8,16) content-size 784x17.46875 children: not-inline + BlockContainer <(anonymous)> at (8,16) content-size 784x0 children: inline + TextNode <#text> + BlockContainer

at (8,16) content-size 784x17.46875 children: inline + line 0 width: 29.21875, height: 17.46875, bottom: 17.46875, baseline: 13.53125 + frag 0 from TextNode start: 1, length: 15, rect: [8,16 29.21875x17.46875] + "好啦朋友們" + TextNode <#text> + BlockContainer <(anonymous)> at (8,49.46875) content-size 784x0 children: inline + TextNode <#text> + +ViewportPaintable (Viewport<#document>) [0,0 800x600] + PaintableWithLines (BlockContainer) [0,0 800x41.46875] overflow: [0,0 800x49.46875] + PaintableWithLines (BlockContainer) [8,16 784x17.46875] overflow: [8,16 784x33.46875] + PaintableWithLines (BlockContainer(anonymous)) [8,16 784x0] + PaintableWithLines (BlockContainer

) [8,16 784x17.46875] + TextPaintable (TextNode<#text>) + PaintableWithLines (BlockContainer(anonymous)) [8,49.46875 784x0] diff --git a/Tests/LibWeb/Layout/input/utf-16-be-xhtml-file-should-decode-correctly.html b/Tests/LibWeb/Layout/input/utf-16-be-xhtml-file-should-decode-correctly.html new file mode 100644 index 0000000000000000000000000000000000000000..25d0f7fa1ab2b0e6f22b043dfb5bd2fe9fac50e8 GIT binary patch literal 470 zcmZRuVX$YYV8~_2VNhTwV@PEvVkl7$O)v7(y6)8GINNpyn$u1T&N{6ftBnBr}wN)w(kHF(@%`F(`md0@+c*P{3dX zmMdofK|O|Y24e<2hJ1!1hI9sfh7g7z27QJKxO)s4^cf&FqKfJ=6eHQC$B@EM!jQtC z1oa=tCP-L7!X%HO7#t2DaY8OIWUvH7Wc!pD6riDN#gN00$dJd74mK~9ArES&0)q`h w5<@;i3PUB>4Iq92l%)_^8=98R9eg@K5F8dZ4EhW(IS?CJABYcfCCK%50D+`P4FCWD literal 0 HcmV?d00001 diff --git a/Userland/Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp b/Userland/Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp index 9e8c8d9d68..2be4e255b1 100644 --- a/Userland/Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp +++ b/Userland/Libraries/LibWeb/HTML/Parser/HTMLEncodingDetection.cpp @@ -251,10 +251,12 @@ Optional run_prescan_byte_stream_algorithm(DOM::Document& document, // https://html.spec.whatwg.org/multipage/parsing.html#prescan-a-byte-stream-to-determine-its-encoding // Detects '