From 5b31d1208f7d0622a8fd57024568368286bf8372 Mon Sep 17 00:00:00 2001 From: Aliaksandr Kalenik Date: Sun, 26 Mar 2023 02:51:55 +0300 Subject: [PATCH] LibWeb: Run XML parser input through encoding decoder Fixes the issue where the XML parser fails when the loader passes input that is prefixed with a byte order mark. It also generally makes sense to pass the text source through an encoding decoder before parsing. We would probably even want to introduce a method similar to `create_with_uncertain_encoding` in `HTMLParser`, but for `XMLParser`, to make it harder to accidentally pass non-UTF-8 input to the XML parser. --- Userland/Libraries/LibWeb/Loader/FrameLoader.cpp | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp index a9bf71728e..e31e8f02b3 100644 --- a/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp +++ b/Userland/Libraries/LibWeb/Loader/FrameLoader.cpp @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,6 +19,7 @@ #include #include #include +#include #include #include #include @@ -171,8 +173,11 @@ static bool build_gemini_document(DOM::Document& document, ByteBuffer const& dat static bool build_xml_document(DOM::Document& document, ByteBuffer const& data) { - - XML::Parser parser(data, { .resolve_external_resource = resolve_xml_resource }); + auto encoding = HTML::run_encoding_sniffing_algorithm(document, data); + auto decoder = TextCodec::decoder_for(encoding); + VERIFY(decoder.has_value()); + auto source = decoder->to_utf8(data).release_value_but_fixme_should_propagate_errors(); + XML::Parser parser(source, { .resolve_external_resource = resolve_xml_resource }); XMLDocumentBuilder builder { document }; auto result = parser.parse_with_listener(builder); return !result.is_error() && !builder.has_error();