From ed2689c00a08dbed5deac559e5b77d61792c720c Mon Sep 17 00:00:00 2001 From: Luke Date: Fri, 13 Nov 2020 11:16:28 +0000 Subject: [PATCH] LibWeb: Use standardized encoding names, add encoding attribute to document --- Libraries/LibWeb/Bindings/WindowObject.cpp | 4 +++- Libraries/LibWeb/DOM/Document.h | 9 +++++++++ Libraries/LibWeb/DOM/Document.idl | 3 +++ Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp | 5 ++++- Libraries/LibWeb/Loader/FrameLoader.cpp | 4 +--- 5 files changed, 20 insertions(+), 5 deletions(-) diff --git a/Libraries/LibWeb/Bindings/WindowObject.cpp b/Libraries/LibWeb/Bindings/WindowObject.cpp index 26b6ba1311..44f424c6ec 100644 --- a/Libraries/LibWeb/Bindings/WindowObject.cpp +++ b/Libraries/LibWeb/Bindings/WindowObject.cpp @@ -283,7 +283,9 @@ JS_DEFINE_NATIVE_FUNCTION(WindowObject::atob) auto decoded = decode_base64(StringView(string)); // decode_base64() returns a byte string. LibJS uses UTF-8 for strings. Use Latin1Decoder to convert bytes 128-255 to UTF-8. - return JS::js_string(vm, TextCodec::decoder_for("iso-8859-1")->to_utf8(decoded)); + auto decoder = TextCodec::decoder_for("windows-1252"); + ASSERT(decoder); + return JS::js_string(vm, decoder->to_utf8(decoded)); } JS_DEFINE_NATIVE_FUNCTION(WindowObject::btoa) diff --git a/Libraries/LibWeb/DOM/Document.h b/Libraries/LibWeb/DOM/Document.h index 50e34e3b94..c3d4dc523c 100644 --- a/Libraries/LibWeb/DOM/Document.h +++ b/Libraries/LibWeb/DOM/Document.h @@ -199,6 +199,14 @@ public: const String& content_type() const { return m_content_type; } void set_content_type(const String& content_type) { m_content_type = content_type; } + const String& encoding() const { return m_encoding; } + void set_encoding(const String& encoding) { m_encoding = encoding; } + + // NOTE: These are intended for the JS bindings + const String& character_set() const { return encoding(); } + const String& charset() const { return encoding(); } + const String& input_encoding() const { return encoding(); } + const NonnullRefPtr implementation() { return m_implementation; } private: @@ -262,6 +270,7 @@ private: String m_ready_state { "loading" }; String m_content_type { "application/xml" }; + String m_encoding { "UTF-8" }; NonnullRefPtr m_implementation; }; diff --git a/Libraries/LibWeb/DOM/Document.idl b/Libraries/LibWeb/DOM/Document.idl index c10a095068..0c4f46d1cc 100644 --- a/Libraries/LibWeb/DOM/Document.idl +++ b/Libraries/LibWeb/DOM/Document.idl @@ -2,6 +2,9 @@ interface Document : Node { readonly attribute DOMImplementation implementation; + readonly attribute DOMString characterSet; + readonly attribute DOMString charset; + readonly attribute DOMString inputEncoding; readonly attribute DOMString contentType; Element? getElementById(DOMString id); diff --git a/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp b/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp index 6872bf1546..520d062fef 100644 --- a/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp +++ b/Libraries/LibWeb/HTML/Parser/HTMLDocumentParser.cpp @@ -27,6 +27,7 @@ //#define PARSER_DEBUG #include +#include #include #include #include @@ -110,7 +111,7 @@ static Vector s_quirks_public_ids = { RefPtr parse_html_document(const StringView& data, const URL& url, const String& encoding) { - HTMLDocumentParser parser(data, encoding); + HTMLDocumentParser parser(data, TextCodec::get_standardized_encoding(encoding)); parser.run(url); return parser.document(); } @@ -119,12 +120,14 @@ HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& en : m_tokenizer(input, encoding) { m_document = DOM::Document::create(); + m_document->set_encoding(encoding); } HTMLDocumentParser::HTMLDocumentParser(const StringView& input, const String& encoding, DOM::Document& existing_document) : m_tokenizer(input, encoding) , m_document(existing_document) { + m_document->set_encoding(encoding); } HTMLDocumentParser::~HTMLDocumentParser() diff --git a/Libraries/LibWeb/Loader/FrameLoader.cpp b/Libraries/LibWeb/Loader/FrameLoader.cpp index 54cbe6edf7..c5c417e7cc 100644 --- a/Libraries/LibWeb/Loader/FrameLoader.cpp +++ b/Libraries/LibWeb/Loader/FrameLoader.cpp @@ -135,9 +135,7 @@ RefPtr FrameLoader::create_document_from_mime_type(const ByteBuff RefPtr document; if (mime_type == "text/html" || mime_type == "image/svg+xml") { - HTML::HTMLDocumentParser parser(data, encoding); - parser.run(url); - document = parser.document(); + document = HTML::parse_html_document(data, url, encoding); } else if (mime_type.starts_with("image/")) { document = create_image_document(data, url); } else if (mime_type == "text/plain") {