mirror of
https://github.com/RGBCube/serenity
synced 2025-05-31 08:38:12 +00:00
LibWeb: Implement encoding sniffing algorithm
This patch implements the HTML specification's "encoding sniffing algorithm", which is used when no encoding can be obtained from the Content-Type header, either because the header doesn't contain a charset=... value or because the file has not been opened via HTTP (as with local files). It also modifies the code that creates the HTMLDocumentParser to use the new HTMLDocumentParser::create_with_uncertain_encoding static method, which runs the encoding sniffing algorithm before instantiating the parser. This now allows us to load local HTML pages (or remote pages without a charset specified in the 'Content-Type' header) with a non-UTF-8 encoding such as 'windows-1252'. Loading such pages would previously crash the browser. :^)
This commit is contained in:
parent
67a9ebc817
commit
f808279769
6 changed files with 261 additions and 2 deletions
|
@@ -113,8 +113,8 @@ bool FrameLoader::parse_document(DOM::Document& document, const ByteBuffer& data
|
|||
{
|
||||
auto& mime_type = document.content_type();
|
||||
if (mime_type == "text/html" || mime_type == "image/svg+xml") {
|
||||
HTML::HTMLDocumentParser parser(document, data, document.encoding_or_default());
|
||||
parser.run(document.url());
|
||||
auto parser = HTML::HTMLDocumentParser::create_with_uncertain_encoding(document, data);
|
||||
parser->run(document.url());
|
||||
return true;
|
||||
}
|
||||
if (mime_type.starts_with("image/"))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue