mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-26 21:42:06 +00:00 
			
		
		
		
	 f808279769
			
		
	
	
		f808279769
		
	
	
	
	
		
			
			This patch implements the HTML specification's "encoding sniffing algorithm", which is used when no encoding can be obtained from the Content-Type header, either because the header doesn't contain a 'charset=...' value or because the file has not been opened via HTTP (as with local files). It also modifies the code that creates the HTMLDocumentParser to use the new HTMLDocumentParser::create_with_uncertain_encoding static method, which runs the encoding sniffing algorithm before instantiating the parser. This now allows us to load local HTML pages (or remote pages without a charset specified in the 'Content-Type' header) with a non-UTF-8 encoding such as 'windows-1252'. Loading such pages would previously crash the browser. :^)
		
			
				
	
	
		
			22 lines
		
	
	
	
		
			671 B
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			22 lines
		
	
	
	
		
			671 B
		
	
	
	
		
			C++
		
	
	
	
	
	
| /*
 | |
|  * Copyright (c) 2021, Max Wipfli <mail@maxwipfli.ch>
 | |
|  *
 | |
|  * SPDX-License-Identifier: BSD-2-Clause
 | |
|  */
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include <AK/Optional.h>
 | |
| #include <AK/String.h>
 | |
| #include <LibWeb/DOM/Attribute.h>
 | |
| 
 | |
| namespace Web::HTML {
 | |
| 
 | |
| bool prescan_should_abort(const ByteBuffer& input, const size_t& position); // Takes the input bytes and a read-only position. Going by the name, reports whether prescanning must stop at that position; the exact condition lives in the definition (not visible here) - TODO confirm.
 | |
| bool prescan_is_whitespace_or_slash(const u8& byte); // Predicate on a single byte. Going by the name, true for a whitespace byte or '/'; which bytes count as whitespace is decided by the definition - TODO confirm.
 | |
| bool prescan_skip_whitespace_and_slashes(const ByteBuffer& input, size_t& position); // `position` is a mutable reference, so the caller's cursor may be modified by this call. The meaning of the bool result is not visible from this header - NOTE(review): check the definition before relying on it.
 | |
| Optional<Attribute> prescan_get_attribute(const ByteBuffer& input, size_t& position); // Yields an Attribute, or an empty Optional when none is produced. `position` is a mutable reference, so the caller's cursor may be modified. Presumably parses one attribute starting at `position` - TODO confirm.
 | |
| Optional<String> run_prescan_byte_stream_algorithm(const ByteBuffer& input); // Runs a prescan over the input bytes and yields a String, or an empty Optional. Presumably the String is the detected encoding name and empty means none was found - TODO confirm against the definition.
 | |
| String run_encoding_sniffing_algorithm(const ByteBuffer& input); // Entry point for the HTML specification's "encoding sniffing algorithm" over the raw document bytes. Always returns a String (presumably the encoding name to decode with - TODO confirm); unlike the prescan, the return type has no "not found" state.
 | |
| 
 | |
| }
 |