mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 15:12:45 +00:00 
			
		
		
		
	LibWeb: Let Resource figure out its own encoding and MIME type
Also, if the request URL is a data: URL, use the MIME type from the URL itself if available. This makes it possible to load data: URLs of arbitrary MIME types in the browser :^)
This commit is contained in:
		
							parent
							
								
									efe9d36eba
								
							
						
					
					
						commit
						f88146c7c9
					
				
					 3 changed files with 53 additions and 46 deletions
				
			
		|  | @ -118,37 +118,6 @@ static RefPtr<Document> create_gemini_document(const ByteBuffer& data, const URL | ||||||
|     return parse_html_document(markdown_document->render_to_html(), url); |     return parse_html_document(markdown_document->render_to_html(), url); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| String encoding_from_content_type(const String& content_type) |  | ||||||
| { |  | ||||||
|     auto offset = content_type.index_of("charset="); |  | ||||||
|     if (offset.has_value()) |  | ||||||
|         return content_type.substring(offset.value() + 8, content_type.length() - offset.value() - 8).to_lowercase(); |  | ||||||
| 
 |  | ||||||
|     return "utf-8"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| String mime_type_from_content_type(const String& content_type) |  | ||||||
| { |  | ||||||
|     auto offset = content_type.index_of(";"); |  | ||||||
|     if (offset.has_value()) |  | ||||||
|         return content_type.substring(0, offset.value()).to_lowercase(); |  | ||||||
| 
 |  | ||||||
|     return content_type; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| static String guess_mime_type_based_on_filename(const URL& url) |  | ||||||
| { |  | ||||||
|     if (url.path().ends_with(".png")) |  | ||||||
|         return "image/png"; |  | ||||||
|     if (url.path().ends_with(".gif")) |  | ||||||
|         return "image/gif"; |  | ||||||
|     if (url.path().ends_with(".md")) |  | ||||||
|         return "text/markdown"; |  | ||||||
|     if (url.path().ends_with(".html") || url.path().ends_with(".htm")) |  | ||||||
|         return "text/html"; |  | ||||||
|     return "text/plain"; |  | ||||||
| } |  | ||||||
| 
 |  | ||||||
| RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding) | RefPtr<Document> FrameLoader::create_document_from_mime_type(const ByteBuffer& data, const URL& url, const String& mime_type, const String& encoding) | ||||||
| { | { | ||||||
|     if (mime_type.starts_with("image/")) |     if (mime_type.starts_with("image/")) | ||||||
|  | @ -250,21 +219,8 @@ void FrameLoader::resource_did_load() | ||||||
|         return; |         return; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     String encoding = "utf-8"; |     dbg() << "I believe this content has MIME type '" << resource()->mime_type() << "', encoding '" << resource()->encoding() << "'"; | ||||||
|     String mime_type; |     auto document = create_document_from_mime_type(resource()->encoded_data(), url, resource()->mime_type(), resource()->encoding()); | ||||||
| 
 |  | ||||||
|     auto content_type = resource()->response_headers().get("Content-Type"); |  | ||||||
|     if (content_type.has_value()) { |  | ||||||
|         dbg() << "Content-Type header: _" << content_type.value() << "_"; |  | ||||||
|         encoding = encoding_from_content_type(content_type.value()); |  | ||||||
|         mime_type = mime_type_from_content_type(content_type.value()); |  | ||||||
|     } else { |  | ||||||
|         dbg() << "No Content-Type header to go on! Guessing based on filename..."; |  | ||||||
|         mime_type = guess_mime_type_based_on_filename(url); |  | ||||||
|     } |  | ||||||
| 
 |  | ||||||
|     dbg() << "I believe this content has MIME type '" << mime_type << "', encoding '" << encoding << "'"; |  | ||||||
|     auto document = create_document_from_mime_type(resource()->encoded_data(), url, mime_type, encoding); |  | ||||||
|     ASSERT(document); |     ASSERT(document); | ||||||
|     frame().set_document(document); |     frame().set_document(document); | ||||||
| 
 | 
 | ||||||
|  |  | ||||||
|  | @ -59,6 +59,37 @@ void Resource::for_each_client(Function<void(ResourceClient&)> callback) | ||||||
|     } |     } | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
|  | String encoding_from_content_type(const String& content_type) | ||||||
|  | { | ||||||
|  |     auto offset = content_type.index_of("charset="); | ||||||
|  |     if (offset.has_value()) | ||||||
|  |         return content_type.substring(offset.value() + 8, content_type.length() - offset.value() - 8).to_lowercase(); | ||||||
|  | 
 | ||||||
|  |     return "utf-8"; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | String mime_type_from_content_type(const String& content_type) | ||||||
|  | { | ||||||
|  |     auto offset = content_type.index_of(";"); | ||||||
|  |     if (offset.has_value()) | ||||||
|  |         return content_type.substring(0, offset.value()).to_lowercase(); | ||||||
|  | 
 | ||||||
|  |     return content_type; | ||||||
|  | } | ||||||
|  | 
 | ||||||
|  | static String guess_mime_type_based_on_filename(const URL& url) | ||||||
|  | { | ||||||
|  |     if (url.path().ends_with(".png")) | ||||||
|  |         return "image/png"; | ||||||
|  |     if (url.path().ends_with(".gif")) | ||||||
|  |         return "image/gif"; | ||||||
|  |     if (url.path().ends_with(".md")) | ||||||
|  |         return "text/markdown"; | ||||||
|  |     if (url.path().ends_with(".html") || url.path().ends_with(".htm")) | ||||||
|  |         return "text/html"; | ||||||
|  |     return "text/plain"; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| void Resource::did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers) | void Resource::did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers) | ||||||
| { | { | ||||||
|     ASSERT(!m_loaded); |     ASSERT(!m_loaded); | ||||||
|  | @ -66,6 +97,21 @@ void Resource::did_load(Badge<ResourceLoader>, const ByteBuffer& data, const Has | ||||||
|     m_response_headers = headers; |     m_response_headers = headers; | ||||||
|     m_loaded = true; |     m_loaded = true; | ||||||
| 
 | 
 | ||||||
|  |     auto content_type = headers.get("Content-Type"); | ||||||
|  |     if (content_type.has_value()) { | ||||||
|  |         dbg() << "Content-Type header: _" << content_type.value() << "_"; | ||||||
|  |         m_encoding = encoding_from_content_type(content_type.value()); | ||||||
|  |         m_mime_type = mime_type_from_content_type(content_type.value()); | ||||||
|  |     } else if (url().protocol() == "data" && !url().data_mime_type().is_empty()) { | ||||||
|  |         dbg() << "This is a data URL with mime-type _" << url().data_mime_type() << "_"; | ||||||
|  |         m_encoding = "utf-8"; // FIXME: This doesn't seem nice.
 | ||||||
|  |         m_mime_type = url().data_mime_type(); | ||||||
|  |     } else { | ||||||
|  |         dbg() << "No Content-Type header to go on! Guessing based on filename..."; | ||||||
|  |         m_encoding = "utf-8"; // FIXME: This doesn't seem nice.
 | ||||||
|  |         m_mime_type = guess_mime_type_based_on_filename(url()); | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|     for_each_client([](auto& client) { |     for_each_client([](auto& client) { | ||||||
|         client.resource_did_load(); |         client.resource_did_load(); | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|  | @ -72,6 +72,9 @@ public: | ||||||
|     void register_client(Badge<ResourceClient>, ResourceClient&); |     void register_client(Badge<ResourceClient>, ResourceClient&); | ||||||
|     void unregister_client(Badge<ResourceClient>, ResourceClient&); |     void unregister_client(Badge<ResourceClient>, ResourceClient&); | ||||||
| 
 | 
 | ||||||
|  |     const String& encoding() const { return m_encoding; } | ||||||
|  |     const String& mime_type() const { return m_mime_type; } | ||||||
|  | 
 | ||||||
|     void for_each_client(Function<void(ResourceClient&)>); |     void for_each_client(Function<void(ResourceClient&)>); | ||||||
| 
 | 
 | ||||||
|     void did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers); |     void did_load(Badge<ResourceLoader>, const ByteBuffer& data, const HashMap<String, String, CaseInsensitiveStringTraits>& headers); | ||||||
|  | @ -87,6 +90,8 @@ private: | ||||||
|     bool m_loaded { false }; |     bool m_loaded { false }; | ||||||
|     bool m_failed { false }; |     bool m_failed { false }; | ||||||
|     String m_error; |     String m_error; | ||||||
|  |     String m_encoding; | ||||||
|  |     String m_mime_type; | ||||||
|     HashMap<String, String, CaseInsensitiveStringTraits> m_response_headers; |     HashMap<String, String, CaseInsensitiveStringTraits> m_response_headers; | ||||||
|     HashTable<ResourceClient*> m_clients; |     HashTable<ResourceClient*> m_clients; | ||||||
| }; | }; | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Andreas Kling
						Andreas Kling