mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 14:12:44 +00:00 
			
		
		
		
	LibUnicode: Dynamically load the generated UnicodeData symbols
The generated data for libunicodedata.so is quite large, and loading it is a price paid by nearly every application by way of depending on LibRegex. In order to defer this cost until an application actually uses one of the surrounding APIs, dynamically load the generated symbols. To be able to load the symbols dynamically, the generated methods must have unmangled names. Typically, this is accomplished with `extern "C"` blocks. The clang toolchain complains about this here because the types returned from the generators are strictly C++ types. So to avoid name mangling, we use the asm() compiler directive to manually define a symbol name; the caveat is that we *must* be sure the symbols are unique. As an extra precaution, we prefix each symbol name with "unicode_". For more details on asm labels, see https://gcc.gnu.org/onlinedocs/gcc/Asm-Labels.html (GCC documentation). The symbol loader used in this implementation provides the additional benefit of removing many [[maybe_unused]] attributes from the LibUnicode methods. Internally, if ENABLE_UNICODE_DATABASE_DOWNLOAD is OFF, the loader is able to stub out the function pointers it returns. Note that as of this commit, LibUnicode is still directly linked against LibUnicodeData. This commit is just a first step towards removing that direct link dependency.
This commit is contained in:
		
							parent
							
								
									749d5ebd68
								
							
						
					
					
						commit
						3fd53baa25
					
				
					 7 changed files with 256 additions and 101 deletions
				
			
		
							
								
								
									
										44
									
								
								Userland/Libraries/LibUnicode/UnicodeSymbols.h
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								Userland/Libraries/LibUnicode/UnicodeSymbols.h
									
										
									
									
									
										Normal file
									
								
							|  | @ -0,0 +1,44 @@ | |||
| /*
 | ||||
|  * Copyright (c) 2021, Tim Flynn <trflynn89@pm.me> | ||||
|  * | ||||
|  * SPDX-License-Identifier: BSD-2-Clause | ||||
|  */ | ||||
| 
 | ||||
| #pragma once | ||||
| 
 | ||||
| #include <AK/Optional.h> | ||||
| #include <AK/String.h> | ||||
| #include <AK/StringView.h> | ||||
| #include <AK/Types.h> | ||||
| #include <LibUnicode/Forward.h> | ||||
| 
 | ||||
| namespace Unicode::Detail { | ||||
| 
 | ||||
| struct Symbols { | ||||
|     static Symbols const& ensure_loaded(); | ||||
| 
 | ||||
|     // Loaded from UnicodeData.cpp:
 | ||||
| 
 | ||||
|     Optional<String> (*code_point_display_name)(u32) { nullptr }; | ||||
| 
 | ||||
|     u32 (*canonical_combining_class)(u32 code_point) { nullptr }; | ||||
| 
 | ||||
|     u32 (*simple_uppercase_mapping)(u32) { nullptr }; | ||||
|     u32 (*simple_lowercase_mapping)(u32) { nullptr }; | ||||
|     Span<SpecialCasing const* const> (*special_case_mapping)(u32 code_point) { nullptr }; | ||||
| 
 | ||||
|     Optional<GeneralCategory> (*general_category_from_string)(StringView) { nullptr }; | ||||
|     bool (*code_point_has_general_category)(u32, GeneralCategory) { nullptr }; | ||||
| 
 | ||||
|     Optional<Property> (*property_from_string)(StringView) { nullptr }; | ||||
|     bool (*code_point_has_property)(u32, Property) { nullptr }; | ||||
| 
 | ||||
|     Optional<Script> (*script_from_string)(StringView) { nullptr }; | ||||
|     bool (*code_point_has_script)(u32, Script) { nullptr }; | ||||
|     bool (*code_point_has_script_extension)(u32, Script) { nullptr }; | ||||
| 
 | ||||
| private: | ||||
|     Symbols() = default; | ||||
| }; | ||||
| 
 | ||||
| } | ||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn