mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 18:42:43 +00:00 
			
		
		
		
	LibRegex+LibUnicode: Begin implementing Unicode property escapes
This supports some binary property matching. It does not support any properties not yet parsed by LibUnicode, nor does it support value matching (such as Script_Extensions=Latin).
This commit is contained in:
		
							parent
							
								
									f1dd770a8a
								
							
						
					
					
						commit
						d485cf29d7
					
				
					 11 changed files with 230 additions and 33 deletions
				
			
		|  | @ -222,4 +222,73 @@ bool code_point_has_property([[maybe_unused]] u32 code_point, [[maybe_unused]] P | |||
| #endif | ||||
| } | ||||
| 
 | ||||
| bool is_ecma262_property([[maybe_unused]] Property property) | ||||
| { | ||||
| #if ENABLE_UNICODE_DATA | ||||
|     // ECMA-262 only allows a subset of Unicode properties: https://tc39.es/ecma262/#table-binary-unicode-properties
 | ||||
|     // Note: Some of the properties in the above link are not yet parsed by the LibUnicode generator. They are left
 | ||||
|     //       commented out here until they are parsed and can be used. Any property not listed below yields false.
 | ||||
|     switch (property) { | ||||
|     case Unicode::Property::ASCII: | ||||
|     case Unicode::Property::ASCII_Hex_Digit: | ||||
|     case Unicode::Property::Alphabetic: | ||||
|     case Unicode::Property::Any: | ||||
|     case Unicode::Property::Assigned: | ||||
|     case Unicode::Property::Bidi_Control: | ||||
|     // case Unicode::Property::Bidi_Mirrored:
 | ||||
|     case Unicode::Property::Case_Ignorable: | ||||
|     case Unicode::Property::Cased: | ||||
|     case Unicode::Property::Changes_When_Casefolded: | ||||
|     case Unicode::Property::Changes_When_Casemapped: | ||||
|     case Unicode::Property::Changes_When_Lowercased: | ||||
|     // case Unicode::Property::Changes_When_NFKC_Casefolded:
 | ||||
|     case Unicode::Property::Changes_When_Titlecased: | ||||
|     case Unicode::Property::Changes_When_Uppercased: | ||||
|     case Unicode::Property::Dash: | ||||
|     case Unicode::Property::Default_Ignorable_Code_Point: | ||||
|     case Unicode::Property::Deprecated: | ||||
|     case Unicode::Property::Diacritic: | ||||
|     // case Unicode::Property::Emoji:
 | ||||
|     // case Unicode::Property::Emoji_Component:
 | ||||
|     // case Unicode::Property::Emoji_Modifier:
 | ||||
|     // case Unicode::Property::Emoji_Modifier_Base:
 | ||||
|     // case Unicode::Property::Emoji_Presentation:
 | ||||
|     // case Unicode::Property::Extended_Pictographic:
 | ||||
|     case Unicode::Property::Extender: | ||||
|     case Unicode::Property::Grapheme_Base: | ||||
|     case Unicode::Property::Grapheme_Extend: | ||||
|     case Unicode::Property::Hex_Digit: | ||||
|     case Unicode::Property::IDS_Binary_Operator: | ||||
|     case Unicode::Property::IDS_Trinary_Operator: | ||||
|     case Unicode::Property::ID_Continue: | ||||
|     case Unicode::Property::ID_Start: | ||||
|     case Unicode::Property::Ideographic: | ||||
|     case Unicode::Property::Join_Control: | ||||
|     case Unicode::Property::Logical_Order_Exception: | ||||
|     case Unicode::Property::Lowercase: | ||||
|     case Unicode::Property::Math: | ||||
|     case Unicode::Property::Noncharacter_Code_Point: | ||||
|     case Unicode::Property::Pattern_Syntax: | ||||
|     case Unicode::Property::Pattern_White_Space: | ||||
|     case Unicode::Property::Quotation_Mark: | ||||
|     case Unicode::Property::Radical: | ||||
|     case Unicode::Property::Regional_Indicator: | ||||
|     case Unicode::Property::Sentence_Terminal: | ||||
|     case Unicode::Property::Soft_Dotted: | ||||
|     case Unicode::Property::Terminal_Punctuation: | ||||
|     case Unicode::Property::Unified_Ideograph: | ||||
|     case Unicode::Property::Uppercase: | ||||
|     case Unicode::Property::Variation_Selector: | ||||
|     case Unicode::Property::White_Space: | ||||
|     case Unicode::Property::XID_Continue: | ||||
|     case Unicode::Property::XID_Start: | ||||
|         return true; | ||||
|     default: | ||||
|         return false; | ||||
|     } | ||||
| #else | ||||
|     return false; | ||||
| #endif | ||||
| } | ||||
| 
 | ||||
| } | ||||
|  |  | |||
|  | @ -23,5 +23,6 @@ String to_unicode_uppercase_full(StringView const&); | |||
| 
 | ||||
| Optional<Property> property_from_string(StringView const&); | ||||
| bool code_point_has_property(u32 code_point, Property property); | ||||
| bool is_ecma262_property(Property); | ||||
| 
 | ||||
| } | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn