mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 00:42:44 +00:00 
			
		
		
		
	LibUnicode: Check property list when deciding if a code point is cased
This commit is contained in:
		
							parent
							
								
									38adfd8874
								
							
						
					
					
						commit
						c45a014645
					
				
					 2 changed files with 23 additions and 3 deletions
				
			
		|  | @ -110,6 +110,14 @@ TEST_CASE(to_unicode_lowercase_special_casing_sigma) | ||||||
|     result = Unicode::to_unicode_lowercase_full("A\u03A3"sv); |     result = Unicode::to_unicode_lowercase_full("A\u03A3"sv); | ||||||
|     EXPECT_EQ(result, "a\u03C2"); |     EXPECT_EQ(result, "a\u03C2"); | ||||||
| 
 | 
 | ||||||
|  |     // Sigma preceded by FEMININE ORDINAL INDICATOR
 | ||||||
|  |     result = Unicode::to_unicode_lowercase_full("\u00AA\u03A3"sv); | ||||||
|  |     EXPECT_EQ(result, "\u00AA\u03C2"); | ||||||
|  | 
 | ||||||
|  |     // Sigma preceded by ROMAN NUMERAL ONE
 | ||||||
|  |     result = Unicode::to_unicode_lowercase_full("\u2160\u03A3"sv); | ||||||
|  |     EXPECT_EQ(result, "\u2170\u03C2"); | ||||||
|  | 
 | ||||||
|     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
 |     // Sigma preceded by A and MONGOLIAN VOWEL SEPARATOR
 | ||||||
|     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv); |     result = Unicode::to_unicode_lowercase_full("A\u180E\u03A3"sv); | ||||||
|     EXPECT_EQ(result, "a\u180E\u03C2"); |     EXPECT_EQ(result, "a\u180E\u03C2"); | ||||||
|  |  | ||||||
|  | @ -23,18 +23,30 @@ namespace Unicode { | ||||||
| 
 | 
 | ||||||
| #if ENABLE_UNICODE_DATA | #if ENABLE_UNICODE_DATA | ||||||
| 
 | 
 | ||||||
|  | static bool has_property(UnicodeData const& unicode_data, Property property) | ||||||
|  | { | ||||||
|  |     for (u32 i = 0; i < unicode_data.prop_list_size; ++i) { | ||||||
|  |         if (unicode_data.prop_list[i] == property) | ||||||
|  |             return true; | ||||||
|  |     } | ||||||
|  | 
 | ||||||
|  |     return false; | ||||||
|  | } | ||||||
|  | 
 | ||||||
| static bool is_cased_letter(UnicodeData const& unicode_data) | static bool is_cased_letter(UnicodeData const& unicode_data) | ||||||
| { | { | ||||||
|     // A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
 |     // A character C is defined to be cased if and only if C has the Lowercase or Uppercase property
 | ||||||
|     // or has a General_Category value of Titlecase_Letter.
 |     // or has a General_Category value of Titlecase_Letter.
 | ||||||
|     switch (unicode_data.general_category) { |     switch (unicode_data.general_category) { | ||||||
|     case GeneralCategory::Ll: // FIXME: Should be Ll + Other_Lowercase (PropList.txt).
 |     case GeneralCategory::Ll: | ||||||
|     case GeneralCategory::Lu: // FIXME: Should be Lu + Other_Uppercase (PropList.txt).
 |     case GeneralCategory::Lu: | ||||||
|     case GeneralCategory::Lt: |     case GeneralCategory::Lt: | ||||||
|         return true; |         return true; | ||||||
|     default: |     default: | ||||||
|         return false; |         break; | ||||||
|     } |     } | ||||||
|  | 
 | ||||||
|  |     return has_property(unicode_data, Property::OtherLowercase) || has_property(unicode_data, Property::OtherUppercase); | ||||||
| } | } | ||||||
| 
 | 
 | ||||||
| static bool is_case_ignorable(UnicodeData const& unicode_data) | static bool is_case_ignorable(UnicodeData const& unicode_data) | ||||||
|  |  | ||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn