mirror of
				https://github.com/RGBCube/serenity
				synced 2025-10-31 13:32:45 +00:00 
			
		
		
		
	LibJS: Reject structurally invalid Unicode locale extensions
This commit is contained in:
		
							parent
							
								
									f897c2edb3
								
							
						
					
					
						commit
						94e66f500c
					
				
					 2 changed files with 59 additions and 12 deletions
				
			
		|  | @ -18,6 +18,20 @@ namespace JS::Intl { | |||
| // 6.2.2 IsStructurallyValidLanguageTag ( locale ), https://tc39.es/ecma402/#sec-isstructurallyvalidlanguagetag
 | ||||
| static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView locale) | ||||
| { | ||||
|     auto contains_duplicate_variant = [](Vector<StringView>& variants) { | ||||
|         if (variants.is_empty()) | ||||
|             return false; | ||||
| 
 | ||||
|         quick_sort(variants); | ||||
| 
 | ||||
|         for (size_t i = 0; i < variants.size() - 1; ++i) { | ||||
|             if (variants[i] == variants[i + 1]) | ||||
|                 return true; | ||||
|         } | ||||
| 
 | ||||
|         return false; | ||||
|     }; | ||||
| 
 | ||||
|     // IsStructurallyValidLanguageTag returns true if all of the following conditions hold, false otherwise:
 | ||||
| 
 | ||||
|     // locale can be generated from the EBNF grammar for unicode_locale_id in Unicode Technical Standard #35 LDML § 3.2 Unicode Locale Identifier;
 | ||||
|  | @ -31,23 +45,33 @@ static Optional<Unicode::LocaleID> is_structurally_valid_language_tag(StringView | |||
|         return {}; | ||||
| 
 | ||||
|     // the unicode_language_id within locale contains no duplicate unicode_variant_subtag subtags; and
 | ||||
|     if (auto& variants = locale_id->language_id.variants; !variants.is_empty()) { | ||||
|         quick_sort(variants); | ||||
|     if (contains_duplicate_variant(locale_id->language_id.variants)) | ||||
|         return {}; | ||||
| 
 | ||||
|         for (size_t i = 0; i < variants.size() - 1; ++i) { | ||||
|             if (variants[i] == variants[i + 1]) | ||||
|     // if locale contains an extensions* component, that component
 | ||||
|     Vector<char> unique_keys; | ||||
|     for (auto& extension : locale_id->extensions) { | ||||
|         // does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
 | ||||
|         // contains at most one unicode_locale_extensions component,
 | ||||
|         // contains at most one transformed_extensions component, and
 | ||||
|         char key = extension.visit( | ||||
|             [](Unicode::LocaleExtension const&) { return 'u'; }, | ||||
|             [](Unicode::TransformedExtension const&) { return 't'; }, | ||||
|             [](Unicode::OtherExtension const& ext) { return static_cast<char>(to_ascii_lowercase(ext.key)); }); | ||||
| 
 | ||||
|         if (unique_keys.contains_slow(key)) | ||||
|             return {}; | ||||
|         unique_keys.append(key); | ||||
| 
 | ||||
|         // if a transformed_extensions component that contains a tlang component is present, then
 | ||||
|         // the tlang component contains no duplicate unicode_variant_subtag subtags.
 | ||||
|         if (auto* transformed = extension.get_pointer<Unicode::TransformedExtension>()) { | ||||
|             auto& language = transformed->language; | ||||
|             if (language.has_value() && contains_duplicate_variant(language->variants)) | ||||
|                 return {}; | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     // FIXME: Handle extensions.
 | ||||
|     // if locale contains an extensions* component, that component
 | ||||
|     //     does not contain any other_extensions components with duplicate [alphanum-[tTuUxX]] subtags,
 | ||||
|     //     contains at most one unicode_locale_extensions component,
 | ||||
|     //     contains at most one transformed_extensions component, and
 | ||||
|     //     if a transformed_extensions component that contains a tlang component is present, then
 | ||||
|     //         the tlang component contains no duplicate unicode_variant_subtag subtags.
 | ||||
| 
 | ||||
|     return locale_id; | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -45,6 +45,29 @@ describe("errors", () => { | |||
|             Intl.getCanonicalLocales([true]); | ||||
|         }).toThrowWithMessage(TypeError, "true is neither an object nor a string"); | ||||
|     }); | ||||
| 
 | ||||
|     test("duplicate extension components", () => { | ||||
|         expect(() => { | ||||
|             Intl.getCanonicalLocales("en-u-aa-U-aa"); | ||||
|         }).toThrowWithMessage(RangeError, "en-u-aa-U-aa is not a structurally valid language tag"); | ||||
| 
 | ||||
|         expect(() => { | ||||
|             Intl.getCanonicalLocales("en-t-aa-T-aa"); | ||||
|         }).toThrowWithMessage(RangeError, "en-t-aa-T-aa is not a structurally valid language tag"); | ||||
| 
 | ||||
|         expect(() => { | ||||
|             Intl.getCanonicalLocales("en-z-aa-Z-aa"); | ||||
|         }).toThrowWithMessage(RangeError, "en-z-aa-Z-aa is not a structurally valid language tag"); | ||||
|     }); | ||||
| 
 | ||||
|     test("duplicate transformed extension variant subtags", () => { | ||||
|         expect(() => { | ||||
|             Intl.getCanonicalLocales("en-t-en-POSIX-POSIX"); | ||||
|         }).toThrowWithMessage( | ||||
|             RangeError, | ||||
|             "en-t-en-POSIX-POSIX is not a structurally valid language tag" | ||||
|         ); | ||||
|     }); | ||||
| }); | ||||
| 
 | ||||
| describe("normal behavior", () => { | ||||
|  |  | |||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue
	
	 Timothy Flynn
						Timothy Flynn