From 5b2078b02dd5ebdad5f2ed64d773f99d7d38a071 Mon Sep 17 00:00:00 2001 From: Sylvestre Ledru Date: Wed, 4 Jun 2025 22:30:52 +0200 Subject: [PATCH] l10n: Disable Unicode directional isolate characters (U+2068, U+2069) --- src/uucore/src/lib/mods/locale.rs | 45 ++++++++++++++++++------------- 1 file changed, 26 insertions(+), 19 deletions(-) diff --git a/src/uucore/src/lib/mods/locale.rs b/src/uucore/src/lib/mods/locale.rs index 7bd3c0988..bb4040cd5 100644 --- a/src/uucore/src/lib/mods/locale.rs +++ b/src/uucore/src/lib/mods/locale.rs @@ -150,6 +150,13 @@ fn create_bundle( let mut bundle = FluentBundle::new(vec![locale.clone()]); + // Disable Unicode directional isolate characters (U+2068, U+2069) + // By default, Fluent wraps variables for security + // and proper text rendering in mixed-script environments (Arabic + English). + // Disabling gives cleaner output: "Welcome, Alice!" but reduces protection + // against bidirectional text attacks. Safe for English-only applications. + bundle.set_use_isolating(false); + bundle.add_resource(resource).map_err(|errs| { LocalizationError::Bundle(format!( "Failed to add resource to bundle for {}: {:?}", @@ -527,7 +534,7 @@ invalid-syntax = This is { $missing args.set("name", "Alice"); let result = localizer.format("welcome", Some(&args)); - assert_eq!(result, "Welcome, \u{2068}Alice\u{2069}!"); + assert_eq!(result, "Welcome, Alice!"); } #[test] @@ -687,7 +694,7 @@ invalid-syntax = This is { $missing args.insert("name".to_string(), "Bob".to_string()); let message = get_message_with_args("welcome", args); - assert_eq!(message, "Welcome, \u{2068}Bob\u{2069}!"); + assert_eq!(message, "Welcome, Bob!"); }) .join() .unwrap(); @@ -705,18 +712,17 @@ invalid-syntax = This is { $missing let mut args1 = HashMap::new(); args1.insert("count".to_string(), "1".to_string()); let message1 = get_message_with_args("count-items", args1); - assert_eq!(message1, "You have \u{2068}\u{2068}1\u{2069} item\u{2069}"); + assert_eq!(message1, "You have 1 item"); // Test plural let mut args2 = HashMap::new(); args2.insert("count".to_string(), "5".to_string()); let message2 = get_message_with_args("count-items", args2); - assert_eq!(message2, "You have \u{2068}\u{2068}5\u{2069} items\u{2069}"); + assert_eq!(message2, "You have 5 items"); }) .join() .unwrap(); } - #[test] fn test_detect_system_locale_from_lang_env() { // Save current LANG value @@ -917,13 +923,13 @@ invalid-syntax = This is { $missing let mut args = HashMap::new(); args.insert("name".to_string(), "田中".to_string()); let welcome = get_message_with_args("welcome", args); - assert_eq!(welcome, "ようこそ、\u{2068}田中\u{2069}さん!"); + assert_eq!(welcome, "ようこそ、田中さん!"); // Test Japanese count (no pluralization) let mut count_args = HashMap::new(); count_args.insert("count".to_string(), "5".to_string()); let count_message = get_message_with_args("count-items", count_args); - assert_eq!(count_message, "\u{2068}5\u{2069}個のアイテムがあります"); + assert_eq!(count_message, "5個のアイテムがあります"); }) .join() .unwrap(); @@ -946,37 +952,38 @@ invalid-syntax = This is { $missing let mut args = HashMap::new(); args.insert("name".to_string(), "أحمد".to_string()); let welcome = get_message_with_args("welcome", args); - assert_eq!(welcome, "أهلاً وسهلاً، \u{2068}أحمد\u{2069}!"); + + assert_eq!(welcome, "أهلاً وسهلاً، أحمد!"); // Test Arabic pluralization (zero case) let mut args_zero = HashMap::new(); args_zero.insert("count".to_string(), "0".to_string()); let message_zero = get_message_with_args("count-items", args_zero); - assert_eq!(message_zero, "لديك \u{2068}لا عناصر\u{2069}"); + assert_eq!(message_zero, "لديك لا عناصر"); // Test Arabic pluralization (one case) let mut args_one = HashMap::new(); args_one.insert("count".to_string(), "1".to_string()); let message_one = get_message_with_args("count-items", args_one); - assert_eq!(message_one, "لديك \u{2068}عنصر واحد\u{2069}"); + assert_eq!(message_one, "لديك عنصر واحد"); // Test Arabic pluralization (two case) let mut args_two = HashMap::new(); args_two.insert("count".to_string(), "2".to_string()); let message_two = get_message_with_args("count-items", args_two); - assert_eq!(message_two, "لديك \u{2068}عنصران\u{2069}"); + assert_eq!(message_two, "لديك عنصران"); // Test Arabic pluralization (few case - 3-10) let mut args_few = HashMap::new(); args_few.insert("count".to_string(), "5".to_string()); let message_few = get_message_with_args("count-items", args_few); - assert_eq!(message_few, "لديك \u{2068}\u{2068}5\u{2069} عناصر\u{2069}"); + assert_eq!(message_few, "لديك 5 عناصر"); // Test Arabic pluralization (other case - 11+) let mut args_many = HashMap::new(); args_many.insert("count".to_string(), "15".to_string()); let message_many = get_message_with_args("count-items", args_many); - assert_eq!(message_many, "لديك \u{2068}\u{2068}15\u{2069} عنصر\u{2069}"); + assert_eq!(message_many, "لديك 15 عنصر"); }) .join() .unwrap(); @@ -1002,23 +1009,23 @@ invalid-syntax = This is { $missing .join() .unwrap(); } - #[test] - fn test_unicode_directional_isolation() { + fn test_unicode_directional_isolation_disabled() { std::thread::spawn(|| { let temp_dir = create_test_locales_dir(); let locale = LanguageIdentifier::from_str("ar-SA").unwrap(); init_localization(&locale, temp_dir.path()).unwrap(); - // Test that Latin script names are properly isolated in RTL context + // Test that Latin script names are NOT isolated in RTL context + // since we disabled Unicode directional isolation let mut args = HashMap::new(); args.insert("name".to_string(), "John Smith".to_string()); let message = get_message_with_args("welcome", args); - // The Latin name should be wrapped in directional isolate characters - assert!(message.contains("\u{2068}John Smith\u{2069}")); - assert_eq!(message, "أهلاً وسهلاً، \u{2068}John Smith\u{2069}!"); + // The Latin name should NOT be wrapped in directional isolate characters + assert!(!message.contains("\u{2068}John Smith\u{2069}")); + assert_eq!(message, "أهلاً وسهلاً، John Smith!"); }) .join() .unwrap();