mas_i18n/
translator.rs

1// Copyright 2024, 2025 New Vector Ltd.
2// Copyright 2023, 2024 The Matrix.org Foundation C.I.C.
3//
4// SPDX-License-Identifier: AGPL-3.0-only OR LicenseRef-Element-Commercial
5// Please see LICENSE files in the repository root for full details.
6
7use std::{collections::HashMap, fs::File, io::BufReader};
8
9use camino::{Utf8Path, Utf8PathBuf};
10use icu_experimental::relativetime::{
11    RelativeTimeFormatter, RelativeTimeFormatterOptions, options::Numeric,
12};
13use icu_locale::fallback::{LocaleFallbackConfig, LocaleFallbacker};
14use icu_locale_core::{Locale, ParseError};
15use icu_plurals::PluralRules;
16use icu_provider::prelude::*;
17use thiserror::Error;
18use writeable::Writeable;
19
20use crate::{sprintf::Message, translations::TranslationTree};
21
22/// Convert a DataLocale to a Locale for use with ICU4X formatters.
23///
24/// Since there's no direct conversion from DataLocale to Locale, we use string
25/// parsing. Falls back to the "und" (undetermined) locale on parse errors.
26fn data_locale_to_locale(locale: &DataLocale) -> Locale {
27    locale
28        .to_string()
29        .parse()
30        .unwrap_or_else(|_| icu_locale_core::locale!("und"))
31}
32
33/// Error type for loading translations
34#[derive(Debug, Error)]
35pub enum LoadError {
36    #[error("Failed to load translation directory {path:?}")]
37    ReadDir {
38        path: Utf8PathBuf,
39        #[source]
40        source: std::io::Error,
41    },
42
43    #[error("Failed to read translation file {path:?}")]
44    ReadFile {
45        path: Utf8PathBuf,
46        #[source]
47        source: std::io::Error,
48    },
49
50    #[error("Failed to deserialize translation file {path:?}")]
51    Deserialize {
52        path: Utf8PathBuf,
53        #[source]
54        source: serde_json::Error,
55    },
56
57    #[error("Invalid locale for file {path:?}")]
58    InvalidLocale {
59        path: Utf8PathBuf,
60        #[source]
61        source: ParseError,
62    },
63
64    #[error("Invalid file name {path:?}")]
65    InvalidFileName { path: Utf8PathBuf },
66}
67
68/// A translator for a set of translations.
69#[derive(Debug)]
70pub struct Translator {
71    translations: HashMap<DataLocale, TranslationTree>,
72    fallbacker: LocaleFallbacker,
73    default_locale: DataLocale,
74}
75
76impl Translator {
77    /// Create a new translator from a set of translations.
78    #[must_use]
79    pub fn new(translations: HashMap<DataLocale, TranslationTree>) -> Self {
80        let fallbacker = LocaleFallbacker::new().static_to_owned();
81
82        Self {
83            translations,
84            fallbacker,
85            // TODO: make this configurable
86            default_locale: icu_locale_core::locale!("en").into(),
87        }
88    }
89
90    /// Load a set of translations from a directory.
91    ///
92    /// The directory should contain one JSON file per locale, with the locale
93    /// being the filename without the extension, e.g. `en-US.json`.
94    ///
95    /// # Parameters
96    ///
97    /// * `path` - The path to load from.
98    ///
99    /// # Errors
100    ///
101    /// Returns an error if the directory cannot be read, or if any of the files
102    /// cannot be parsed.
103    pub fn load_from_path(path: &Utf8Path) -> Result<Self, LoadError> {
104        let mut translations = HashMap::new();
105
106        let dir = path.read_dir_utf8().map_err(|source| LoadError::ReadDir {
107            path: path.to_owned(),
108            source,
109        })?;
110
111        for entry in dir {
112            let entry = entry.map_err(|source| LoadError::ReadDir {
113                path: path.to_owned(),
114                source,
115            })?;
116            let path = entry.into_path();
117            let Some(name) = path.file_stem() else {
118                return Err(LoadError::InvalidFileName { path });
119            };
120
121            let locale: Locale = match name.parse() {
122                Ok(locale) => locale,
123                Err(source) => return Err(LoadError::InvalidLocale { path, source }),
124            };
125
126            let file = match File::open(&path) {
127                Ok(file) => file,
128                Err(source) => return Err(LoadError::ReadFile { path, source }),
129            };
130
131            let mut reader = BufReader::new(file);
132
133            let content = match serde_json::from_reader(&mut reader) {
134                Ok(content) => content,
135                Err(source) => return Err(LoadError::Deserialize { path, source }),
136            };
137
138            translations.insert(locale.into(), content);
139        }
140
141        Ok(Self::new(translations))
142    }
143
144    /// Get a message from the tree by key, with locale fallback.
145    ///
146    /// Returns the message and the locale it was found in.
147    /// If the message is not found, returns `None`.
148    ///
149    /// # Parameters
150    ///
151    /// * `locale` - The locale to use.
152    /// * `key` - The key to look up, which is a dot-separated path.
153    #[must_use]
154    pub fn message_with_fallback(
155        &self,
156        locale: DataLocale,
157        key: &str,
158    ) -> Option<(&Message, DataLocale)> {
159        if let Ok(message) = self.message(&locale, key) {
160            return Some((message, locale));
161        }
162
163        let mut iter = self
164            .fallbacker
165            .for_config(LocaleFallbackConfig::default())
166            .fallback_for(locale);
167
168        loop {
169            let locale = iter.get();
170
171            if let Ok(message) = self.message(locale, key) {
172                return Some((message, iter.take()));
173            }
174
175            // Try the defaut locale if we hit the `und` locale
176            if locale.is_unknown() {
177                let message = self.message(&self.default_locale, key).ok()?;
178                return Some((message, self.default_locale.clone()));
179            }
180
181            iter.step();
182        }
183    }
184
185    /// Get a message from the tree by key.
186    ///
187    /// # Parameters
188    ///
189    /// * `locale` - The locale to use.
190    /// * `key` - The key to look up, which is a dot-separated path.
191    ///
192    /// # Errors
193    ///
194    /// Returns an error if the requested locale is not found, or if the
195    /// requested key is not found.
196    pub fn message(&self, locale: &DataLocale, key: &str) -> Result<&Message, DataError> {
197        let tree = self
198            .translations
199            .get(locale)
200            .ok_or_else(|| DataErrorKind::IdentifierNotFound.into_error())?;
201
202        let message = tree
203            .message(key)
204            .ok_or_else(|| DataErrorKind::MarkerNotFound.into_error())?;
205
206        Ok(message)
207    }
208
209    /// Get a plural message from the tree by key, with locale fallback.
210    ///
211    /// Returns the message and the locale it was found in.
212    /// If the message is not found, returns `None`.
213    ///
214    /// # Parameters
215    ///
216    /// * `locale` - The locale to use.
217    /// * `key` - The key to look up, which is a dot-separated path.
218    /// * `count` - The count to use for pluralization.
219    #[must_use]
220    pub fn plural_with_fallback(
221        &self,
222        locale: DataLocale,
223        key: &str,
224        count: usize,
225    ) -> Option<(&Message, DataLocale)> {
226        let mut iter = self
227            .fallbacker
228            .for_config(LocaleFallbackConfig::default())
229            .fallback_for(locale);
230
231        loop {
232            let locale = iter.get();
233
234            if let Ok(message) = self.plural(locale, key, count) {
235                return Some((message, iter.take()));
236            }
237
238            // Stop if we hit the `und` locale
239            if locale.is_unknown() {
240                return None;
241            }
242
243            iter.step();
244        }
245    }
246
247    /// Get a plural message from the tree by key.
248    ///
249    /// # Parameters
250    ///
251    /// * `locale` - The locale to use.
252    /// * `key` - The key to look up, which is a dot-separated path.
253    /// * `count` - The count to use for pluralization.
254    ///
255    /// # Errors
256    ///
257    /// Returns an error if the requested locale is not found, or if the
258    /// requested key is not found.
259    pub fn plural(
260        &self,
261        locale: &DataLocale,
262        key: &str,
263        count: usize,
264    ) -> Result<&Message, DataError> {
265        let plurals = PluralRules::try_new_cardinal(data_locale_to_locale(locale).into())?;
266        let category = plurals.category_for(count);
267
268        let tree = self
269            .translations
270            .get(locale)
271            .ok_or_else(|| DataErrorKind::IdentifierNotFound.into_error())?;
272
273        let message = tree
274            .pluralize(key, category)
275            .ok_or_else(|| DataErrorKind::MarkerNotFound.into_error())?;
276
277        Ok(message)
278    }
279
280    /// Format a relative date
281    ///
282    /// # Parameters
283    ///
284    /// * `locale` - The locale to use.
285    /// * `days` - The number of days to format, where 0 = today, 1 = tomorrow,
286    ///   -1 = yesterday, etc.
287    ///
288    /// # Errors
289    ///
290    /// Returns an error if the requested locale is not found.
291    pub fn relative_date(&self, locale: &DataLocale, days: i64) -> Result<String, DataError> {
292        // TODO: this is not using the fallbacker
293        let formatter = RelativeTimeFormatter::try_new_long_day(
294            data_locale_to_locale(locale).into(),
295            RelativeTimeFormatterOptions {
296                numeric: Numeric::Auto,
297            },
298        )?;
299
300        let date = formatter.format(days.into());
301        Ok(date.write_to_string().into_owned())
302    }
303
304    /// Format time
305    ///
306    /// # Parameters
307    ///
308    /// * `locale` - The locale to use.
309    /// * `time` - The time to format.
310    ///
311    /// # Errors
312    ///
313    /// Returns an error if the requested locale is not found.
314    pub fn short_time(
315        &self,
316        locale: &DataLocale,
317        time: &icu_datetime::input::Time,
318    ) -> Result<String, icu_datetime::DateTimeFormatterLoadError> {
319        // TODO: this is not using the fallbacker
320        let formatter = icu_datetime::NoCalendarFormatter::try_new(
321            data_locale_to_locale(locale).into(),
322            icu_datetime::fieldsets::T::short(),
323        )?;
324
325        Ok(formatter.format(time).to_string())
326    }
327
328    /// Get a list of available locales.
329    #[must_use]
330    pub fn available_locales(&self) -> Vec<DataLocale> {
331        self.translations.keys().cloned().collect()
332    }
333
334    /// Check if a locale is available.
335    #[must_use]
336    pub fn has_locale(&self, locale: &DataLocale) -> bool {
337        self.translations.contains_key(locale)
338    }
339
340    /// Choose the best available locale from a list of candidates.
341    #[must_use]
342    pub fn choose_locale(&self, iter: impl Iterator<Item = DataLocale>) -> DataLocale {
343        for locale in iter {
344            if self.has_locale(&locale) {
345                return locale;
346            }
347
348            let mut fallbacker = self
349                .fallbacker
350                .for_config(LocaleFallbackConfig::default())
351                .fallback_for(locale);
352
353            loop {
354                if fallbacker.get().is_unknown() {
355                    break;
356                }
357
358                if self.has_locale(fallbacker.get()) {
359                    return fallbacker.take();
360                }
361                fallbacker.step();
362            }
363        }
364
365        self.default_locale.clone()
366    }
367}
368
#[cfg(test)]
mod tests {
    use camino::Utf8PathBuf;
    use icu_locale_core::locale;

    use crate::{sprintf::arg_list, translator::Translator};

    /// Build a translator out of the `test_data` directory of this crate.
    fn translator() -> Translator {
        let fixtures = Utf8PathBuf::from(env!("CARGO_MANIFEST_DIR")).join("test_data");
        Translator::load_from_path(&fixtures).unwrap()
    }

    #[test]
    fn test_message() {
        let translator = translator();

        // Each of those locales has its own `hello` message
        for (locale, expected) in [
            (locale!("en"), "Hello!"),
            (locale!("fr"), "Bonjour !"),
            (locale!("en-US"), "Hey!"),
        ] {
            let message = translator.message(&locale.into(), "hello").unwrap();
            assert_eq!(message.format(&arg_list!()).unwrap(), expected);
        }

        // `goodbye` is not defined for `en-US`, so the plain lookup fails…
        assert!(
            translator
                .message(&locale!("en-US").into(), "goodbye")
                .is_err()
        );

        // …but the fallback chain finds it in `en`
        let (message, found_in) = translator
            .message_with_fallback(locale!("en-US").into(), "goodbye")
            .unwrap();
        assert_eq!(message.format(&arg_list!()).unwrap(), "Goodbye!");
        assert_eq!(found_in, locale!("en").into());
    }

    #[test]
    fn test_plurals() {
        let translator = translator();
        let key = "active_sessions";

        let one = translator.plural(&locale!("en").into(), key, 1).unwrap();
        assert_eq!(
            one.format(&arg_list!(count = 1)).unwrap(),
            "1 active session."
        );

        let two = translator.plural(&locale!("en").into(), key, 2).unwrap();
        assert_eq!(
            two.format(&arg_list!(count = 2)).unwrap(),
            "2 active sessions."
        );

        // In english, zero is plural
        let zero = translator.plural(&locale!("en").into(), key, 0).unwrap();
        assert_eq!(
            zero.format(&arg_list!(count = 0)).unwrap(),
            "0 active sessions."
        );

        let one = translator.plural(&locale!("fr").into(), key, 1).unwrap();
        assert_eq!(
            one.format(&arg_list!(count = 1)).unwrap(),
            "1 session active."
        );

        let two = translator.plural(&locale!("fr").into(), key, 2).unwrap();
        assert_eq!(
            two.format(&arg_list!(count = 2)).unwrap(),
            "2 sessions actives."
        );

        // In french, zero is singular
        let zero = translator.plural(&locale!("fr").into(), key, 0).unwrap();
        assert_eq!(
            zero.format(&arg_list!(count = 0)).unwrap(),
            "0 session active."
        );

        // `active_sessions` is not defined for `en-US`, so the plain lookup fails…
        assert!(translator.plural(&locale!("en-US").into(), key, 1).is_err());

        // …but the fallback chain finds it in `en`
        let (message, found_in) = translator
            .plural_with_fallback(locale!("en-US").into(), key, 1)
            .unwrap();
        assert_eq!(
            message.format(&arg_list!(count = 1)).unwrap(),
            "1 active session."
        );
        assert_eq!(found_in, locale!("en").into());
    }
}