| Index: extensions/renderer/i18n_custom_bindings.cc
|
| diff --git a/extensions/renderer/i18n_custom_bindings.cc b/extensions/renderer/i18n_custom_bindings.cc
|
| index 34dd6c89c0f9be01558bf8098b37fab95dc18ce9..57e0e96e2e8bfb4f1037fc3a2523056540731e56 100644
|
| --- a/extensions/renderer/i18n_custom_bindings.cc
|
| +++ b/extensions/renderer/i18n_custom_bindings.cc
|
| @@ -41,6 +41,10 @@ namespace {
|
| // Max number of languages to detect.
|
| const int kCldNumLangs = 3;
|
|
|
| +// CLD3 minimum reliable byte threshold. Predictions for inputs below this size
|
| +// in bytes are marked unreliable. Unsigned, as it is compared with a size().
|
| +const size_t kCld3MinimumByteThreshold = 50;
|
| +
|
| struct DetectedLanguage {
|
| DetectedLanguage(const std::string& language, int percentage)
|
| : language(language), percentage(percentage) {}
|
| @@ -310,8 +314,17 @@ void I18NCustomBindings::DetectTextLanguage(
|
| #elif BUILDFLAG(CLD_VERSION) == 3
|
| chrome_lang_id::NNetLanguageIdentifier nnet_lang_id(/*min_num_bytes=*/0,
|
| /*max_num_bytes=*/512);
|
| - const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result>
|
| - lang_results = nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs);
|
| + std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> lang_results =
|
| + nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs);
|
| +
|
| + // is_reliable is set to false if we believe the input is too short to be
|
| + // accurately identified by the current model.
|
| + if (text.size() < kCld3MinimumByteThreshold) {
|
| + for (auto& result : lang_results) {
|
| + result.is_reliable = false;
|
| + }
|
| + }
|
| +
|
| LanguageDetectionResult result;
|
|
|
| // Populate LanguageDetectionResult with prediction reliability, languages,
|
|
|