Chromium Code Reviews| Index: extensions/renderer/i18n_custom_bindings.cc |
| diff --git a/extensions/renderer/i18n_custom_bindings.cc b/extensions/renderer/i18n_custom_bindings.cc |
| index 34dd6c89c0f9be01558bf8098b37fab95dc18ce9..fada9d8780a8a23ec9996025e35621a3671e3af8 100644 |
| --- a/extensions/renderer/i18n_custom_bindings.cc |
| +++ b/extensions/renderer/i18n_custom_bindings.cc |
| @@ -41,6 +41,10 @@ namespace { |
| // Max number of languages to detect. |
| const int kCldNumLangs = 3; |
| +// CLD3 minimum reliable byte threshold. Predictions for inputs below this size |
| +// in bytes will be considered unreliable. |
| +const int kCld3MinimumByteThreshold = 50; |
| + |
| struct DetectedLanguage { |
| DetectedLanguage(const std::string& language, int percentage) |
| : language(language), percentage(percentage) {} |
| @@ -310,8 +314,21 @@ void I18NCustomBindings::DetectTextLanguage( |
| #elif BUILDFLAG(CLD_VERSION) == 3 |
| chrome_lang_id::NNetLanguageIdentifier nnet_lang_id(/*min_num_bytes=*/0, |
| /*max_num_bytes=*/512); |
| - const std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> |
| - lang_results = nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs); |
| + std::vector<chrome_lang_id::NNetLanguageIdentifier::Result> lang_results = |
| + nnet_lang_id.FindTopNMostFreqLangs(text, kCldNumLangs); |
| + |
| + // is_reliable is set to false if we believe the input is too short to be |
| + // accurately identified by the current model. |
| + // |
| + // Note that when is_reliable is false, the TranslateExtension .js code |
|
Devlin
2017/04/03 20:46:10
This is a public API, so we shouldn't include implementation details…
|
| + // gathers additional surrounding context and will try the prediction once |
| + // more. |
| + if (text.size() < kCld3MinimumByteThreshold) { |
| + for (auto& result : lang_results) { |
| + result.is_reliable = false; |
| + } |
| + } |
| + |
| LanguageDetectionResult result; |
| // Populate LanguageDetectionResult with prediction reliability, languages, |