Chromium Code Reviews| Index: components/translate/core/language_detection/language_detection_util.cc |
| diff --git a/components/translate/core/language_detection/language_detection_util.cc b/components/translate/core/language_detection/language_detection_util.cc |
| index 35a21b9019e007035260299103fcf259c355070c..983ac40a1c954fdb193869eecac2b8e0cbe2cca7 100644 |
| --- a/components/translate/core/language_detection/language_detection_util.cc |
| +++ b/components/translate/core/language_detection/language_detection_util.cc |
| @@ -111,12 +111,18 @@ std::string DetermineTextLanguage(const base::string16& text, |
| #if !defined(CLD_VERSION) || CLD_VERSION==2 |
| case 2: { |
| std::string utf8_text(base::UTF16ToUTF8(text)); |
| - CLD2::Language language3[3]; |
| - int percent3[3]; |
| - CLD2::DetectLanguageSummary( |
| - utf8_text.c_str(), (int)utf8_text.size(), true, language3, percent3, |
| - &text_bytes, &is_reliable); |
| - cld_language = language3[0]; |
| + cld_language = CLD2::DetectLanguageCheckUTF8( |
| + utf8_text.c_str(), (int)utf8_text.size(), true /* is_plain_text */, |
|
droger
2015/05/07 11:56:57
Could you use static_cast here instead of a C-style cast?
Andrew Hayden (chromium.org)
2015/05/07 12:24:05
Done.
|
| + &is_reliable, &text_bytes); |
| + if (text_bytes < (int)utf8_text.size() && |
|
droger
2015/05/07 11:56:56
Same here: please use static_cast instead of the C-style cast.
Andrew Hayden (chromium.org)
2015/05/07 12:24:05
Done.
|
| + cld_language != CLD2::UNKNOWN_LANGUAGE) { |
|
Andrew Hayden (chromium.org)
2015/05/07 12:24:05
That should be a ==, not a !=.
|
| + // Invalid UTF8 encountered, retry using just text_bytes of data. |
| + // In practice this shouldn't happen, as Chromium should sanitize the |
|
droger
2015/05/07 11:56:56
Do you want to add a NOTREACHED here?
Andrew Hayden (chromium.org)
2015/05/07 12:24:05
Done.
|
| + // text data prior to exposing it to any processing internally. |
| + cld_language = CLD2::DetectLanguageCheckUTF8( |
| + utf8_text.c_str(), text_bytes, true /* is_plain_text */, |
| + &is_reliable, &text_bytes); |
| + } |
| is_valid_language = cld_language != CLD2::NUM_LANGUAGES && |
| cld_language != CLD2::UNKNOWN_LANGUAGE && |
| cld_language != CLD2::TG_UNKNOWN_LANGUAGE; |