| Index: components/translate/core/language_detection/language_detection_util.cc
|
| diff --git a/components/translate/core/language_detection/language_detection_util.cc b/components/translate/core/language_detection/language_detection_util.cc
|
| index 5c751a7ffd59fb1427bb7597a306bfb2ba9a0d3c..4ab86fc305b21694e1346b3fd06aa6bf9d5bf11c 100644
|
| --- a/components/translate/core/language_detection/language_detection_util.cc
|
| +++ b/components/translate/core/language_detection/language_detection_util.cc
|
| @@ -5,7 +5,6 @@
|
| #include "components/translate/core/language_detection/language_detection_util.h"
|
|
|
| #include "base/logging.h"
|
| -#include "base/metrics/field_trial.h"
|
| #include "base/strings/string_split.h"
|
| #include "base/strings/string_util.h"
|
| #include "base/strings/utf_string_conversions.h"
|
| @@ -14,12 +13,12 @@
|
| #include "components/translate/core/common/translate_metrics.h"
|
| #include "components/translate/core/common/translate_util.h"
|
|
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| +#if CLD_VERSION==1
|
| #include "third_party/cld/encodings/compact_lang_det/compact_lang_det.h"
|
| #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
|
| #endif
|
|
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| +#if CLD_VERSION==2
|
| #include "third_party/cld_2/src/public/compact_lang_det.h"
|
| #endif
|
|
|
| @@ -70,18 +69,6 @@ void ApplyLanguageCodeCorrection(std::string* code) {
|
| translate::ToTranslateLanguageSynonym(code);
|
| }
|
|
|
| -int GetCLDMajorVersion() {
|
| -#if !defined(CLD_VERSION)
|
| - std::string group_name = base::FieldTrialList::FindFullName("CLD1VsCLD2");
|
| - if (group_name == "CLD2")
|
| - return 2;
|
| - else
|
| - return 1;
|
| -#else
|
| - return CLD_VERSION;
|
| -#endif
|
| -}
|
| -
|
| // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
|
| // failed.
|
| // |is_cld_reliable| will be set as true if CLD says the detection is reliable.
|
| @@ -96,45 +83,36 @@ std::string DetermineTextLanguage(const base::string16& text,
|
| int cld_language = 0;
|
| bool is_valid_language = false;
|
|
|
| - switch (GetCLDMajorVersion()) {
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| - case 1: {
|
| - int num_languages = 0;
|
| - cld_language = DetectLanguageOfUnicodeText(
|
| - NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL,
|
| - &num_bytes_evaluated);
|
| - is_valid_language = cld_language != NUM_LANGUAGES &&
|
| - cld_language != UNKNOWN_LANGUAGE &&
|
| - cld_language != TG_UNKNOWN_LANGUAGE;
|
| - break;
|
| - }
|
| -#endif
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| - case 2: {
|
| - const std::string utf8_text(base::UTF16ToUTF8(text));
|
| - const int num_utf8_bytes = static_cast<int>(utf8_text.size());
|
| - const char* raw_utf8_bytes = utf8_text.c_str();
|
| - cld_language = CLD2::DetectLanguageCheckUTF8(
|
| - raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable,
|
| - &num_bytes_evaluated);
|
| -
|
| - if (num_bytes_evaluated < num_utf8_bytes &&
|
| - cld_language == CLD2::UNKNOWN_LANGUAGE) {
|
| - // Invalid UTF8 encountered, see bug http://crbug.com/444258.
|
| - // Retry using only the valid characters. This time the check for valid
|
| - // UTF8 can be skipped since the precise number of valid bytes is known.
|
| - cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated,
|
| - is_plain_text, &is_reliable);
|
| - }
|
| - is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&
|
| - cld_language != CLD2::UNKNOWN_LANGUAGE &&
|
| - cld_language != CLD2::TG_UNKNOWN_LANGUAGE;
|
| - break;
|
| - }
|
| -#endif
|
| - default:
|
| - NOTREACHED();
|
| +#if CLD_VERSION==1
|
| + int num_languages = 0;
|
| + cld_language = DetectLanguageOfUnicodeText(
|
| + NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL,
|
| + &num_bytes_evaluated);
|
| + is_valid_language = cld_language != NUM_LANGUAGES &&
|
| + cld_language != UNKNOWN_LANGUAGE &&
|
| + cld_language != TG_UNKNOWN_LANGUAGE;
|
| +#elif CLD_VERSION==2
|
| + const std::string utf8_text(base::UTF16ToUTF8(text));
|
| + const int num_utf8_bytes = static_cast<int>(utf8_text.size());
|
| + const char* raw_utf8_bytes = utf8_text.c_str();
|
| + cld_language = CLD2::DetectLanguageCheckUTF8(
|
| + raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable,
|
| + &num_bytes_evaluated);
|
| +
|
| + if (num_bytes_evaluated < num_utf8_bytes &&
|
| + cld_language == CLD2::UNKNOWN_LANGUAGE) {
|
| + // Invalid UTF8 encountered, see bug http://crbug.com/444258.
|
| + // Retry using only the valid characters. This time the check for valid
|
| + // UTF8 can be skipped since the precise number of valid bytes is known.
|
| + cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated,
|
| + is_plain_text, &is_reliable);
|
| }
|
| + is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&
|
| + cld_language != CLD2::UNKNOWN_LANGUAGE &&
|
| + cld_language != CLD2::TG_UNKNOWN_LANGUAGE;
|
| +#else
|
| +# error "CLD_VERSION must be 1 or 2"
|
| +#endif
|
|
|
| if (is_cld_reliable != NULL)
|
| *is_cld_reliable = is_reliable;
|
| @@ -152,37 +130,27 @@ std::string DetermineTextLanguage(const base::string16& text,
|
| // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
|
| // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
|
| // for Simplified Chinese.
|
| - switch (GetCLDMajorVersion()) {
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| - case 1:
|
| - language =
|
| - LanguageCodeWithDialects(static_cast<Language>(cld_language));
|
| - break;
|
| -#endif
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| - case 2:
|
| - // (1) CLD2's LanguageCode returns general Chinese 'zh' for
|
| - // CLD2::CHINESE, but Translate server doesn't accept it. This is
|
| - // converted to 'zh-CN' in the same way as CLD1's
|
| - // LanguageCodeWithDialects.
|
| - //
|
| - // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for
|
| - // CLD2::CHINESE_T. This is technically more precise for the language
|
| - // code of traditional Chinese, while Translate server hasn't accepted
|
| - // zh-Hant yet.
|
| - if (cld_language == CLD2::CHINESE) {
|
| - language = "zh-CN";
|
| - } else if (cld_language == CLD2::CHINESE_T) {
|
| - language = "zh-TW";
|
| - } else {
|
| - language =
|
| - CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));
|
| - }
|
| - break;
|
| +#if CLD_VERSION==1
|
| + language = LanguageCodeWithDialects(static_cast<Language>(cld_language));
|
| +#elif CLD_VERSION==2
|
| + // (1) CLD2's LanguageCode returns general Chinese 'zh' for
|
| + // CLD2::CHINESE, but Translate server doesn't accept it. This is
|
| + // converted to 'zh-CN' in the same way as CLD1's
|
| + // LanguageCodeWithDialects.
|
| + //
|
| + // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for
|
| + // CLD2::CHINESE_T. This is technically more precise for the language
|
| + // code of traditional Chinese, while Translate server hasn't accepted
|
| + // zh-Hant yet.
|
| + if (cld_language == CLD2::CHINESE)
|
| + language = "zh-CN";
|
| + else if (cld_language == CLD2::CHINESE_T)
|
| + language = "zh-TW";
|
| + else
|
| + language = CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));
|
| +#else
|
| +# error "CLD_VERSION must be 1 or 2"
|
| #endif
|
| - default:
|
| - NOTREACHED();
|
| - }
|
| }
|
| VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
|
| << "\n*************************************\n";
|
|
|