| Index: trunk/src/chrome/common/translate/language_detection_util.cc
|
| ===================================================================
|
| --- trunk/src/chrome/common/translate/language_detection_util.cc (revision 221390)
|
| +++ trunk/src/chrome/common/translate/language_detection_util.cc (working copy)
|
| @@ -5,24 +5,15 @@
|
| #include "chrome/common/translate/language_detection_util.h"
|
|
|
| #include "base/logging.h"
|
| -#include "base/metrics/field_trial.h"
|
| #include "base/strings/string_split.h"
|
| #include "base/strings/string_util.h"
|
| -#include "base/strings/utf_string_conversions.h"
|
| #include "base/time/time.h"
|
| #include "chrome/common/chrome_constants.h"
|
| #include "chrome/common/translate/translate_common_metrics.h"
|
| #include "chrome/common/translate/translate_util.h"
|
| -
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| #include "third_party/cld/encodings/compact_lang_det/compact_lang_det.h"
|
| #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
|
| -#endif
|
|
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| -#include "third_party/cld_2/src/public/compact_lang_det.h"
|
| -#endif
|
| -
|
| namespace {
|
|
|
| // Similar language code list. Some languages are very similar and difficult
|
| @@ -70,63 +61,18 @@
|
| TranslateUtil::ToTranslateLanguageSynonym(code);
|
| }
|
|
|
| -int GetCLDMajorVersion() {
|
| -#if !defined(CLD_VERSION)
|
| - std::string group_name = base::FieldTrialList::FindFullName("CLD1VsCLD2");
|
| - if (group_name == "CLD2")
|
| - return 2;
|
| - else
|
| - return 1;
|
| -#else
|
| - return CLD_VERSION;
|
| -#endif
|
| -}
|
| -
|
| // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
|
| // failed.
|
| // |is_cld_reliable| will be set as true if CLD says the detection is reliable.
|
| std::string DetermineTextLanguage(const base::string16& text,
|
| bool* is_cld_reliable) {
|
| std::string language = chrome::kUnknownLanguageCode;
|
| + int num_languages = 0;
|
| int text_bytes = 0;
|
| bool is_reliable = false;
|
| -
|
| - // Language or CLD2::Language
|
| - int cld_language = 0;
|
| - bool is_valid_language = false;
|
| -
|
| - switch (GetCLDMajorVersion()) {
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| - case 1: {
|
| - int num_languages = 0;
|
| - cld_language =
|
| - DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
|
| - &num_languages, NULL, &text_bytes);
|
| - is_valid_language = cld_language != NUM_LANGUAGES &&
|
| - cld_language != UNKNOWN_LANGUAGE &&
|
| - cld_language != TG_UNKNOWN_LANGUAGE;
|
| - break;
|
| - }
|
| -#endif
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| - case 2: {
|
| - std::string utf8_text(UTF16ToUTF8(text));
|
| - CLD2::Language language3[3];
|
| - int percent3[3];
|
| - cld_language =
|
| - CLD2::DetectLanguageSummary(utf8_text.c_str(), utf8_text.size(), true,
|
| - language3, percent3,
|
| - &text_bytes, &is_reliable);
|
| - is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&
|
| - cld_language != CLD2::UNKNOWN_LANGUAGE &&
|
| - cld_language != CLD2::TG_UNKNOWN_LANGUAGE;
|
| - break;
|
| - }
|
| -#endif
|
| - default:
|
| - NOTREACHED();
|
| - }
|
| -
|
| + Language cld_language =
|
| + DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
|
| + &num_languages, NULL, &text_bytes);
|
| if (is_cld_reliable != NULL)
|
| *is_cld_reliable = is_reliable;
|
|
|
| @@ -136,33 +82,15 @@
|
| // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that
|
| // the determined language code is correct with 50% confidence. Chrome should
|
| // handle the real confidence value to judge.
|
| - if (is_reliable && text_bytes >= 100 && is_valid_language) {
|
| + if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
|
| + cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
|
| // We should not use LanguageCode_ISO_639_1 because it does not cover all
|
| // the languages CLD can detect. As a result, it'll return the invalid
|
| // language code for tradtional Chinese among others.
|
| // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and
|
| // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
|
| // for Simplified Chinese.
|
| - switch (GetCLDMajorVersion()) {
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| - case 1:
|
| - language =
|
| - LanguageCodeWithDialects(static_cast<Language>(cld_language));
|
| - break;
|
| -#endif
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| - case 2:
|
| - if (cld_language == CLD2::CHINESE) {
|
| - language = "zh-CN";
|
| - } else {
|
| - language =
|
| - CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));
|
| - }
|
| - break;
|
| -#endif
|
| - default:
|
| - NOTREACHED();
|
| - }
|
| + language = LanguageCodeWithDialects(cld_language);
|
| }
|
| VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
|
| << "\n*************************************\n";
|
| @@ -363,19 +291,7 @@
|
| }
|
|
|
| std::string GetCLDVersion() {
|
| - switch (GetCLDMajorVersion()) {
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==1
|
| - case 1:
|
| - return CompactLangDet::DetectLanguageVersion();
|
| -#endif
|
| -#if !defined(CLD_VERSION) || CLD_VERSION==2
|
| - case 2:
|
| - return CLD2::DetectLanguageVersion();
|
| -#endif
|
| - default:
|
| - NOTREACHED();
|
| - }
|
| - return "";
|
| + return CompactLangDet::DetectLanguageVersion();
|
| }
|
|
|
| } // namespace LanguageDetectionUtil
|
|
|