components/translate/core/language_detection/language_detection_util.cc - Issue 1259883007: Remove the Finch test 'CLD1VsCLD2'

Unified Diff: components/translate/core/language_detection/language_detection_util.cc

Issue 1259883007: Remove the Finch test 'CLD1VsCLD2' (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master

Patch Set: Created 5 years, 5 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: components/translate/core/language_detection/language_detection_util.cc

diff --git a/components/translate/core/language_detection/language_detection_util.cc b/components/translate/core/language_detection/language_detection_util.cc

index 5c751a7ffd59fb1427bb7597a306bfb2ba9a0d3c..26430b9bcbb241d11a06b178fd3671d31cc65a20 100644

--- a/components/translate/core/language_detection/language_detection_util.cc

+++ b/components/translate/core/language_detection/language_detection_util.cc

@@ -5,7 +5,6 @@

#include "components/translate/core/language_detection/language_detection_util.h"

#include "base/logging.h"

-#include "base/metrics/field_trial.h"

#include "base/strings/string_split.h"

#include "base/strings/string_util.h"

#include "base/strings/utf_string_conversions.h"

@@ -14,12 +13,12 @@

#include "components/translate/core/common/translate_metrics.h"

#include "components/translate/core/common/translate_util.h"

-#if !defined(CLD_VERSION) || CLD_VERSION==1

+#if CLD_VERSION==1

#include "third_party/cld/encodings/compact_lang_det/compact_lang_det.h"

#include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"

#endif

-#if !defined(CLD_VERSION) || CLD_VERSION==2

+#if CLD_VERSION==2

#include "third_party/cld_2/src/public/compact_lang_det.h"

#endif

@@ -70,18 +69,6 @@ void ApplyLanguageCodeCorrection(std::string* code) {

translate::ToTranslateLanguageSynonym(code);

}

-int GetCLDMajorVersion() {

-#if !defined(CLD_VERSION)

- std::string group_name = base::FieldTrialList::FindFullName("CLD1VsCLD2");

- if (group_name == "CLD2")

- return 2;

- else

- return 1;

-#else

- return CLD_VERSION;

-#endif

// Returns the ISO 639 language code of the specified |text|, or 'unknown' if it

// failed.

// |is_cld_reliable| will be set as true if CLD says the detection is reliable.

@@ -96,45 +83,35 @@ std::string DetermineTextLanguage(const base::string16& text,

int cld_language = 0;

bool is_valid_language = false;

- switch (GetCLDMajorVersion()) {

-#if !defined(CLD_VERSION) || CLD_VERSION==1

- case 1: {

- int num_languages = 0;

- cld_language = DetectLanguageOfUnicodeText(

- NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL,

- &num_bytes_evaluated);

- is_valid_language = cld_language != NUM_LANGUAGES &&

- cld_language != UNKNOWN_LANGUAGE &&

- cld_language != TG_UNKNOWN_LANGUAGE;

- break;

- }

+#if CLD_VERSION==1

+ int num_languages = 0;

+ cld_language = DetectLanguageOfUnicodeText(

+ NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL,

+ &num_bytes_evaluated);

+ is_valid_language = cld_language != NUM_LANGUAGES &&

+ cld_language != UNKNOWN_LANGUAGE &&

Takashi Toyoshima 2015/07/30 08:41:05 wrong indent

hajimehoshi 2015/07/30 08:51:29 Done.

+ cld_language != TG_UNKNOWN_LANGUAGE;

#endif

-#if !defined(CLD_VERSION) || CLD_VERSION==2

- case 2: {

- const std::string utf8_text(base::UTF16ToUTF8(text));

- const int num_utf8_bytes = static_cast<int>(utf8_text.size());

- const char* raw_utf8_bytes = utf8_text.c_str();

- cld_language = CLD2::DetectLanguageCheckUTF8(

- raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable,

- &num_bytes_evaluated);

- if (num_bytes_evaluated < num_utf8_bytes &&

- cld_language == CLD2::UNKNOWN_LANGUAGE) {

- // Invalid UTF8 encountered, see bug http://crbug.com/444258.

- // Retry using only the valid characters. This time the check for valid

- // UTF8 can be skipped since the precise number of valid bytes is known.

- cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated,

- is_plain_text, &is_reliable);

- }

- is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&

- cld_language != CLD2::UNKNOWN_LANGUAGE &&

- cld_language != CLD2::TG_UNKNOWN_LANGUAGE;

- break;

- }

-#endif

- default:

- NOTREACHED();

+#if CLD_VERSION==2

+ const std::string utf8_text(base::UTF16ToUTF8(text));

+ const int num_utf8_bytes = static_cast<int>(utf8_text.size());

+ const char* raw_utf8_bytes = utf8_text.c_str();

+ cld_language = CLD2::DetectLanguageCheckUTF8(

+ raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable,

+ &num_bytes_evaluated);

+ if (num_bytes_evaluated < num_utf8_bytes &&

+ cld_language == CLD2::UNKNOWN_LANGUAGE) {

+ // Invalid UTF8 encountered, see bug http://crbug.com/444258.

+ // Retry using only the valid characters. This time the check for valid

+ // UTF8 can be skipped since the precise number of valid bytes is known.

+ cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated,

+ is_plain_text, &is_reliable);

}

+ is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&

+ cld_language != CLD2::UNKNOWN_LANGUAGE &&

Takashi Toyoshima 2015/07/30 08:41:05 wrong indent

hajimehoshi 2015/07/30 08:51:29 Done.

+ cld_language != CLD2::TG_UNKNOWN_LANGUAGE;

+#endif

Takashi Toyoshima 2015/07/30 08:41:05 up to you, but could be something like this? #if

hajimehoshi 2015/07/30 08:51:29 Done.

if (is_cld_reliable != NULL)

*is_cld_reliable = is_reliable;

@@ -152,37 +129,26 @@ std::string DetermineTextLanguage(const base::string16& text,

// |LanguageCodeWithDialects| will go through ISO 639-1, ISO 639-2 and

// 'other' tables to do the 'right' thing. In addition, it'll return zh-CN

// for Simplified Chinese.

- switch (GetCLDMajorVersion()) {

-#if !defined(CLD_VERSION) || CLD_VERSION==1

- case 1:

- language =

- LanguageCodeWithDialects(static_cast<Language>(cld_language));

- break;

+#if CLD_VERSION==1

+ language = LanguageCodeWithDialects(static_cast<Language>(cld_language));

#endif

-#if !defined(CLD_VERSION) || CLD_VERSION==2

- case 2:

- // (1) CLD2's LanguageCode returns general Chinese 'zh' for

- // CLD2::CHINESE, but Translate server doesn't accept it. This is

- // converted to 'zh-CN' in the same way as CLD1's

- // LanguageCodeWithDialects.

- //

- // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for

- // CLD2::CHINESE_T. This is technically more precise for the language

- // code of traditional Chinese, while Translate server hasn't accepted

- // zh-Hant yet.

- if (cld_language == CLD2::CHINESE) {

- language = "zh-CN";

- } else if (cld_language == CLD2::CHINESE_T) {

- language = "zh-TW";

- } else {

- language =

- CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));

- }

- break;

+#if CLD_VERSION==2

+ // (1) CLD2's LanguageCode returns general Chinese 'zh' for

+ // CLD2::CHINESE, but Translate server doesn't accept it. This is

+ // converted to 'zh-CN' in the same way as CLD1's

+ // LanguageCodeWithDialects.

+ //

+ // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for

+ // CLD2::CHINESE_T. This is technically more precise for the language

+ // code of traditional Chinese, while Translate server hasn't accepted

+ // zh-Hant yet.

+ if (cld_language == CLD2::CHINESE)

+ language = "zh-CN";

+ else if (cld_language == CLD2::CHINESE_T)

+ language = "zh-TW";

+ else

+ language = CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language));

#endif

Takashi Toyoshima 2015/07/30 08:41:05 ditto

hajimehoshi 2015/07/30 08:51:29 Done.

- default:

- NOTREACHED();

- }

}

VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text

<< "\n*************************************\n";

« build/config/BUILD.gn ('K') | « components/translate/core/language_detection/BUILD.gn ('k') | no next file » | no next file with comments »