Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(208)

Side by Side Diff: components/translate/core/language_detection/language_detection_util.cc

Issue 1849323002: Adjusted to check language3[0] only and ignore the summary return. (Closed) Base URL: https://chromium.googlesource.com/chromium/src.git@master
Patch Set: Created 4 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2014 The Chromium Authors. All rights reserved. 1 // Copyright 2014 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "components/translate/core/language_detection/language_detection_util.h" 5 #include "components/translate/core/language_detection/language_detection_util.h"
6 6
7 #include <stddef.h> 7 #include <stddef.h>
8 8
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/macros.h" 10 #include "base/macros.h"
(...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after
108 int flags = 0; // No flags, see compact_lang_det.h for details. 108 int flags = 0; // No flags, see compact_lang_det.h for details.
109 int text_bytes; // Amount of non-tag/letters-only text (assumed 0). 109 int text_bytes; // Amount of non-tag/letters-only text (assumed 0).
110 double normalized_score3[3]; 110 double normalized_score3[3];
111 111
112 const char* tld_hint = ""; 112 const char* tld_hint = "";
113 int encoding_hint = CLD2::UNKNOWN_ENCODING; 113 int encoding_hint = CLD2::UNKNOWN_ENCODING;
114 CLD2::Language language_hint = CLD2::GetLanguageFromName(html_lang.c_str()); 114 CLD2::Language language_hint = CLD2::GetLanguageFromName(html_lang.c_str());
115 CLD2::CLDHints cldhints = {code.c_str(), tld_hint, encoding_hint, 115 CLD2::CLDHints cldhints = {code.c_str(), tld_hint, encoding_hint,
116 language_hint}; 116 language_hint};
117 117
118 cld_language = CLD2::ExtDetectLanguageSummaryCheckUTF8( 118 CLD2::ExtDetectLanguageSummaryCheckUTF8(
119 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, 119 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags,
120 language3, percent3, normalized_score3, 120 language3, percent3, normalized_score3,
121 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable, 121 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable,
122 &num_bytes_evaluated); 122 &num_bytes_evaluated);
123 123
124 if (num_bytes_evaluated < num_utf8_bytes && 124 if (num_bytes_evaluated < num_utf8_bytes &&
125 cld_language == CLD2::UNKNOWN_LANGUAGE) { 125 language3[0] == CLD2::UNKNOWN_LANGUAGE) {
126 // Invalid UTF8 encountered, see bug http://crbug.com/444258. 126 // Invalid UTF8 encountered, see bug http://crbug.com/444258.
127 // Retry using only the valid characters. This time the check for valid 127 // Retry using only the valid characters. This time the check for valid
128 // UTF8 can be skipped since the precise number of valid bytes is known. 128 // UTF8 can be skipped since the precise number of valid bytes is known.
129 cld_language = CLD2::ExtDetectLanguageSummary( 129 CLD2::ExtDetectLanguageSummary(
130 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, 130 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags,
131 language3, percent3, normalized_score3, 131 language3, percent3, normalized_score3,
132 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable); 132 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable);
133 } 133 }
134 // Choose top language.
135 cld_language = language3[0];
136
134 is_valid_language = cld_language != CLD2::NUM_LANGUAGES && 137 is_valid_language = cld_language != CLD2::NUM_LANGUAGES &&
135 cld_language != CLD2::UNKNOWN_LANGUAGE && 138 cld_language != CLD2::UNKNOWN_LANGUAGE &&
136 cld_language != CLD2::TG_UNKNOWN_LANGUAGE; 139 cld_language != CLD2::TG_UNKNOWN_LANGUAGE;
137 140
138 // Choose top language.
139 cld_language = language3[0];
140 UMA_HISTOGRAM_ENUMERATION("Translate.CLD2.LanguageDetected", 141 UMA_HISTOGRAM_ENUMERATION("Translate.CLD2.LanguageDetected",
141 cld_language, CLD2::NUM_LANGUAGES); 142 cld_language, CLD2::NUM_LANGUAGES);
142 if (is_valid_language) 143 if (is_valid_language)
143 UMA_HISTOGRAM_PERCENTAGE("Translate.CLD2.LanguageAccuracy", percent3[0]); 144 UMA_HISTOGRAM_PERCENTAGE("Translate.CLD2.LanguageAccuracy", percent3[0]);
144 145
145 146
146 #else 147 #else
147 # error "CLD_VERSION must be 1 or 2" 148 # error "CLD_VERSION must be 1 or 2"
148 #endif 149 #endif
149 150
(...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after
392 // distinguish from English, and the language is one of well-known languages 393 // distinguish from English, and the language is one of well-known languages
393 // which often provide "en-*" meta information mistakenly. 394 // which often provide "en-*" meta information mistakenly.
394 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { 395 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
395 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) 396 if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
396 return true; 397 return true;
397 } 398 }
398 return false; 399 return false;
399 } 400 }
400 401
401 } // namespace translate 402 } // namespace translate
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698