| OLD | NEW |
| 1 // Copyright 2014 The Chromium Authors. All rights reserved. | 1 // Copyright 2014 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "components/translate/core/language_detection/language_detection_util.h" | 5 #include "components/translate/core/language_detection/language_detection_util.h" |
| 6 | 6 |
| 7 #include <stddef.h> | 7 #include <stddef.h> |
| 8 | 8 |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/macros.h" | 10 #include "base/macros.h" |
| (...skipping 97 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 108 int flags = 0; // No flags, see compact_lang_det.h for details. | 108 int flags = 0; // No flags, see compact_lang_det.h for details. |
| 109 int text_bytes; // Amount of non-tag/letters-only text (assumed 0). | 109 int text_bytes; // Amount of non-tag/letters-only text (assumed 0). |
| 110 double normalized_score3[3]; | 110 double normalized_score3[3]; |
| 111 | 111 |
| 112 const char* tld_hint = ""; | 112 const char* tld_hint = ""; |
| 113 int encoding_hint = CLD2::UNKNOWN_ENCODING; | 113 int encoding_hint = CLD2::UNKNOWN_ENCODING; |
| 114 CLD2::Language language_hint = CLD2::GetLanguageFromName(html_lang.c_str()); | 114 CLD2::Language language_hint = CLD2::GetLanguageFromName(html_lang.c_str()); |
| 115 CLD2::CLDHints cldhints = {code.c_str(), tld_hint, encoding_hint, | 115 CLD2::CLDHints cldhints = {code.c_str(), tld_hint, encoding_hint, |
| 116 language_hint}; | 116 language_hint}; |
| 117 | 117 |
| 118 cld_language = CLD2::ExtDetectLanguageSummaryCheckUTF8( | 118 CLD2::ExtDetectLanguageSummaryCheckUTF8( |
| 119 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, | 119 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, |
| 120 language3, percent3, normalized_score3, | 120 language3, percent3, normalized_score3, |
| 121 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable, | 121 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable, |
| 122 &num_bytes_evaluated); | 122 &num_bytes_evaluated); |
| 123 | 123 |
| 124 if (num_bytes_evaluated < num_utf8_bytes && | 124 if (num_bytes_evaluated < num_utf8_bytes && |
| 125 cld_language == CLD2::UNKNOWN_LANGUAGE) { | 125 language3[0] == CLD2::UNKNOWN_LANGUAGE) { |
| 126 // Invalid UTF8 encountered, see bug http://crbug.com/444258. | 126 // Invalid UTF8 encountered, see bug http://crbug.com/444258. |
| 127 // Retry using only the valid characters. This time the check for valid | 127 // Retry using only the valid characters. This time the check for valid |
| 128 // UTF8 can be skipped since the precise number of valid bytes is known. | 128 // UTF8 can be skipped since the precise number of valid bytes is known. |
| 129 cld_language = CLD2::ExtDetectLanguageSummary( | 129 CLD2::ExtDetectLanguageSummary( |
| 130 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, | 130 raw_utf8_bytes, num_utf8_bytes, is_plain_text, &cldhints, flags, |
| 131 language3, percent3, normalized_score3, | 131 language3, percent3, normalized_score3, |
| 132 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable); | 132 nullptr /* No ResultChunkVector used */, &text_bytes, &is_reliable); |
| 133 } | 133 } |
| 134 // Choose top language. |
| 135 cld_language = language3[0]; |
| 136 |
| 134 is_valid_language = cld_language != CLD2::NUM_LANGUAGES && | 137 is_valid_language = cld_language != CLD2::NUM_LANGUAGES && |
| 135 cld_language != CLD2::UNKNOWN_LANGUAGE && | 138 cld_language != CLD2::UNKNOWN_LANGUAGE && |
| 136 cld_language != CLD2::TG_UNKNOWN_LANGUAGE; | 139 cld_language != CLD2::TG_UNKNOWN_LANGUAGE; |
| 137 | 140 |
| 138 // Choose top language. | |
| 139 cld_language = language3[0]; | |
| 140 UMA_HISTOGRAM_ENUMERATION("Translate.CLD2.LanguageDetected", | 141 UMA_HISTOGRAM_ENUMERATION("Translate.CLD2.LanguageDetected", |
| 141 cld_language, CLD2::NUM_LANGUAGES); | 142 cld_language, CLD2::NUM_LANGUAGES); |
| 142 if (is_valid_language) | 143 if (is_valid_language) |
| 143 UMA_HISTOGRAM_PERCENTAGE("Translate.CLD2.LanguageAccuracy", percent3[0]); | 144 UMA_HISTOGRAM_PERCENTAGE("Translate.CLD2.LanguageAccuracy", percent3[0]); |
| 144 | 145 |
| 145 | 146 |
| 146 #else | 147 #else |
| 147 # error "CLD_VERSION must be 1 or 2" | 148 # error "CLD_VERSION must be 1 or 2" |
| 148 #endif | 149 #endif |
| 149 | 150 |
| (...skipping 242 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 392 // distinguish from English, and the language is one of well-known languages | 393 // distinguish from English, and the language is one of well-known languages |
| 393 // which often provide "en-*" meta information mistakenly. | 394 // which often provide "en-*" meta information mistakenly. |
| 394 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { | 395 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { |
| 395 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) | 396 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) |
| 396 return true; | 397 return true; |
| 397 } | 398 } |
| 398 return false; | 399 return false; |
| 399 } | 400 } |
| 400 | 401 |
| 401 } // namespace translate | 402 } // namespace translate |
| OLD | NEW |