| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
| (...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 115 // meta tag for "content-language". This may or may not also | 115 // meta tag for "content-language". This may or may not also |
| 116 // have a value derived from the actual Content-Language HTTP | 116 // have a value derived from the actual Content-Language HTTP |
| 117 // header. The two actually have different meanings (despite the | 117 // header. The two actually have different meanings (despite the |
| 118 // original intent of http-equiv to be an equivalent) with the former | 118 // original intent of http-equiv to be an equivalent) with the former |
| 119 // being the language of the document and the latter being the | 119 // being the language of the document and the latter being the |
| 120 // language of the intended audience (a distinction really only | 120 // language of the intended audience (a distinction really only |
| 121 // relevant for things like language textbooks). This distinction | 121 // relevant for things like language textbooks). This distinction |
| 122 // shouldn't affect translation. | 122 // shouldn't affect translation. |
| 123 WebDocument document = GetMainFrame()->document(); | 123 WebDocument document = GetMainFrame()->document(); |
| 124 std::string content_language = document.contentLanguage().utf8(); | 124 std::string content_language = document.contentLanguage().utf8(); |
| 125 std::string language = DeterminePageLanguage(content_language, contents); | 125 std::string cld_language; |
| 126 bool is_cld_reliable; |
| 127 std::string language = DeterminePageLanguage( |
| 128 content_language, contents, &cld_language, &is_cld_reliable); |
| 129 |
| 126 if (language.empty()) | 130 if (language.empty()) |
| 127 return; | 131 return; |
| 128 | 132 |
| 129 language_determined_time_ = base::TimeTicks::Now(); | 133 language_determined_time_ = base::TimeTicks::Now(); |
| 130 | 134 |
| 135 GURL url(document.url()); |
| 136 LanguageDetectionDetails details; |
| 137 details.time = base::Time::Now(); |
| 138 details.url = url; |
| 139 details.content_language = content_language; |
| 140 details.cld_language = cld_language; |
| 141 details.is_cld_reliable = is_cld_reliable; |
| 142 details.adopted_language = language; |
| 143 |
| 131 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 144 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
| 132 routing_id(), language, IsPageTranslatable(&document))); | 145 routing_id(), details, IsPageTranslatable(&document))); |
| 133 } | 146 } |
| 134 | 147 |
| 135 void TranslateHelper::CancelPendingTranslation() { | 148 void TranslateHelper::CancelPendingTranslation() { |
| 136 weak_method_factory_.InvalidateWeakPtrs(); | 149 weak_method_factory_.InvalidateWeakPtrs(); |
| 137 translation_pending_ = false; | 150 translation_pending_ = false; |
| 138 page_id_ = -1; | 151 page_id_ = -1; |
| 139 source_lang_.clear(); | 152 source_lang_.clear(); |
| 140 target_lang_.clear(); | 153 target_lang_.clear(); |
| 141 } | 154 } |
| 142 | 155 |
| 143 #if defined(ENABLE_LANGUAGE_DETECTION) | 156 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 144 // static | 157 // static |
| 145 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 158 std::string TranslateHelper::DetermineTextLanguage(const string16& text, |
| 159 bool* is_cld_reliable) { |
| 146 std::string language = chrome::kUnknownLanguageCode; | 160 std::string language = chrome::kUnknownLanguageCode; |
| 147 int num_languages = 0; | 161 int num_languages = 0; |
| 148 int text_bytes = 0; | 162 int text_bytes = 0; |
| 149 bool is_reliable = false; | 163 bool is_reliable = false; |
| 150 Language cld_language = | 164 Language cld_language = |
| 151 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 165 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
| 152 &num_languages, NULL, &text_bytes); | 166 &num_languages, NULL, &text_bytes); |
| 167 if (is_cld_reliable != NULL) |
| 168 *is_cld_reliable = is_reliable; |
| 169 |
| 153 // We don't trust the result if the CLD reports that the detection is not | 170 // We don't trust the result if the CLD reports that the detection is not |
| 154 // reliable, or if the actual text used to detect the language was less than | 171 // reliable, or if the actual text used to detect the language was less than |
| 155 // 100 bytes (short texts can often lead to wrong results). | 172 // 100 bytes (short texts can often lead to wrong results). |
| 156 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 173 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that |
| 157 // the determined language code is correct with 50% confidence. Chrome should | 174 // the determined language code is correct with 50% confidence. Chrome should |
| 158 // handle the real confidence value to judge. | 175 // handle the real confidence value to judge. |
| 159 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 176 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && |
| 160 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 177 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
| 161 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 178 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
| 162 // the languages CLD can detect. As a result, it'll return the invalid | 179 // the languages CLD can detect. As a result, it'll return the invalid |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 321 size_t dash_index = code->find('-'); | 338 size_t dash_index = code->find('-'); |
| 322 if (!(dash_index == 2 && code->size() == 5) && | 339 if (!(dash_index == 2 && code->size() == 5) && |
| 323 !(dash_index == std::string::npos && code->size() == 2)) { | 340 !(dash_index == std::string::npos && code->size() == 2)) { |
| 324 // Reset |code| to ignore the invalid code. | 341 // Reset |code| to ignore the invalid code. |
| 325 *code = std::string(); | 342 *code = std::string(); |
| 326 } | 343 } |
| 327 } | 344 } |
| 328 | 345 |
| 329 // static | 346 // static |
| 330 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 347 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
| 331 const string16& contents) { | 348 const string16& contents, |
| 349 std::string* cld_language_p, |
| 350 bool* is_cld_reliable_p) { |
| 332 #if defined(ENABLE_LANGUAGE_DETECTION) | 351 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 333 base::TimeTicks begin_time = base::TimeTicks::Now(); | 352 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 334 std::string cld_language = DetermineTextLanguage(contents); | 353 bool is_cld_reliable; |
| 354 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
| 335 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 355 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
| 336 base::TimeTicks::Now()); | 356 base::TimeTicks::Now()); |
| 357 |
| 358 if (cld_language_p != NULL) |
| 359 *cld_language_p = cld_language; |
| 360 if (is_cld_reliable_p != NULL) |
| 361 *is_cld_reliable_p = is_cld_reliable; |
| 337 ConvertLanguageCodeSynonym(&cld_language); | 362 ConvertLanguageCodeSynonym(&cld_language); |
| 338 VLOG(9) << "CLD determined language code: " << cld_language; | 363 VLOG(9) << "CLD determined language code: " << cld_language; |
| 339 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 364 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
| 340 | 365 |
| 341 // Correct well-known format errors. | 366 // Correct well-known format errors. |
| 342 std::string language = code; | 367 std::string language = code; |
| 343 CorrectLanguageCodeTypo(&language); | 368 CorrectLanguageCodeTypo(&language); |
| 344 | 369 |
| 345 // Convert language code synonyms first because a synonym code is sometimes in | 370 // Convert language code synonyms first because a synonym code is sometimes in |
| 346 // an invalid format, e.g. 'fil'. After validation, such a 3-character language | 371 // an invalid format, e.g. 'fil'. After validation, such a 3-character language |
| (...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 575 WebView* web_view = render_view()->GetWebView(); | 600 WebView* web_view = render_view()->GetWebView(); |
| 576 if (!web_view) { | 601 if (!web_view) { |
| 577 // When the WebView is going away, the render view should have called | 602 // When the WebView is going away, the render view should have called |
| 578 // CancelPendingTranslation() which should have stopped any pending work, so | 603 // CancelPendingTranslation() which should have stopped any pending work, so |
| 579 // that case should not happen. | 604 // that case should not happen. |
| 580 NOTREACHED(); | 605 NOTREACHED(); |
| 581 return NULL; | 606 return NULL; |
| 582 } | 607 } |
| 583 return web_view->mainFrame(); | 608 return web_view->mainFrame(); |
| 584 } | 609 } |
| OLD | NEW |