| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
| (...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 88 // meta tag for "content-language". This may or may not also | 88 // meta tag for "content-language". This may or may not also |
| 89 // have a value derived from the actual Content-Language HTTP | 89 // have a value derived from the actual Content-Language HTTP |
| 90 // header. The two actually have different meanings (despite the | 90 // header. The two actually have different meanings (despite the |
| 91 // original intent of http-equiv to be an equivalent) with the former | 91 // original intent of http-equiv to be an equivalent) with the former |
| 92 // being the language of the document and the latter being the | 92 // being the language of the document and the latter being the |
| 93 // language of the intended audience (a distinction really only | 93 // language of the intended audience (a distinction really only |
| 94 // relevant for things like language textbooks). This distinction | 94 // relevant for things like language textbooks). This distinction |
| 95 // shouldn't affect translation. | 95 // shouldn't affect translation. |
| 96 WebDocument document = GetMainFrame()->document(); | 96 WebDocument document = GetMainFrame()->document(); |
| 97 std::string content_language = document.contentLanguage().utf8(); | 97 std::string content_language = document.contentLanguage().utf8(); |
| 98 std::string language = DeterminePageLanguage(content_language, contents); | 98 std::string cld_language; |
| 99 bool is_cld_reliable; |
| 100 std::string language = DeterminePageLanguage( |
| 101 content_language, contents, &cld_language, &is_cld_reliable); |
| 102 |
| 103 if (language.empty()) |
| 104 return; |
| 99 | 105 |
| 100 language_determined_time_ = base::TimeTicks::Now(); | 106 language_determined_time_ = base::TimeTicks::Now(); |
| 101 | 107 |
| 108 GURL url(document.url()); |
| 109 LanguageDetectionDetails details; |
| 110 details.time = base::Time::Now(); |
| 111 details.url = url; |
| 112 details.content_language = content_language; |
| 113 details.cld_language = cld_language; |
| 114 details.is_cld_reliable = is_cld_reliable; |
| 115 details.adopted_language = language; |
| 116 |
| 102 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
| 103 routing_id(), | 118 routing_id(), |
| 104 language, | 119 details, |
| 105 IsTranslationAllowed(&document) && !language.empty())); | 120 IsTranslationAllowed(&document) && !language.empty())); |
| 106 } | 121 } |
| 107 | 122 |
| 108 void TranslateHelper::CancelPendingTranslation() { | 123 void TranslateHelper::CancelPendingTranslation() { |
| 109 weak_method_factory_.InvalidateWeakPtrs(); | 124 weak_method_factory_.InvalidateWeakPtrs(); |
| 110 translation_pending_ = false; | 125 translation_pending_ = false; |
| 111 page_id_ = -1; | 126 page_id_ = -1; |
| 112 source_lang_.clear(); | 127 source_lang_.clear(); |
| 113 target_lang_.clear(); | 128 target_lang_.clear(); |
| 114 } | 129 } |
| 115 | 130 |
| 116 #if defined(ENABLE_LANGUAGE_DETECTION) | 131 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 117 // static | 132 // static |
| 118 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 133 std::string TranslateHelper::DetermineTextLanguage(const string16& text, |
| 134 bool* is_cld_reliable) { |
| 119 std::string language = chrome::kUnknownLanguageCode; | 135 std::string language = chrome::kUnknownLanguageCode; |
| 120 int num_languages = 0; | 136 int num_languages = 0; |
| 121 int text_bytes = 0; | 137 int text_bytes = 0; |
| 122 bool is_reliable = false; | 138 bool is_reliable = false; |
| 123 Language cld_language = | 139 Language cld_language = |
| 124 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 140 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
| 125 &num_languages, NULL, &text_bytes); | 141 &num_languages, NULL, &text_bytes); |
| 142 if (is_cld_reliable != NULL) |
| 143 *is_cld_reliable = is_reliable; |
| 144 |
| 126 // We don't trust the result if the CLD reports that the detection is not | 145 // We don't trust the result if the CLD reports that the detection is not |
| 127 // reliable, or if the actual text used to detect the language was less than | 146 // reliable, or if the actual text used to detect the language was less than |
| 128 // 100 bytes (short texts can often lead to wrong results). | 147 // 100 bytes (short texts can often lead to wrong results). |
| 129 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 148 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that |
| 130 // the determined language code is correct with 50% confidence. Chrome should | 149 // the determined language code is correct with 50% confidence. Chrome should |
| 131 // handle the real confidence value to judge. | 150 // handle the real confidence value to judge. |
| 132 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 151 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && |
| 133 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 152 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
| 134 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 153 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
| 135 // the languages CLD can detect. As a result, it'll return the invalid | 154 // the languages CLD can detect. As a result, it'll return the invalid |
| (...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 294 size_t dash_index = code->find('-'); | 313 size_t dash_index = code->find('-'); |
| 295 if (!(dash_index == 2 && code->size() == 5) && | 314 if (!(dash_index == 2 && code->size() == 5) && |
| 296 !(dash_index == std::string::npos && code->size() == 2)) { | 315 !(dash_index == std::string::npos && code->size() == 2)) { |
| 297 // Reset |code| to ignore the invalid code. | 316 // Reset |code| to ignore the invalid code. |
| 298 *code = std::string(); | 317 *code = std::string(); |
| 299 } | 318 } |
| 300 } | 319 } |
| 301 | 320 |
| 302 // static | 321 // static |
| 303 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
| 304 const string16& contents) { | 323 const string16& contents, |
| 324 std::string* cld_language_p, |
| 325 bool* is_cld_reliable_p) { |
| 305 #if defined(ENABLE_LANGUAGE_DETECTION) | 326 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 306 base::TimeTicks begin_time = base::TimeTicks::Now(); | 327 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 307 std::string cld_language = DetermineTextLanguage(contents); | 328 bool is_cld_reliable; |
| 329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
| 308 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
| 309 base::TimeTicks::Now()); | 331 base::TimeTicks::Now()); |
| 332 |
| 333 if (cld_language_p != NULL) |
| 334 *cld_language_p = cld_language; |
| 335 if (is_cld_reliable_p != NULL) |
| 336 *is_cld_reliable_p = is_cld_reliable; |
| 310 ConvertLanguageCodeSynonym(&cld_language); | 337 ConvertLanguageCodeSynonym(&cld_language); |
| 311 VLOG(9) << "CLD determined language code: " << cld_language; | 338 VLOG(9) << "CLD determined language code: " << cld_language; |
| 312 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 339 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
| 313 | 340 |
| 314 // Correct well-known format errors. | 341 // Correct well-known format errors. |
| 315 std::string language = code; | 342 std::string language = code; |
| 316 CorrectLanguageCodeTypo(&language); | 343 CorrectLanguageCodeTypo(&language); |
| 317 | 344 |
| 318 // Convert language code synonyms first, because a synonym code is sometimes in | 345 // Convert language code synonyms first, because a synonym code is sometimes in |
| 319 // invalid format, e.g. 'fil'. After validation, such a 3 characters language | 346 // invalid format, e.g. 'fil'. After validation, such a 3 characters language |
| (...skipping 245 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 565 WebView* web_view = render_view()->GetWebView(); | 592 WebView* web_view = render_view()->GetWebView(); |
| 566 if (!web_view) { | 593 if (!web_view) { |
| 567 // When the WebView is going away, the render view should have called | 594 // When the WebView is going away, the render view should have called |
| 568 // CancelPendingTranslation() which should have stopped any pending work, so | 595 // CancelPendingTranslation() which should have stopped any pending work, so |
| 569 // that case should not happen. | 596 // that case should not happen. |
| 570 NOTREACHED(); | 597 NOTREACHED(); |
| 571 return NULL; | 598 return NULL; |
| 572 } | 599 } |
| 573 return web_view->mainFrame(); | 600 return web_view->mainFrame(); |
| 574 } | 601 } |
| OLD | NEW |