OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 74 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
85 // meta tag for "content-language". This may or may not also | 85 // meta tag for "content-language". This may or may not also |
86 // have a value derived from the actual Content-Language HTTP | 86 // have a value derived from the actual Content-Language HTTP |
87 // header. The two actually have different meanings (despite the | 87 // header. The two actually have different meanings (despite the |
88 // original intent of http-equiv to be an equivalent) with the former | 88 // original intent of http-equiv to be an equivalent) with the former |
89 // being the language of the document and the latter being the | 89 // being the language of the document and the latter being the |
90 // language of the intended audience (a distinction really only | 90 // language of the intended audience (a distinction really only |
91 // relevant for things like language textbooks). This distinction | 91 // relevant for things like language textbooks). This distinction |
92 // shouldn't affect translation. | 92 // shouldn't affect translation. |
93 WebDocument document = GetMainFrame()->document(); | 93 WebDocument document = GetMainFrame()->document(); |
94 std::string content_language = document.contentLanguage().utf8(); | 94 std::string content_language = document.contentLanguage().utf8(); |
95 std::string language = DeterminePageLanguage(content_language, contents); | 95 std::string cld_language; |
| 96 bool is_cld_reliable; |
| 97 std::string language = DeterminePageLanguage( |
| 98 content_language, contents, &cld_language, &is_cld_reliable); |
| 99 |
96 if (language.empty()) | 100 if (language.empty()) |
97 return; | 101 return; |
98 | 102 |
| 103 GURL url(document.url()); |
| 104 LanguageDetectionDetails details; |
| 105 details.time = base::Time::Now(); |
| 106 details.url = url; |
| 107 details.content_language = content_language; |
| 108 details.cld_language = cld_language; |
| 109 details.is_cld_reliable = is_cld_reliable; |
| 110 details.adopted_language = language; |
| 111 |
99 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 112 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
100 routing_id(), language, IsPageTranslatable(&document))); | 113 routing_id(), details, IsPageTranslatable(&document))); |
101 } | 114 } |
102 | 115 |
// Drops any pending translation: invalidates the weak pointers issued by | // Drops any pending translation: invalidates the weak pointers issued by |
// weak_method_factory_ and resets the pending flag, page id and languages. | // weak_method_factory_ and resets the pending flag, page id and languages. |
103 void TranslateHelper::CancelPendingTranslation() { | 116 void TranslateHelper::CancelPendingTranslation() { |
104 weak_method_factory_.InvalidateWeakPtrs(); | 117 weak_method_factory_.InvalidateWeakPtrs(); |
105 translation_pending_ = false; | 118 translation_pending_ = false; |
106 page_id_ = -1; | 119 page_id_ = -1; |
107 source_lang_.clear(); | 120 source_lang_.clear(); |
108 target_lang_.clear(); | 121 target_lang_.clear(); |
109 } | 122 } |
110 | 123 |
111 #if defined(ENABLE_LANGUAGE_DETECTION) | 124 #if defined(ENABLE_LANGUAGE_DETECTION) |
112 // static | 125 // static |
113 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 126 std::string TranslateHelper::DetermineTextLanguage(const string16& text, |
| 127 bool* is_cld_reliable) { |
114 std::string language = chrome::kUnknownLanguageCode; | 128 std::string language = chrome::kUnknownLanguageCode; |
115 int num_languages = 0; | 129 int num_languages = 0; |
116 int text_bytes = 0; | 130 int text_bytes = 0; |
117 bool is_reliable = false; | 131 bool is_reliable = false; |
118 Language cld_language = | 132 Language cld_language = |
119 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 133 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
120 &num_languages, NULL, &text_bytes); | 134 &num_languages, NULL, &text_bytes); |
| 135 if (is_cld_reliable != NULL) |
| 136 *is_cld_reliable = is_reliable; |
| 137 |
121 // We don't trust the result if the CLD reports that the detection is not | 138 // We don't trust the result if the CLD reports that the detection is not |
122 // reliable, or if the actual text used to detect the language was less than | 139 // reliable, or if the actual text used to detect the language was less than |
123 // 100 bytes (short texts can often lead to wrong results). | 140 // 100 bytes (short texts can often lead to wrong results). |
124 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 141 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that |
125 // the determined language code is correct with 50% confidence. Chrome should | 142 // the determined language code is correct with 50% confidence. Chrome should |
126 // handle the real confidence value to judge. | 143 // handle the real confidence value to judge. |
127 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 144 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && |
128 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 145 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
129 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 146 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
130 // the languages CLD can detect. As a result, it'll return the invalid | 147 // the languages CLD can detect. As a result, it'll return the invalid |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
289 size_t dash_index = code->find('-'); | 306 size_t dash_index = code->find('-'); |
290 if (!(dash_index == 2 && code->size() == 5) && | 307 if (!(dash_index == 2 && code->size() == 5) && |
291 !(dash_index == std::string::npos && code->size() == 2)) { | 308 !(dash_index == std::string::npos && code->size() == 2)) { |
292 // Reset |code| to ignore the invalid code. | 309 // Reset |code| to ignore the invalid code. |
293 *code = std::string(); | 310 *code = std::string(); |
294 } | 311 } |
295 } | 312 } |
296 | 313 |
297 // static | 314 // static |
298 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 315 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
299 const string16& contents) { | 316 const string16& contents, |
| 317 std::string* cld_language_p, |
| 318 bool* is_cld_reliable_p) { |
300 #if defined(ENABLE_LANGUAGE_DETECTION) | 319 #if defined(ENABLE_LANGUAGE_DETECTION) |
301 base::TimeTicks begin_time = base::TimeTicks::Now(); | 320 base::TimeTicks begin_time = base::TimeTicks::Now(); |
302 std::string cld_language = DetermineTextLanguage(contents); | 321 bool is_cld_reliable; |
| 322 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
| 323 |
| 324 if (cld_language_p != NULL) |
| 325 *cld_language_p = cld_language; |
| 326 if (is_cld_reliable_p != NULL) |
| 327 *is_cld_reliable_p = is_cld_reliable; |
| 328 |
303 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 329 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
304 base::TimeTicks::Now() - begin_time); | 330 base::TimeTicks::Now() - begin_time); |
305 ConvertLanguageCodeSynonym(&cld_language); | 331 ConvertLanguageCodeSynonym(&cld_language); |
306 VLOG(9) << "CLD determined language code: " << cld_language; | 332 VLOG(9) << "CLD determined language code: " << cld_language; |
307 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 333 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
308 | 334 |
309 // Correct well-known format errors. | 335 // Correct well-known format errors. |
310 std::string language = code; | 336 std::string language = code; |
311 CorrectLanguageCodeTypo(&language); | 337 CorrectLanguageCodeTypo(&language); |
312 | 338 |
(...skipping 219 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
532 WebView* web_view = render_view()->GetWebView(); | 558 WebView* web_view = render_view()->GetWebView(); |
533 if (!web_view) { | 559 if (!web_view) { |
534 // When the WebView is going away, the render view should have called | 560 // When the WebView is going away, the render view should have called |
535 // CancelPendingTranslation() which should have stopped any pending work, so | 561 // CancelPendingTranslation() which should have stopped any pending work, so |
536 // that case should not happen. | 562 // that case should not happen. |
537 NOTREACHED(); | 563 NOTREACHED(); |
538 return NULL; | 564 return NULL; |
539 } | 565 } |
540 return web_view->mainFrame(); | 566 return web_view->mainFrame(); |
541 } | 567 } |
OLD | NEW |