OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 104 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
115 // meta tag for "content-language". This may or may not also | 115 // meta tag for "content-language". This may or may not also |
116 // have a value derived from the actual Content-Language HTTP | 116 // have a value derived from the actual Content-Language HTTP |
117 // header. The two actually have different meanings (despite the | 117 // header. The two actually have different meanings (despite the |
118 // original intent of http-equiv to be an equivalent) with the former | 118 // original intent of http-equiv to be an equivalent) with the former |
119 // being the language of the document and the latter being the | 119 // being the language of the document and the latter being the |
120 // language of the intended audience (a distinction really only | 120 // language of the intended audience (a distinction really only |
121 // relevant for things like language textbooks). This distinction | 121 // relevant for things like language textbooks). This distinction |
122 // shouldn't affect translation. | 122 // shouldn't affect translation. |
123 WebDocument document = GetMainFrame()->document(); | 123 WebDocument document = GetMainFrame()->document(); |
124 std::string content_language = document.contentLanguage().utf8(); | 124 std::string content_language = document.contentLanguage().utf8(); |
125 std::string language = DeterminePageLanguage(content_language, contents); | 125 std::string cld_language; |
| 126 bool is_cld_reliable; |
| 127 std::string language = DeterminePageLanguage( |
| 128 content_language, contents, &cld_language, &is_cld_reliable); |
| 129 |
126 if (language.empty()) | 130 if (language.empty()) |
127 return; | 131 return; |
128 | 132 |
129 language_determined_time_ = base::TimeTicks::Now(); | 133 language_determined_time_ = base::TimeTicks::Now(); |
130 | 134 |
| 135 GURL url(document.url()); |
| 136 LanguageDetectionDetails details; |
| 137 details.time = base::Time::Now(); |
| 138 details.url = url; |
| 139 details.content_language = content_language; |
| 140 details.cld_language = cld_language; |
| 141 details.is_cld_reliable = is_cld_reliable; |
| 142 details.adopted_language = language; |
| 143 |
131 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 144 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
132 routing_id(), language, IsPageTranslatable(&document))); | 145 routing_id(), details, IsPageTranslatable(&document))); |
133 } | 146 } |
134 | 147 |
// Cancels any pending translation and returns the helper to its idle state:
// invalidates the weak pointers handed out by |weak_method_factory_|, clears
// |translation_pending_|, resets |page_id_| to -1, and empties |source_lang_|
// and |target_lang_|.
// NOTE(review): presumably invalidating the weak pointers is what keeps
// already-posted translation callbacks from running -- confirm against the
// call sites that bind through |weak_method_factory_|.
135 void TranslateHelper::CancelPendingTranslation() { | 148 void TranslateHelper::CancelPendingTranslation() { |
136 weak_method_factory_.InvalidateWeakPtrs(); | 149 weak_method_factory_.InvalidateWeakPtrs(); |
137 translation_pending_ = false; | 150 translation_pending_ = false; |
138 page_id_ = -1; | 151 page_id_ = -1; |
139 source_lang_.clear(); | 152 source_lang_.clear(); |
140 target_lang_.clear(); | 153 target_lang_.clear(); |
141 } | 154 } |
142 | 155 |
143 #if defined(ENABLE_LANGUAGE_DETECTION) | 156 #if defined(ENABLE_LANGUAGE_DETECTION) |
144 // static | 157 // static |
145 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 158 std::string TranslateHelper::DetermineTextLanguage(const string16& text, |
| 159 bool* is_cld_reliable) { |
146 std::string language = chrome::kUnknownLanguageCode; | 160 std::string language = chrome::kUnknownLanguageCode; |
147 int num_languages = 0; | 161 int num_languages = 0; |
148 int text_bytes = 0; | 162 int text_bytes = 0; |
149 bool is_reliable = false; | 163 bool is_reliable = false; |
150 Language cld_language = | 164 Language cld_language = |
151 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 165 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
152 &num_languages, NULL, &text_bytes); | 166 &num_languages, NULL, &text_bytes); |
| 167 if (is_cld_reliable != NULL) |
| 168 *is_cld_reliable = is_reliable; |
| 169 |
153 // We don't trust the result if the CLD reports that the detection is not | 170 // We don't trust the result if the CLD reports that the detection is not |
154 // reliable, or if the actual text used to detect the language was less than | 171 // reliable, or if the actual text used to detect the language was less than |
155 // 100 bytes (short texts can often lead to wrong results). | 172 // 100 bytes (short texts can often lead to wrong results). |
156 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that | 173 // TODO(toyoshim): CLD provides |is_reliable| flag. But, it just says that |
157 // the determined language code is correct with 50% confidence. Chrome should | 174 // the determined language code is correct with 50% confidence. Chrome should |
158 // handle the real confidence value to judge. | 175 // handle the real confidence value to judge. |
159 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && | 176 if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES && |
160 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { | 177 cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { |
161 // We should not use LanguageCode_ISO_639_1 because it does not cover all | 178 // We should not use LanguageCode_ISO_639_1 because it does not cover all |
162 // the languages CLD can detect. As a result, it'll return the invalid | 179 // the languages CLD can detect. As a result, it'll return the invalid |
(...skipping 158 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
321 size_t dash_index = code->find('-'); | 338 size_t dash_index = code->find('-'); |
322 if (!(dash_index == 2 && code->size() == 5) && | 339 if (!(dash_index == 2 && code->size() == 5) && |
323 !(dash_index == std::string::npos && code->size() == 2)) { | 340 !(dash_index == std::string::npos && code->size() == 2)) { |
324 // Reset |code| to ignore the invalid code. | 341 // Reset |code| to ignore the invalid code. |
325 *code = std::string(); | 342 *code = std::string(); |
326 } | 343 } |
327 } | 344 } |
328 | 345 |
329 // static | 346 // static |
330 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 347 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
331 const string16& contents) { | 348 const string16& contents, |
| 349 std::string* cld_language_p, |
| 350 bool* is_cld_reliable_p) { |
332 #if defined(ENABLE_LANGUAGE_DETECTION) | 351 #if defined(ENABLE_LANGUAGE_DETECTION) |
333 base::TimeTicks begin_time = base::TimeTicks::Now(); | 352 base::TimeTicks begin_time = base::TimeTicks::Now(); |
334 std::string cld_language = DetermineTextLanguage(contents); | 353 bool is_cld_reliable; |
| 354 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
335 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 355 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
336 base::TimeTicks::Now()); | 356 base::TimeTicks::Now()); |
| 357 |
| 358 if (cld_language_p != NULL) |
| 359 *cld_language_p = cld_language; |
| 360 if (is_cld_reliable_p != NULL) |
| 361 *is_cld_reliable_p = is_cld_reliable; |
337 ConvertLanguageCodeSynonym(&cld_language); | 362 ConvertLanguageCodeSynonym(&cld_language); |
338 VLOG(9) << "CLD determined language code: " << cld_language; | 363 VLOG(9) << "CLD determined language code: " << cld_language; |
339 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 364 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
340 | 365 |
341 // Correct well-known format errors. | 366 // Correct well-known format errors. |
342 std::string language = code; | 367 std::string language = code; |
343 CorrectLanguageCodeTypo(&language); | 368 CorrectLanguageCodeTypo(&language); |
344 | 369 |
345 // Convert language code synonym firstly because sometime synonym code is in | 370 // Convert language code synonym firstly because sometime synonym code is in |
346 // invalid format, e.g. 'fil'. After validation, such a 3 characters language | 371 // invalid format, e.g. 'fil'. After validation, such a 3 characters language |
(...skipping 228 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
575 WebView* web_view = render_view()->GetWebView(); | 600 WebView* web_view = render_view()->GetWebView(); |
576 if (!web_view) { | 601 if (!web_view) { |
577 // When the WebView is going away, the render view should have called | 602 // When the WebView is going away, the render view should have called |
578 // CancelPendingTranslation() which should have stopped any pending work, so | 603 // CancelPendingTranslation() which should have stopped any pending work, so |
579 // that case should not happen. | 604 // that case should not happen. |
580 NOTREACHED(); | 605 NOTREACHED(); |
581 return NULL; | 606 return NULL; |
582 } | 607 } |
583 return web_view->mainFrame(); | 608 return web_view->mainFrame(); |
584 } | 609 } |
OLD | NEW |