OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/renderer/translate/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
88 // meta tag for "content-language". This may or may not also | 88 // meta tag for "content-language". This may or may not also |
89 // have a value derived from the actual Content-Language HTTP | 89 // have a value derived from the actual Content-Language HTTP |
90 // header. The two actually have different meanings (despite the | 90 // header. The two actually have different meanings (despite the |
91 // original intent of http-equiv to be an equivalent) with the former | 91 // original intent of http-equiv to be an equivalent) with the former |
92 // being the language of the document and the latter being the | 92 // being the language of the document and the latter being the |
93 // language of the intended audience (a distinction really only | 93 // language of the intended audience (a distinction really only |
94 // relevant for things like language textbooks). This distinction | 94 // relevant for things like language textbooks). This distinction |
95 // shouldn't affect translation. | 95 // shouldn't affect translation. |
96 WebDocument document = GetMainFrame()->document(); | 96 WebDocument document = GetMainFrame()->document(); |
97 std::string content_language = document.contentLanguage().utf8(); | 97 std::string content_language = document.contentLanguage().utf8(); |
| 98 std::string html_lang = |
| 99 document.documentElement().getAttribute("lang").utf8(); |
98 std::string cld_language; | 100 std::string cld_language; |
99 bool is_cld_reliable; | 101 bool is_cld_reliable; |
100 std::string language = DeterminePageLanguage( | 102 std::string language = DeterminePageLanguage( |
101 content_language, contents, &cld_language, &is_cld_reliable); | 103 content_language, html_lang, contents, &cld_language, &is_cld_reliable); |
102 | 104 |
103 if (language.empty()) | 105 if (language.empty()) |
104 return; | 106 return; |
105 | 107 |
106 language_determined_time_ = base::TimeTicks::Now(); | 108 language_determined_time_ = base::TimeTicks::Now(); |
107 | 109 |
| 110 // TODO(toyoshim): Add |html_lang| to LanguageDetectionDetails. |
108 GURL url(document.url()); | 111 GURL url(document.url()); |
109 LanguageDetectionDetails details; | 112 LanguageDetectionDetails details; |
110 details.time = base::Time::Now(); | 113 details.time = base::Time::Now(); |
111 details.url = url; | 114 details.url = url; |
112 details.content_language = content_language; | 115 details.content_language = content_language; |
113 details.cld_language = cld_language; | 116 details.cld_language = cld_language; |
114 details.is_cld_reliable = is_cld_reliable; | 117 details.is_cld_reliable = is_cld_reliable; |
115 details.adopted_language = language; | 118 details.adopted_language = language; |
116 | 119 |
117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 120 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
312 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). | 315 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). |
313 size_t dash_index = code->find('-'); | 316 size_t dash_index = code->find('-'); |
314 if (!(dash_index == 2 && code->size() == 5) && | 317 if (!(dash_index == 2 && code->size() == 5) && |
315 !(dash_index == std::string::npos && code->size() == 2)) { | 318 !(dash_index == std::string::npos && code->size() == 2)) { |
316 // Reset |code| to ignore the invalid code. | 319 // Reset |code| to ignore the invalid code. |
317 *code = std::string(); | 320 *code = std::string(); |
318 } | 321 } |
319 } | 322 } |
320 | 323 |
321 // static | 324 // static |
| 325 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { |
| 326 // Normalize |code| in place. First, correct well-known format errors. |
| 327 CorrectLanguageCodeTypo(code); |
| 328 |
| 329 // Convert language code synonyms before validating, because a synonym code |
| 330 // may itself be in an invalid format, e.g. 'fil'. If validation ran first, |
| 331 // such a three-character code would be reset to an empty string. |
| 332 ConvertLanguageCodeSynonym(code); |
| 333 ResetInvalidLanguageCode(code); |
| 334 } |
| 335 |
| 336 // static |
322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 337 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
| 338 const std::string& html_lang, |
323 const string16& contents, | 339 const string16& contents, |
324 std::string* cld_language_p, | 340 std::string* cld_language_p, |
325 bool* is_cld_reliable_p) { | 341 bool* is_cld_reliable_p) { |
326 #if defined(ENABLE_LANGUAGE_DETECTION) | 342 #if defined(ENABLE_LANGUAGE_DETECTION) |
327 base::TimeTicks begin_time = base::TimeTicks::Now(); | 343 base::TimeTicks begin_time = base::TimeTicks::Now(); |
328 bool is_cld_reliable; | 344 bool is_cld_reliable; |
329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); | 345 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 346 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
331 base::TimeTicks::Now()); | 347 base::TimeTicks::Now()); |
332 | 348 |
333 if (cld_language_p != NULL) | 349 if (cld_language_p != NULL) |
334 *cld_language_p = cld_language; | 350 *cld_language_p = cld_language; |
335 if (is_cld_reliable_p != NULL) | 351 if (is_cld_reliable_p != NULL) |
336 *is_cld_reliable_p = is_cld_reliable; | 352 *is_cld_reliable_p = is_cld_reliable; |
337 ConvertLanguageCodeSynonym(&cld_language); | 353 ConvertLanguageCodeSynonym(&cld_language); |
338 VLOG(9) << "CLD determined language code: " << cld_language; | 354 VLOG(9) << "CLD determined language code: " << cld_language; |
339 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 355 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
340 | 356 |
341 // Correct well-known format errors. | 357 // Check if html lang attribute is valid. |
342 std::string language = code; | 358 std::string modified_html_lang; |
343 CorrectLanguageCodeTypo(&language); | 359 if (!html_lang.empty()) { |
| 360 modified_html_lang = html_lang; |
| 361 ApplyLanguageCodeCorrection(&modified_html_lang); |
| 362 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang); |
| 363 VLOG(9) << "html lang based language code: " << modified_html_lang; |
| 364 } |
344 | 365 |
345 // Convert language code synonym firstly because sometime synonym code is in | 366 // Check if Content-Language is valid. |
346 // invalid format, e.g. 'fil'. After validation, such a 3 characters language | 367 std::string modified_code; |
347 // gets converted to an empty string. | 368 if (!code.empty()) { |
348 ConvertLanguageCodeSynonym(&language); | 369 modified_code = code; |
349 ResetInvalidLanguageCode(&language); | 370 ApplyLanguageCodeCorrection(&modified_code); |
350 VLOG(9) << "Content-Language based language code: " << language; | 371 TranslateHelperMetrics::ReportContentLanguage(code, modified_code); |
| 372 VLOG(9) << "Content-Language based language code: " << modified_code; |
| 373 } |
351 | 374 |
352 TranslateHelperMetrics::ReportContentLanguage(code, language); | 375 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt |
| 376 // |modified_code|. |
| 377 std::string language = modified_html_lang.empty() ? modified_code : |
| 378 modified_html_lang; |
353 | 379 |
354 #if defined(ENABLE_LANGUAGE_DETECTION) | 380 #if defined(ENABLE_LANGUAGE_DETECTION) |
355 // If |language| is empty, just use CLD result even though it might be | 381 // If |language| is empty, just use CLD result even though it might be |
356 // chrome::kUnknownLanguageCode. | 382 // chrome::kUnknownLanguageCode. |
357 if (language.empty()) { | 383 if (language.empty()) { |
358 TranslateHelperMetrics::ReportLanguageVerification( | 384 TranslateHelperMetrics::ReportLanguageVerification( |
359 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); | 385 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); |
360 return cld_language; | 386 return cld_language; |
361 } | 387 } |
362 | 388 |
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
592 WebView* web_view = render_view()->GetWebView(); | 618 WebView* web_view = render_view()->GetWebView(); |
593 if (!web_view) { | 619 if (!web_view) { |
594 // When the WebView is going away, the render view should have called | 620 // When the WebView is going away, the render view should have called |
595 // CancelPendingTranslation() which should have stopped any pending work, so | 621 // CancelPendingTranslation() which should have stopped any pending work, so |
596 // that case should not happen. | 622 // that case should not happen. |
597 NOTREACHED(); | 623 NOTREACHED(); |
598 return NULL; | 624 return NULL; |
599 } | 625 } |
600 return web_view->mainFrame(); | 626 return web_view->mainFrame(); |
601 } | 627 } |
OLD | NEW |