Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(502)

Side by Side Diff: chrome/renderer/translate/translate_helper.cc

Issue 15728002: Translate: adopt html lang attribute if valid value is provided (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 7 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate/translate_helper.h" 5 #include "chrome/renderer/translate/translate_helper.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/message_loop.h" 10 #include "base/message_loop.h"
(...skipping 77 matching lines...) Expand 10 before | Expand all | Expand 10 after
88 // meta tag for "content-language". This may or may not also 88 // meta tag for "content-language". This may or may not also
89 // have a value derived from the actual Content-Language HTTP 89 // have a value derived from the actual Content-Language HTTP
90 // header. The two actually have different meanings (despite the 90 // header. The two actually have different meanings (despite the
91 // original intent of http-equiv to be an equivalent) with the former 91 // original intent of http-equiv to be an equivalent) with the former
92 // being the language of the document and the latter being the 92 // being the language of the document and the latter being the
93 // language of the intended audience (a distinction really only 93 // language of the intended audience (a distinction really only
94 // relevant for things like language textbooks). This distinction 94 // relevant for things like language textbooks). This distinction
95 // shouldn't affect translation. 95 // shouldn't affect translation.
96 WebDocument document = GetMainFrame()->document(); 96 WebDocument document = GetMainFrame()->document();
97 std::string content_language = document.contentLanguage().utf8(); 97 std::string content_language = document.contentLanguage().utf8();
98 std::string html_lang =
99 document.documentElement().getAttribute("lang").utf8();
98 std::string cld_language; 100 std::string cld_language;
99 bool is_cld_reliable; 101 bool is_cld_reliable;
100 std::string language = DeterminePageLanguage( 102 std::string language = DeterminePageLanguage(
101 content_language, contents, &cld_language, &is_cld_reliable); 103 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
102 104
103 if (language.empty()) 105 if (language.empty())
104 return; 106 return;
105 107
106 language_determined_time_ = base::TimeTicks::Now(); 108 language_determined_time_ = base::TimeTicks::Now();
107 109
110 // TODO(toyoshim): Add |html_lang| to LanguageDetectionDetails.
108 GURL url(document.url()); 111 GURL url(document.url());
109 LanguageDetectionDetails details; 112 LanguageDetectionDetails details;
110 details.time = base::Time::Now(); 113 details.time = base::Time::Now();
111 details.url = url; 114 details.url = url;
112 details.content_language = content_language; 115 details.content_language = content_language;
113 details.cld_language = cld_language; 116 details.cld_language = cld_language;
114 details.is_cld_reliable = is_cld_reliable; 117 details.is_cld_reliable = is_cld_reliable;
115 details.adopted_language = language; 118 details.adopted_language = language;
116 119
117 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( 120 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
(...skipping 194 matching lines...) Expand 10 before | Expand all | Expand 10 after
312 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]). 315 // Roughly check if the language code follows [a-z][a-z](-[A-Z][A-Z]).
313 size_t dash_index = code->find('-'); 316 size_t dash_index = code->find('-');
314 if (!(dash_index == 2 && code->size() == 5) && 317 if (!(dash_index == 2 && code->size() == 5) &&
315 !(dash_index == std::string::npos && code->size() == 2)) { 318 !(dash_index == std::string::npos && code->size() == 2)) {
316 // Reset |code| to ignore the invalid code. 319 // Reset |code| to ignore the invalid code.
317 *code = std::string(); 320 *code = std::string();
318 } 321 }
319 } 322 }
320 323
321 // static 324 // static
325 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
326 // Correct well-known format errors.
327 CorrectLanguageCodeTypo(code);
328
329 // Convert language code synonyms first, because a synonym code is sometimes
330 // in an invalid format, e.g. 'fil'. Validation would otherwise reset such a
331 // 3-character language code to an empty string.
332 ConvertLanguageCodeSynonym(code);
333 ResetInvalidLanguageCode(code);
334 }
335
336 // static
322 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, 337 std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
338 const std::string& html_lang,
323 const string16& contents, 339 const string16& contents,
324 std::string* cld_language_p, 340 std::string* cld_language_p,
325 bool* is_cld_reliable_p) { 341 bool* is_cld_reliable_p) {
326 #if defined(ENABLE_LANGUAGE_DETECTION) 342 #if defined(ENABLE_LANGUAGE_DETECTION)
327 base::TimeTicks begin_time = base::TimeTicks::Now(); 343 base::TimeTicks begin_time = base::TimeTicks::Now();
328 bool is_cld_reliable; 344 bool is_cld_reliable;
329 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); 345 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
330 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, 346 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time,
331 base::TimeTicks::Now()); 347 base::TimeTicks::Now());
332 348
333 if (cld_language_p != NULL) 349 if (cld_language_p != NULL)
334 *cld_language_p = cld_language; 350 *cld_language_p = cld_language;
335 if (is_cld_reliable_p != NULL) 351 if (is_cld_reliable_p != NULL)
336 *is_cld_reliable_p = is_cld_reliable; 352 *is_cld_reliable_p = is_cld_reliable;
337 ConvertLanguageCodeSynonym(&cld_language); 353 ConvertLanguageCodeSynonym(&cld_language);
338 VLOG(9) << "CLD determined language code: " << cld_language; 354 VLOG(9) << "CLD determined language code: " << cld_language;
339 #endif // defined(ENABLE_LANGUAGE_DETECTION) 355 #endif // defined(ENABLE_LANGUAGE_DETECTION)
340 356
341 // Correct well-known format errors. 357 // Check if html lang attribute is valid.
342 std::string language = code; 358 std::string modified_html_lang;
343 CorrectLanguageCodeTypo(&language); 359 if (!html_lang.empty()) {
360 modified_html_lang = html_lang;
361 ApplyLanguageCodeCorrection(&modified_html_lang);
362 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang);
363 VLOG(9) << "html lang based language code: " << modified_html_lang;
364 }
344 365
345 // Convert language code synonym firstly because sometime synonym code is in 366 // Check if Content-Language is valid.
346 // invalid format, e.g. 'fil'. After validation, such a 3 characters language 367 std::string modified_code;
347 // gets converted to an empty string. 368 if (!code.empty()) {
348 ConvertLanguageCodeSynonym(&language); 369 modified_code = code;
349 ResetInvalidLanguageCode(&language); 370 ApplyLanguageCodeCorrection(&modified_code);
350 VLOG(9) << "Content-Language based language code: " << language; 371 TranslateHelperMetrics::ReportContentLanguage(code, modified_code);
372 VLOG(9) << "Content-Language based language code: " << modified_code;
373 }
351 374
352 TranslateHelperMetrics::ReportContentLanguage(code, language); 375 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
376 // |modified_code|.
377 std::string language = modified_html_lang.empty() ? modified_code :
378 modified_html_lang;
353 379
354 #if defined(ENABLE_LANGUAGE_DETECTION) 380 #if defined(ENABLE_LANGUAGE_DETECTION)
355 // If |language| is empty, just use CLD result even though it might be 381 // If |language| is empty, just use CLD result even though it might be
356 // chrome::kUnknownLanguageCode. 382 // chrome::kUnknownLanguageCode.
357 if (language.empty()) { 383 if (language.empty()) {
358 TranslateHelperMetrics::ReportLanguageVerification( 384 TranslateHelperMetrics::ReportLanguageVerification(
359 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); 385 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
360 return cld_language; 386 return cld_language;
361 } 387 }
362 388
(...skipping 229 matching lines...) Expand 10 before | Expand all | Expand 10 after
592 WebView* web_view = render_view()->GetWebView(); 618 WebView* web_view = render_view()->GetWebView();
593 if (!web_view) { 619 if (!web_view) {
594 // When the WebView is going away, the render view should have called 620 // When the WebView is going away, the render view should have called
595 // CancelPendingTranslation() which should have stopped any pending work, so 621 // CancelPendingTranslation() which should have stopped any pending work, so
596 // that case should not happen. 622 // that case should not happen.
597 NOTREACHED(); 623 NOTREACHED();
598 return NULL; 624 return NULL;
599 } 625 }
600 return web_view->mainFrame(); 626 return web_view->mainFrame();
601 } 627 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698