Index: chrome/renderer/translate/translate_helper.cc |
diff --git a/chrome/renderer/translate/translate_helper.cc b/chrome/renderer/translate/translate_helper.cc |
index 2cf95db19e1a8af387e6380a5b4ec39e0c021742..3335a22af1a402bce8fe158b7fddf8da520321fb 100644 |
--- a/chrome/renderer/translate/translate_helper.cc |
+++ b/chrome/renderer/translate/translate_helper.cc |
@@ -95,16 +95,19 @@ void TranslateHelper::PageCaptured(const string16& contents) { |
// shouldn't affect translation. |
WebDocument document = GetMainFrame()->document(); |
std::string content_language = document.contentLanguage().utf8(); |
+ std::string html_lang = |
+ document.documentElement().getAttribute("lang").utf8(); |
std::string cld_language; |
bool is_cld_reliable; |
std::string language = DeterminePageLanguage( |
- content_language, contents, &cld_language, &is_cld_reliable); |
+ content_language, html_lang, contents, &cld_language, &is_cld_reliable); |
if (language.empty()) |
return; |
language_determined_time_ = base::TimeTicks::Now(); |
+ // TODO(toyoshim): Add |html_lang| to LanguageDetectionDetails. |
GURL url(document.url()); |
LanguageDetectionDetails details; |
details.time = base::Time::Now(); |
@@ -319,7 +322,20 @@ void TranslateHelper::ResetInvalidLanguageCode(std::string* code) { |
} |
// static |
+void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { |
+ // Normalize |code| in place; start by fixing well-known format errors. |
+ CorrectLanguageCodeTypo(code); |
+ |
+ // Convert synonyms before validating, because some synonym codes (e.g. |
+ // 'fil') are not in a valid format: validation would reset such a |
+ // three-character language code to an empty string. |
+ ConvertLanguageCodeSynonym(code); |
+ ResetInvalidLanguageCode(code); |
+} |
+ |
+// static |
std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
+ const std::string& html_lang, |
const string16& contents, |
std::string* cld_language_p, |
bool* is_cld_reliable_p) { |
@@ -338,18 +354,28 @@ std::string TranslateHelper::DeterminePageLanguage(const std::string& code, |
VLOG(9) << "CLD determined language code: " << cld_language; |
#endif // defined(ENABLE_LANGUAGE_DETECTION) |
- // Correct well-known format errors. |
- std::string language = code; |
- CorrectLanguageCodeTypo(&language); |
+ // Check if html lang attribute is valid. |
+ std::string modified_html_lang; |
+ if (!html_lang.empty()) { |
+ modified_html_lang = html_lang; |
+ ApplyLanguageCodeCorrection(&modified_html_lang); |
+ TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang); |
+ VLOG(9) << "html lang based language code: " << modified_html_lang; |
+ } |
- // Convert language code synonym firstly because sometime synonym code is in |
- // invalid format, e.g. 'fil'. After validation, such a 3 characters language |
- // gets converted to an empty string. |
- ConvertLanguageCodeSynonym(&language); |
- ResetInvalidLanguageCode(&language); |
- VLOG(9) << "Content-Language based language code: " << language; |
+ // Check if Content-Language is valid. |
+ std::string modified_code; |
+ if (!code.empty()) { |
+ modified_code = code; |
+ ApplyLanguageCodeCorrection(&modified_code); |
+ TranslateHelperMetrics::ReportContentLanguage(code, modified_code); |
+ VLOG(9) << "Content-Language based language code: " << modified_code; |
+ } |
- TranslateHelperMetrics::ReportContentLanguage(code, language); |
+ // Adopt |modified_html_lang| if it is valid. Otherwise, adopt |
+ // |modified_code|. |
+ std::string language = modified_html_lang.empty() ? modified_code : |
+ modified_html_lang; |
#if defined(ENABLE_LANGUAGE_DETECTION) |
// If |language| is empty, just use CLD result even though it might be |