| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/common/translate/language_detection_util.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | |
| 8 #include "base/compiler_specific.h" | |
| 9 #include "base/logging.h" | 7 #include "base/logging.h" |
| 10 #include "base/message_loop.h" | |
| 11 #include "base/strings/string16.h" | |
| 12 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
| 13 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
| 14 #include "base/strings/utf_string_conversions.h" | 10 #include "base/time/time.h" |
| 15 #include "chrome/common/chrome_constants.h" | 11 #include "chrome/common/chrome_constants.h" |
| 16 #include "chrome/common/render_messages.h" | 12 #include "chrome/common/translate/translate_common_metrics.h" |
| 17 #include "chrome/common/translate/translate_util.h" | 13 #include "chrome/common/translate/translate_util.h" |
| 18 #include "chrome/renderer/translate/translate_helper_metrics.h" | |
| 19 #include "content/public/renderer/render_view.h" | |
| 20 #include "third_party/WebKit/public/web/WebDocument.h" | |
| 21 #include "third_party/WebKit/public/web/WebElement.h" | |
| 22 #include "third_party/WebKit/public/web/WebFrame.h" | |
| 23 #include "third_party/WebKit/public/web/WebNode.h" | |
| 24 #include "third_party/WebKit/public/web/WebNodeList.h" | |
| 25 #include "third_party/WebKit/public/web/WebScriptSource.h" | |
| 26 #include "third_party/WebKit/public/web/WebView.h" | |
| 27 #include "v8/include/v8.h" | |
| 28 | 14 |
| 29 #if defined(ENABLE_LANGUAGE_DETECTION) | 15 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 30 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" | 16 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" |
| 31 #endif | 17 #endif |
| 32 | 18 |
| 33 using WebKit::WebDocument; | |
| 34 using WebKit::WebElement; | |
| 35 using WebKit::WebFrame; | |
| 36 using WebKit::WebNode; | |
| 37 using WebKit::WebNodeList; | |
| 38 using WebKit::WebScriptSource; | |
| 39 using WebKit::WebString; | |
| 40 using WebKit::WebView; | |
| 41 | |
| 42 namespace { | 19 namespace { |
| 43 | 20 |
| 44 // The delay in milliseconds that we'll wait before checking to see if the | |
| 45 // translate library injected in the page is ready. | |
| 46 const int kTranslateInitCheckDelayMs = 150; | |
| 47 | |
| 48 // The maximum number of times we'll check to see if the translate library | |
| 49 // injected in the page is ready. | |
| 50 const int kMaxTranslateInitCheckAttempts = 5; | |
| 51 | |
| 52 // The delay we wait in milliseconds before checking whether the translation has | |
| 53 // finished. | |
| 54 const int kTranslateStatusCheckDelayMs = 400; | |
| 55 | |
| 56 // Language name passed to the Translate element for it to detect the language. | |
| 57 const char kAutoDetectionLanguage[] = "auto"; | |
| 58 | |
| 59 // Similar language code list. Some languages are very similar and difficult | 21 // Similar language code list. Some languages are very similar and difficult |
| 60 // for CLD to distinguish. | 22 // for CLD to distinguish. |
| 61 struct SimilarLanguageCode { | 23 struct SimilarLanguageCode { |
| 62 const char* const code; | 24 const char* const code; |
| 63 int group; | 25 int group; |
| 64 }; | 26 }; |
| 65 | 27 |
| 66 const SimilarLanguageCode kSimilarLanguageCodes[] = { | 28 const SimilarLanguageCode kSimilarLanguageCodes[] = { |
| 67 {"bs", 1}, | 29 {"bs", 1}, |
| 68 {"hr", 1}, | 30 {"hr", 1}, |
| (...skipping 12 matching lines...) Expand all Loading... |
| 81 } | 43 } |
| 82 | 44 |
| 83 // Well-known languages which often have wrong server configuration of | 45 // Well-known languages which often have wrong server configuration of |
| 84 // Content-Language: en. | 46 // Content-Language: en. |
| 85 // TODO(toyoshim): Move these static tables and caller functions to | 47 // TODO(toyoshim): Move these static tables and caller functions to |
| 86 // chrome/common/translate, and implement them as std::set<>. | 48 // chrome/common/translate, and implement them as std::set<>. |
| 87 const char* kWellKnownCodesOnWrongConfiguration[] = { | 49 const char* kWellKnownCodesOnWrongConfiguration[] = { |
| 88 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" | 50 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" |
| 89 }; | 51 }; |
| 90 | 52 |
| 91 } // namespace | 53 // Applies a series of language code modifications in the proper order. |
| 54 void ApplyLanguageCodeCorrection(std::string* code) { |
| 55 // Correct well-known format errors. |
| 56 LanguageDetectionUtil::CorrectLanguageCodeTypo(code); |
| 92 | 57 |
| 93 //////////////////////////////////////////////////////////////////////////////// | 58 if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) { |
| 94 // TranslateHelper, public: | 59 *code = std::string(); |
| 95 // | 60 return; |
| 96 TranslateHelper::TranslateHelper(content::RenderView* render_view) | 61 } |
| 97 : content::RenderViewObserver(render_view), | |
| 98 page_id_(-1), | |
| 99 translation_pending_(false), | |
| 100 weak_method_factory_(this) { | |
| 101 } | |
| 102 | 62 |
| 103 TranslateHelper::~TranslateHelper() { | 63 TranslateUtil::ToTranslateLanguageSynonym(code); |
| 104 CancelPendingTranslation(); | |
| 105 } | |
| 106 | |
| 107 void TranslateHelper::PageCaptured(int page_id, const string16& contents) { | |
| 108 // Get the document language as set by WebKit from the http-equiv | |
| 109 // meta tag for "content-language". This may or may not also | |
| 110 // have a value derived from the actual Content-Language HTTP | |
| 111 // header. The two actually have different meanings (despite the | |
| 112 // original intent of http-equiv to be an equivalent) with the former | |
| 113 // being the language of the document and the latter being the | |
| 114 // language of the intended audience (a distinction really only | |
| 115 // relevant for things like language textbooks). This distinction | |
| 116 // shouldn't affect translation. | |
| 117 WebFrame* main_frame = GetMainFrame(); | |
| 118 if (!main_frame || render_view()->GetPageId() != page_id) | |
| 119 return; | |
| 120 page_id_ = page_id; | |
| 121 WebDocument document = main_frame->document(); | |
| 122 std::string content_language = document.contentLanguage().utf8(); | |
| 123 WebElement html_element = document.documentElement(); | |
| 124 std::string html_lang; | |
| 125 // |html_element| can be null element, e.g. in | |
| 126 // BrowserTest.WindowOpenClose. | |
| 127 if (!html_element.isNull()) | |
| 128 html_lang = html_element.getAttribute("lang").utf8(); | |
| 129 std::string cld_language; | |
| 130 bool is_cld_reliable; | |
| 131 std::string language = DeterminePageLanguage( | |
| 132 content_language, html_lang, contents, &cld_language, &is_cld_reliable); | |
| 133 | |
| 134 if (language.empty()) | |
| 135 return; | |
| 136 | |
| 137 language_determined_time_ = base::TimeTicks::Now(); | |
| 138 | |
| 139 GURL url(document.url()); | |
| 140 LanguageDetectionDetails details; | |
| 141 details.time = base::Time::Now(); | |
| 142 details.url = url; | |
| 143 details.content_language = content_language; | |
| 144 details.cld_language = cld_language; | |
| 145 details.is_cld_reliable = is_cld_reliable; | |
| 146 details.html_root_language = html_lang; | |
| 147 details.adopted_language = language; | |
| 148 | |
| 149 // TODO(hajimehoshi): If this affects performance, it should be set only if | |
| 150 // translate-internals tab exists. | |
| 151 details.contents = contents; | |
| 152 | |
| 153 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | |
| 154 routing_id(), | |
| 155 details, | |
| 156 IsTranslationAllowed(&document) && !language.empty())); | |
| 157 } | |
| 158 | |
| 159 void TranslateHelper::CancelPendingTranslation() { | |
| 160 weak_method_factory_.InvalidateWeakPtrs(); | |
| 161 translation_pending_ = false; | |
| 162 source_lang_.clear(); | |
| 163 target_lang_.clear(); | |
| 164 } | 64 } |
| 165 | 65 |
| 166 #if defined(ENABLE_LANGUAGE_DETECTION) | 66 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 167 // static | 67 // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it |
| 168 std::string TranslateHelper::DetermineTextLanguage(const string16& text, | 68 // failed. |
| 169 bool* is_cld_reliable) { | 69 // |is_cld_reliable| will be set as true if CLD says the detection is reliable. |
| 70 std::string DetermineTextLanguage(const base::string16& text, |
| 71 bool* is_cld_reliable) { |
| 170 std::string language = chrome::kUnknownLanguageCode; | 72 std::string language = chrome::kUnknownLanguageCode; |
| 171 int num_languages = 0; | 73 int num_languages = 0; |
| 172 int text_bytes = 0; | 74 int text_bytes = 0; |
| 173 bool is_reliable = false; | 75 bool is_reliable = false; |
| 174 Language cld_language = | 76 Language cld_language = |
| 175 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 77 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
| 176 &num_languages, NULL, &text_bytes); | 78 &num_languages, NULL, &text_bytes); |
| 177 if (is_cld_reliable != NULL) | 79 if (is_cld_reliable != NULL) |
| 178 *is_cld_reliable = is_reliable; | 80 *is_cld_reliable = is_reliable; |
| 179 | 81 |
| (...skipping 12 matching lines...) Expand all Loading... |
| 192 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 94 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
| 193 // for Simplified Chinese. | 95 // for Simplified Chinese. |
| 194 language = LanguageCodeWithDialects(cld_language); | 96 language = LanguageCodeWithDialects(cld_language); |
| 195 } | 97 } |
| 196 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text | 98 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text |
| 197 << "\n*************************************\n"; | 99 << "\n*************************************\n"; |
| 198 return language; | 100 return language; |
| 199 } | 101 } |
| 200 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 102 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
| 201 | 103 |
| 202 //////////////////////////////////////////////////////////////////////////////// | 104 // Checks if CLD can complement a sub code when the page language doesn't know |
| 203 // TranslateHelper, protected: | 105 // the sub code. |
| 204 // | 106 bool CanCLDComplementSubCode( |
| 205 bool TranslateHelper::IsTranslateLibAvailable() { | 107 const std::string& page_language, const std::string& cld_language) { |
| 206 return ExecuteScriptAndGetBoolResult( | 108 // Translate server cannot treat general Chinese. If Content-Language and |
| 207 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && " | 109 // CLD agree that the language is Chinese and Content-Language doesn't know |
| 208 "typeof cr.googleTranslate.translate == 'function'", false); | 110 // which dialect is used, CLD language has priority. |
| 111 // TODO(hajimehoshi): How about the other dialects like zh-MO? |
| 112 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); |
| 209 } | 113 } |
| 210 | 114 |
| 211 bool TranslateHelper::IsTranslateLibReady() { | 115 } // namespace |
| 212 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false); | 116 |
| 117 namespace LanguageDetectionUtil { |
| 118 |
| 119 std::string DeterminePageLanguage(const std::string& code, |
| 120 const std::string& html_lang, |
| 121 const base::string16& contents, |
| 122 std::string* cld_language_p, |
| 123 bool* is_cld_reliable_p) { |
| 124 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 125 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 126 bool is_cld_reliable; |
| 127 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
| 128 TranslateCommonMetrics::ReportLanguageDetectionTime(begin_time, |
| 129 base::TimeTicks::Now()); |
| 130 |
| 131 if (cld_language_p != NULL) |
| 132 *cld_language_p = cld_language; |
| 133 if (is_cld_reliable_p != NULL) |
| 134 *is_cld_reliable_p = is_cld_reliable; |
| 135 TranslateUtil::ToTranslateLanguageSynonym(&cld_language); |
| 136 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
| 137 |
| 138 // Check if html lang attribute is valid. |
| 139 std::string modified_html_lang; |
| 140 if (!html_lang.empty()) { |
| 141 modified_html_lang = html_lang; |
| 142 ApplyLanguageCodeCorrection(&modified_html_lang); |
| 143 TranslateCommonMetrics::ReportHtmlLang(html_lang, modified_html_lang); |
| 144 VLOG(9) << "html lang based language code: " << modified_html_lang; |
| 145 } |
| 146 |
| 147 // Check if Content-Language is valid. |
| 148 std::string modified_code; |
| 149 if (!code.empty()) { |
| 150 modified_code = code; |
| 151 ApplyLanguageCodeCorrection(&modified_code); |
| 152 TranslateCommonMetrics::ReportContentLanguage(code, modified_code); |
| 153 } |
| 154 |
| 155 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt |
| 156 // |modified_code|. |
| 157 std::string language = modified_html_lang.empty() ? modified_code : |
| 158 modified_html_lang; |
| 159 |
| 160 #if defined(ENABLE_LANGUAGE_DETECTION) |
| 161 // If |language| is empty, just use CLD result even though it might be |
| 162 // chrome::kUnknownLanguageCode. |
| 163 if (language.empty()) { |
| 164 TranslateCommonMetrics::ReportLanguageVerification( |
| 165 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); |
| 166 return cld_language; |
| 167 } |
| 168 |
| 169 if (cld_language == chrome::kUnknownLanguageCode) { |
| 170 TranslateCommonMetrics::ReportLanguageVerification( |
| 171 TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN); |
| 172 return language; |
| 173 } else if (IsSameOrSimilarLanguages(language, cld_language)) { |
| 174 TranslateCommonMetrics::ReportLanguageVerification( |
| 175 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); |
| 176 return language; |
| 177 } else if (MaybeServerWrongConfiguration(language, cld_language)) { |
| 178 TranslateCommonMetrics::ReportLanguageVerification( |
| 179 TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); |
| 180 return cld_language; |
| 181 } else if (CanCLDComplementSubCode(language, cld_language)) { |
| 182 TranslateCommonMetrics::ReportLanguageVerification( |
| 183 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); |
| 184 return cld_language; |
| 185 } else { |
| 186 TranslateCommonMetrics::ReportLanguageVerification( |
| 187 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); |
| 188 // Content-Language value might be wrong because CLD says that this page |
| 189 // is written in another language with confidence. |
| 190 // In this case, Chrome doesn't rely on any of the language codes, and |
| 191 // gives up suggesting a translation. |
| 192 return std::string(chrome::kUnknownLanguageCode); |
| 193 } |
| 194 #else // defined(ENABLE_LANGUAGE_DETECTION) |
| 195 TranslateCommonMetrics::ReportLanguageVerification( |
| 196 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); |
| 197 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
| 198 |
| 199 return language; |
| 213 } | 200 } |
| 214 | 201 |
| 215 bool TranslateHelper::HasTranslationFinished() { | 202 void CorrectLanguageCodeTypo(std::string* code) { |
| 216 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true); | |
| 217 } | |
| 218 | |
| 219 bool TranslateHelper::HasTranslationFailed() { | |
| 220 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true); | |
| 221 } | |
| 222 | |
| 223 bool TranslateHelper::StartTranslation() { | |
| 224 std::string script = "cr.googleTranslate.translate('" + | |
| 225 source_lang_ + | |
| 226 "','" + | |
| 227 target_lang_ + | |
| 228 "')"; | |
| 229 return ExecuteScriptAndGetBoolResult(script, false); | |
| 230 } | |
| 231 | |
| 232 std::string TranslateHelper::GetOriginalPageLanguage() { | |
| 233 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang"); | |
| 234 } | |
| 235 | |
| 236 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) { | |
| 237 // Just converts |delayInMs| without any modification in practical cases. | |
| 238 // Tests will override this function to return modified value. | |
| 239 return base::TimeDelta::FromMilliseconds(delayInMs); | |
| 240 } | |
| 241 | |
| 242 void TranslateHelper::ExecuteScript(const std::string& script) { | |
| 243 WebFrame* main_frame = GetMainFrame(); | |
| 244 if (main_frame) | |
| 245 main_frame->executeScript(WebScriptSource(ASCIIToUTF16(script))); | |
| 246 } | |
| 247 | |
| 248 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script, | |
| 249 bool fallback) { | |
| 250 WebFrame* main_frame = GetMainFrame(); | |
| 251 if (!main_frame) | |
| 252 return fallback; | |
| 253 | |
| 254 v8::HandleScope handle_scope; | |
| 255 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
| 256 WebScriptSource(ASCIIToUTF16(script))); | |
| 257 if (v.IsEmpty() || !v->IsBoolean()) { | |
| 258 NOTREACHED(); | |
| 259 return fallback; | |
| 260 } | |
| 261 | |
| 262 return v->BooleanValue(); | |
| 263 } | |
| 264 | |
| 265 std::string TranslateHelper::ExecuteScriptAndGetStringResult( | |
| 266 const std::string& script) { | |
| 267 WebFrame* main_frame = GetMainFrame(); | |
| 268 if (!main_frame) | |
| 269 return std::string(); | |
| 270 | |
| 271 v8::HandleScope handle_scope; | |
| 272 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
| 273 WebScriptSource(ASCIIToUTF16(script))); | |
| 274 if (v.IsEmpty() || !v->IsString()) { | |
| 275 NOTREACHED(); | |
| 276 return std::string(); | |
| 277 } | |
| 278 | |
| 279 v8::Local<v8::String> v8_str = v->ToString(); | |
| 280 int length = v8_str->Utf8Length() + 1; | |
| 281 scoped_ptr<char[]> str(new char[length]); | |
| 282 v8_str->WriteUtf8(str.get(), length); | |
| 283 return std::string(str.get()); | |
| 284 } | |
| 285 | |
| 286 double TranslateHelper::ExecuteScriptAndGetDoubleResult( | |
| 287 const std::string& script) { | |
| 288 WebFrame* main_frame = GetMainFrame(); | |
| 289 if (!main_frame) | |
| 290 return 0.0; | |
| 291 | |
| 292 v8::HandleScope handle_scope; | |
| 293 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
| 294 WebScriptSource(ASCIIToUTF16(script))); | |
| 295 if (v.IsEmpty() || !v->IsNumber()) { | |
| 296 NOTREACHED(); | |
| 297 return 0.0; | |
| 298 } | |
| 299 | |
| 300 return v->NumberValue(); | |
| 301 } | |
| 302 | |
| 303 //////////////////////////////////////////////////////////////////////////////// | |
| 304 // TranslateHelper, private: | |
| 305 // | |
| 306 // static | |
| 307 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) { | |
| 308 DCHECK(code); | 203 DCHECK(code); |
| 309 | 204 |
| 310 size_t coma_index = code->find(','); | 205 size_t coma_index = code->find(','); |
| 311 if (coma_index != std::string::npos) { | 206 if (coma_index != std::string::npos) { |
| 312 // There is more than one language specified; just keep the first one. | 207 // There is more than one language specified; just keep the first one. |
| 313 *code = code->substr(0, coma_index); | 208 *code = code->substr(0, coma_index); |
| 314 } | 209 } |
| 315 TrimWhitespaceASCII(*code, TRIM_ALL, code); | 210 TrimWhitespaceASCII(*code, TRIM_ALL, code); |
| 316 | 211 |
| 317 // An underscore instead of a dash is a frequent mistake. | 212 // An underscore instead of a dash is a frequent mistake. |
| 318 size_t underscore_index = code->find('_'); | 213 size_t underscore_index = code->find('_'); |
| 319 if (underscore_index != std::string::npos) | 214 if (underscore_index != std::string::npos) |
| 320 (*code)[underscore_index] = '-'; | 215 (*code)[underscore_index] = '-'; |
| 321 | 216 |
| 322 // Change everything up to a dash to lower-case and everything after to upper. | 217 // Change everything up to a dash to lower-case and everything after to upper. |
| 323 size_t dash_index = code->find('-'); | 218 size_t dash_index = code->find('-'); |
| 324 if (dash_index != std::string::npos) { | 219 if (dash_index != std::string::npos) { |
| 325 *code = StringToLowerASCII(code->substr(0, dash_index)) + | 220 *code = StringToLowerASCII(code->substr(0, dash_index)) + |
| 326 StringToUpperASCII(code->substr(dash_index)); | 221 StringToUpperASCII(code->substr(dash_index)); |
| 327 } else { | 222 } else { |
| 328 *code = StringToLowerASCII(*code); | 223 *code = StringToLowerASCII(*code); |
| 329 } | 224 } |
| 330 } | 225 } |
| 331 | 226 |
| 332 // static | 227 bool IsValidLanguageCode(const std::string& code) { |
| 333 bool TranslateHelper::IsValidLanguageCode(const std::string& code) { | |
| 334 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. | 228 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. |
| 335 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? | 229 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? |
| 336 std::vector<std::string> chunks; | 230 std::vector<std::string> chunks; |
| 337 base::SplitString(code, '-', &chunks); | 231 base::SplitString(code, '-', &chunks); |
| 338 | 232 |
| 339 if (chunks.size() < 1 || 2 < chunks.size()) | 233 if (chunks.size() < 1 || 2 < chunks.size()) |
| 340 return false; | 234 return false; |
| 341 | 235 |
| 342 const std::string& main_code = chunks[0]; | 236 const std::string& main_code = chunks[0]; |
| 343 | 237 |
| (...skipping 16 matching lines...) Expand all Loading... |
| 360 | 254 |
| 361 for (std::string::const_iterator it = sub_code.begin(); | 255 for (std::string::const_iterator it = sub_code.begin(); |
| 362 it != sub_code.end(); ++it) { | 256 it != sub_code.end(); ++it) { |
| 363 if (!IsAsciiAlpha(*it)) | 257 if (!IsAsciiAlpha(*it)) |
| 364 return false; | 258 return false; |
| 365 } | 259 } |
| 366 | 260 |
| 367 return true; | 261 return true; |
| 368 } | 262 } |
| 369 | 263 |
| 370 // static | 264 bool IsSameOrSimilarLanguages(const std::string& page_language, |
| 371 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { | 265 const std::string& cld_language) { |
| 372 // Correct well-known format errors. | |
| 373 CorrectLanguageCodeTypo(code); | |
| 374 | |
| 375 if (!IsValidLanguageCode(*code)) { | |
| 376 *code = std::string(); | |
| 377 return; | |
| 378 } | |
| 379 | |
| 380 TranslateUtil::ToTranslateLanguageSynonym(code); | |
| 381 } | |
| 382 | |
| 383 // static | |
| 384 bool TranslateHelper::IsSameOrSimilarLanguages( | |
| 385 const std::string& page_language, const std::string& cld_language) { | |
| 386 // Language code part of |page_language| is matched to one of |cld_language|. | 266 // Language code part of |page_language| is matched to one of |cld_language|. |
| 387 // Country code is ignored here. | 267 // Country code is ignored here. |
| 388 if (page_language.size() >= 2 && | 268 if (page_language.size() >= 2 && |
| 389 cld_language.find(page_language.c_str(), 0, 2) == 0) { | 269 cld_language.find(page_language.c_str(), 0, 2) == 0) { |
| 390 // Languages are matched strictly. Reports false to metrics, but returns | 270 // Languages are matched strictly. Reports false to metrics, but returns |
| 391 // true. | 271 // true. |
| 392 TranslateHelperMetrics::ReportSimilarLanguageMatch(false); | 272 TranslateCommonMetrics::ReportSimilarLanguageMatch(false); |
| 393 return true; | 273 return true; |
| 394 } | 274 } |
| 395 | 275 |
| 396 // Check if |page_language| and |cld_language| are in the similar language | 276 // Check if |page_language| and |cld_language| are in the similar language |
| 397 // list and belong to the same language group. | 277 // list and belong to the same language group. |
| 398 int page_code = GetSimilarLanguageGroupCode(page_language); | 278 int page_code = GetSimilarLanguageGroupCode(page_language); |
| 399 bool match = page_code != 0 && | 279 bool match = page_code != 0 && |
| 400 page_code == GetSimilarLanguageGroupCode(cld_language); | 280 page_code == GetSimilarLanguageGroupCode(cld_language); |
| 401 | 281 |
| 402 TranslateHelperMetrics::ReportSimilarLanguageMatch(match); | 282 TranslateCommonMetrics::ReportSimilarLanguageMatch(match); |
| 403 return match; | 283 return match; |
| 404 } | 284 } |
| 405 | 285 |
| 406 // static | 286 bool MaybeServerWrongConfiguration(const std::string& page_language, |
| 407 bool TranslateHelper::MaybeServerWrongConfiguration( | 287 const std::string& cld_language) { |
| 408 const std::string& page_language, const std::string& cld_language) { | |
| 409 // If |page_language| is not "en-*", respect it and just return false here. | 288 // If |page_language| is not "en-*", respect it and just return false here. |
| 410 if (!StartsWithASCII(page_language, "en", false)) | 289 if (!StartsWithASCII(page_language, "en", false)) |
| 411 return false; | 290 return false; |
| 412 | 291 |
| 413 // A server provides language meta information representing "en-*". But it | 292 // A server provides language meta information representing "en-*". But it |
| 414 // might be just a default value due to missing user configuration. | 293 // might be just a default value due to missing user configuration. |
| 415 // Let's trust |cld_language| if the determined language is not difficult to | 294 // Let's trust |cld_language| if the determined language is not difficult to |
| 416 // distinguish from English, and the language is one of well-known languages | 295 // distinguish from English, and the language is one of well-known languages |
| 417 // which often provide "en-*" meta information mistakenly. | 296 // which often provide "en-*" meta information mistakenly. |
| 418 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { | 297 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { |
| 419 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) | 298 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) |
| 420 return true; | 299 return true; |
| 421 } | 300 } |
| 422 return false; | 301 return false; |
| 423 } | 302 } |
| 424 | 303 |
| 425 // static | 304 } // namespace LanguageDetectionUtil |
| 426 bool TranslateHelper::CanCLDComplementSubCode( | |
| 427 const std::string& page_language, const std::string& cld_language) { | |
| 428 // Translate server cannot treat general Chinese. If Content-Language and | |
| 429 // CLD agree that the language is Chinese and Content-Language doesn't know | |
| 430 // which dialect is used, CLD language has priority. | |
| 431 // TODO(hajimehoshi): How about the other dialects like zh-MO? | |
| 432 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); | |
| 433 } | |
| 434 | |
| 435 // static | |
| 436 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | |
| 437 const std::string& html_lang, | |
| 438 const string16& contents, | |
| 439 std::string* cld_language_p, | |
| 440 bool* is_cld_reliable_p) { | |
| 441 #if defined(ENABLE_LANGUAGE_DETECTION) | |
| 442 base::TimeTicks begin_time = base::TimeTicks::Now(); | |
| 443 bool is_cld_reliable; | |
| 444 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); | |
| 445 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | |
| 446 base::TimeTicks::Now()); | |
| 447 | |
| 448 if (cld_language_p != NULL) | |
| 449 *cld_language_p = cld_language; | |
| 450 if (is_cld_reliable_p != NULL) | |
| 451 *is_cld_reliable_p = is_cld_reliable; | |
| 452 TranslateUtil::ToTranslateLanguageSynonym(&cld_language); | |
| 453 #endif // defined(ENABLE_LANGUAGE_DETECTION) | |
| 454 | |
| 455 // Check if html lang attribute is valid. | |
| 456 std::string modified_html_lang; | |
| 457 if (!html_lang.empty()) { | |
| 458 modified_html_lang = html_lang; | |
| 459 ApplyLanguageCodeCorrection(&modified_html_lang); | |
| 460 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang); | |
| 461 VLOG(9) << "html lang based language code: " << modified_html_lang; | |
| 462 } | |
| 463 | |
| 464 // Check if Content-Language is valid. | |
| 465 std::string modified_code; | |
| 466 if (!code.empty()) { | |
| 467 modified_code = code; | |
| 468 ApplyLanguageCodeCorrection(&modified_code); | |
| 469 TranslateHelperMetrics::ReportContentLanguage(code, modified_code); | |
| 470 } | |
| 471 | |
| 472 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt | |
| 473 // |modified_code|. | |
| 474 std::string language = modified_html_lang.empty() ? modified_code : | |
| 475 modified_html_lang; | |
| 476 | |
| 477 #if defined(ENABLE_LANGUAGE_DETECTION) | |
| 478 // If |language| is empty, just use CLD result even though it might be | |
| 479 // chrome::kUnknownLanguageCode. | |
| 480 if (language.empty()) { | |
| 481 TranslateHelperMetrics::ReportLanguageVerification( | |
| 482 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY); | |
| 483 return cld_language; | |
| 484 } | |
| 485 | |
| 486 if (cld_language == chrome::kUnknownLanguageCode) { | |
| 487 TranslateHelperMetrics::ReportLanguageVerification( | |
| 488 TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN); | |
| 489 return language; | |
| 490 } else if (IsSameOrSimilarLanguages(language, cld_language)) { | |
| 491 TranslateHelperMetrics::ReportLanguageVerification( | |
| 492 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE); | |
| 493 return language; | |
| 494 } else if (MaybeServerWrongConfiguration(language, cld_language)) { | |
| 495 TranslateHelperMetrics::ReportLanguageVerification( | |
| 496 TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD); | |
| 497 return cld_language; | |
| 498 } else if (CanCLDComplementSubCode(language, cld_language)) { | |
| 499 TranslateHelperMetrics::ReportLanguageVerification( | |
| 500 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE); | |
| 501 return cld_language; | |
| 502 } else { | |
| 503 TranslateHelperMetrics::ReportLanguageVerification( | |
| 504 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE); | |
| 505 // Content-Language value might be wrong because CLD says that this page | |
| 506 // is written in another language with confidence. | |
| 507 // In this case, Chrome doesn't rely on any of the language codes, and | |
| 508 // gives up suggesting a translation. | |
| 509 return std::string(chrome::kUnknownLanguageCode); | |
| 510 } | |
| 511 #else // defined(ENABLE_LANGUAGE_DETECTION) | |
| 512 TranslateHelperMetrics::ReportLanguageVerification( | |
| 513 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); | |
| 514 #endif // defined(ENABLE_LANGUAGE_DETECTION) | |
| 515 | |
| 516 return language; | |
| 517 } | |
| 518 | |
| 519 // static | |
| 520 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) { | |
| 521 WebElement head = document->head(); | |
| 522 if (head.isNull() || !head.hasChildNodes()) | |
| 523 return true; | |
| 524 | |
| 525 const WebString meta(ASCIIToUTF16("meta")); | |
| 526 const WebString name(ASCIIToUTF16("name")); | |
| 527 const WebString google(ASCIIToUTF16("google")); | |
| 528 const WebString value(ASCIIToUTF16("value")); | |
| 529 const WebString content(ASCIIToUTF16("content")); | |
| 530 | |
| 531 WebNodeList children = head.childNodes(); | |
| 532 for (size_t i = 0; i < children.length(); ++i) { | |
| 533 WebNode node = children.item(i); | |
| 534 if (!node.isElementNode()) | |
| 535 continue; | |
| 536 WebElement element = node.to<WebElement>(); | |
| 537 // Check if a tag is <meta>. | |
| 538 if (!element.hasTagName(meta)) | |
| 539 continue; | |
| 540 // Check if the tag contains name="google". | |
| 541 WebString attribute = element.getAttribute(name); | |
| 542 if (attribute.isNull() || attribute != google) | |
| 543 continue; | |
| 544 // Check if the tag contains value="notranslate", or content="notranslate". | |
| 545 attribute = element.getAttribute(value); | |
| 546 if (attribute.isNull()) | |
| 547 attribute = element.getAttribute(content); | |
| 548 if (attribute.isNull()) | |
| 549 continue; | |
| 550 if (LowerCaseEqualsASCII(attribute, "notranslate")) | |
| 551 return false; | |
| 552 } | |
| 553 return true; | |
| 554 } | |
| 555 | |
| 556 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) { | |
| 557 bool handled = true; | |
| 558 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message) | |
| 559 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage) | |
| 560 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) | |
| 561 IPC_MESSAGE_UNHANDLED(handled = false) | |
| 562 IPC_END_MESSAGE_MAP() | |
| 563 return handled; | |
| 564 } | |
| 565 | |
| 566 void TranslateHelper::OnTranslatePage(int page_id, | |
| 567 const std::string& translate_script, | |
| 568 const std::string& source_lang, | |
| 569 const std::string& target_lang) { | |
| 570 WebFrame* main_frame = GetMainFrame(); | |
| 571 if (!main_frame || | |
| 572 page_id_ != page_id || | |
| 573 render_view()->GetPageId() != page_id) | |
| 574 return; // We navigated away, nothing to do. | |
| 575 | |
| 576 // A similar translation is already under way, nothing to do. | |
| 577 if (translation_pending_ && target_lang_ == target_lang) | |
| 578 return; | |
| 579 | |
| 580 // Any pending translation is now irrelevant. | |
| 581 CancelPendingTranslation(); | |
| 582 | |
| 583 // Set our states. | |
| 584 translation_pending_ = true; | |
| 585 | |
| 586 // If the source language is undetermined, we'll let the translate element | |
| 587 // detect it. | |
| 588 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ? | |
| 589 source_lang : kAutoDetectionLanguage; | |
| 590 target_lang_ = target_lang; | |
| 591 | |
| 592 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, | |
| 593 base::TimeTicks::Now()); | |
| 594 | |
| 595 GURL url(main_frame->document().url()); | |
| 596 TranslateHelperMetrics::ReportPageScheme(url.scheme()); | |
| 597 | |
| 598 if (!IsTranslateLibAvailable()) { | |
| 599 // Evaluate the script to add the translation related method to the global | |
| 600 // context of the page. | |
| 601 ExecuteScript(translate_script); | |
| 602 DCHECK(IsTranslateLibAvailable()); | |
| 603 } | |
| 604 | |
| 605 TranslatePageImpl(0); | |
| 606 } | |
| 607 | |
| 608 void TranslateHelper::OnRevertTranslation(int page_id) { | |
| 609 if (page_id_ != page_id || render_view()->GetPageId() != page_id) | |
| 610 return; // We navigated away, nothing to do. | |
| 611 | |
| 612 if (!IsTranslateLibAvailable()) { | |
| 613 NOTREACHED(); | |
| 614 return; | |
| 615 } | |
| 616 | |
| 617 CancelPendingTranslation(); | |
| 618 | |
| 619 ExecuteScript("cr.googleTranslate.revert()"); | |
| 620 } | |
| 621 | |
| 622 void TranslateHelper::CheckTranslateStatus() { | |
| 623 // If this is not the same page, the translation has been canceled. If the | |
| 624 // view is gone, the page is closing. | |
| 625 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) | |
| 626 return; | |
| 627 | |
| 628 // First check if there was an error. | |
| 629 if (HasTranslationFailed()) { | |
| 630 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); | |
| 631 return; // There was an error. | |
| 632 } | |
| 633 | |
| 634 if (HasTranslationFinished()) { | |
| 635 std::string actual_source_lang; | |
| 636 // Translation was successful; if it was auto, retrieve the source | |
| 637 // language the Translate Element detected. | |
| 638 if (source_lang_ == kAutoDetectionLanguage) { | |
| 639 actual_source_lang = GetOriginalPageLanguage(); | |
| 640 if (actual_source_lang.empty()) { | |
| 641 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE); | |
| 642 return; | |
| 643 } else if (actual_source_lang == target_lang_) { | |
| 644 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES); | |
| 645 return; | |
| 646 } | |
| 647 } else { | |
| 648 actual_source_lang = source_lang_; | |
| 649 } | |
| 650 | |
| 651 if (!translation_pending_) { | |
| 652 NOTREACHED(); | |
| 653 return; | |
| 654 } | |
| 655 | |
| 656 translation_pending_ = false; | |
| 657 | |
| 658 // Check JavaScript performance counters for UMA reports. | |
| 659 TranslateHelperMetrics::ReportTimeToTranslate( | |
| 660 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime")); | |
| 661 | |
| 662 // Notify the browser we are done. | |
| 663 render_view()->Send(new ChromeViewHostMsg_PageTranslated( | |
| 664 render_view()->GetRoutingID(), render_view()->GetPageId(), | |
| 665 actual_source_lang, target_lang_, TranslateErrors::NONE)); | |
| 666 return; | |
| 667 } | |
| 668 | |
| 669 // The translation is still pending, check again later. | |
| 670 base::MessageLoop::current()->PostDelayedTask( | |
| 671 FROM_HERE, | |
| 672 base::Bind(&TranslateHelper::CheckTranslateStatus, | |
| 673 weak_method_factory_.GetWeakPtr()), | |
| 674 AdjustDelay(kTranslateStatusCheckDelayMs)); | |
| 675 } | |
| 676 | |
| 677 void TranslateHelper::TranslatePageImpl(int count) { | |
| 678 DCHECK_LT(count, kMaxTranslateInitCheckAttempts); | |
| 679 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) | |
| 680 return; | |
| 681 | |
| 682 if (!IsTranslateLibReady()) { | |
| 683 // The library is not ready, try again later, unless we have tried several | |
| 684 // times unsuccessfully already. | |
| 685 if (++count >= kMaxTranslateInitCheckAttempts) { | |
| 686 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR); | |
| 687 return; | |
| 688 } | |
| 689 base::MessageLoop::current()->PostDelayedTask( | |
| 690 FROM_HERE, | |
| 691 base::Bind(&TranslateHelper::TranslatePageImpl, | |
| 692 weak_method_factory_.GetWeakPtr(), | |
| 693 count), | |
| 694 AdjustDelay(count * kTranslateInitCheckDelayMs)); | |
| 695 return; | |
| 696 } | |
| 697 | |
| 698 // The library is loaded, and ready for translation now. | |
| 699 // Check JavaScript performance counters for UMA reports. | |
| 700 TranslateHelperMetrics::ReportTimeToBeReady( | |
| 701 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime")); | |
| 702 TranslateHelperMetrics::ReportTimeToLoad( | |
| 703 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime")); | |
| 704 | |
| 705 if (!StartTranslation()) { | |
| 706 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); | |
| 707 return; | |
| 708 } | |
| 709 // Check the status of the translation. | |
| 710 base::MessageLoop::current()->PostDelayedTask( | |
| 711 FROM_HERE, | |
| 712 base::Bind(&TranslateHelper::CheckTranslateStatus, | |
| 713 weak_method_factory_.GetWeakPtr()), | |
| 714 AdjustDelay(kTranslateStatusCheckDelayMs)); | |
| 715 } | |
| 716 | |
| 717 void TranslateHelper::NotifyBrowserTranslationFailed( | |
| 718 TranslateErrors::Type error) { | |
| 719 translation_pending_ = false; | |
| 720 // Notify the browser there was an error. | |
| 721 render_view()->Send(new ChromeViewHostMsg_PageTranslated( | |
| 722 render_view()->GetRoutingID(), page_id_, source_lang_, | |
| 723 target_lang_, error)); | |
| 724 } | |
| 725 | |
| 726 WebFrame* TranslateHelper::GetMainFrame() { | |
| 727 WebView* web_view = render_view()->GetWebView(); | |
| 728 | |
| 729 // When the tab is going to be closed, the web_view can be NULL. | |
| 730 if (!web_view) | |
| 731 return NULL; | |
| 732 | |
| 733 return web_view->mainFrame(); | |
| 734 } | |
| OLD | NEW |