OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved. |
hajimehoshi
2013/07/11 07:34:30
(c) is not needed.
| |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate/translate_helper.h" | 5 #include "chrome/common/translate/language_detection_util.h" |
6 | 6 |
7 #include "base/bind.h" | |
8 #include "base/compiler_specific.h" | |
9 #include "base/logging.h" | 7 #include "base/logging.h" |
10 #include "base/message_loop.h" | |
11 #include "base/strings/string16.h" | |
12 #include "base/strings/string_split.h" | 8 #include "base/strings/string_split.h" |
13 #include "base/strings/string_util.h" | 9 #include "base/strings/string_util.h" |
14 #include "base/strings/utf_string_conversions.h" | 10 #include "base/time/time.h" |
15 #include "chrome/common/chrome_constants.h" | 11 #include "chrome/common/chrome_constants.h" |
16 #include "chrome/common/render_messages.h" | 12 #include "chrome/common/translate/translate_helper_metrics.h" |
17 #include "chrome/common/translate/translate_util.h" | 13 #include "chrome/common/translate/translate_util.h" |
18 #include "chrome/renderer/translate/translate_helper_metrics.h" | |
19 #include "content/public/renderer/render_view.h" | |
20 #include "third_party/WebKit/public/web/WebDocument.h" | |
21 #include "third_party/WebKit/public/web/WebElement.h" | |
22 #include "third_party/WebKit/public/web/WebFrame.h" | |
23 #include "third_party/WebKit/public/web/WebNode.h" | |
24 #include "third_party/WebKit/public/web/WebNodeList.h" | |
25 #include "third_party/WebKit/public/web/WebScriptSource.h" | |
26 #include "third_party/WebKit/public/web/WebView.h" | |
27 #include "v8/include/v8.h" | |
28 | 14 |
29 #if defined(ENABLE_LANGUAGE_DETECTION) | 15 #if defined(ENABLE_LANGUAGE_DETECTION) |
30 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" | 16 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" |
31 #endif | 17 #endif |
32 | 18 |
33 using WebKit::WebDocument; | |
34 using WebKit::WebElement; | |
35 using WebKit::WebFrame; | |
36 using WebKit::WebNode; | |
37 using WebKit::WebNodeList; | |
38 using WebKit::WebScriptSource; | |
39 using WebKit::WebString; | |
40 using WebKit::WebView; | |
41 | |
42 namespace { | 19 namespace { |
43 | 20 |
44 // The delay in milliseconds that we'll wait before checking to see if the | |
45 // translate library injected in the page is ready. | |
46 const int kTranslateInitCheckDelayMs = 150; | |
47 | |
48 // The maximum number of times we'll check to see if the translate library | |
49 // injected in the page is ready. | |
50 const int kMaxTranslateInitCheckAttempts = 5; | |
51 | |
52 // The delay we wait in milliseconds before checking whether the translation has | |
53 // finished. | |
54 const int kTranslateStatusCheckDelayMs = 400; | |
55 | |
56 // Language name passed to the Translate element for it to detect the language. | |
57 const char kAutoDetectionLanguage[] = "auto"; | |
58 | |
59 // Similar language code list. Some languages are very similar and difficult | 21 // Similar language code list. Some languages are very similar and difficult |
60 // for CLD to distinguish. | 22 // for CLD to distinguish. |
61 struct SimilarLanguageCode { | 23 struct SimilarLanguageCode { |
62 const char* const code; | 24 const char* const code; |
63 int group; | 25 int group; |
64 }; | 26 }; |
65 | 27 |
66 const SimilarLanguageCode kSimilarLanguageCodes[] = { | 28 const SimilarLanguageCode kSimilarLanguageCodes[] = { |
67 {"bs", 1}, | 29 {"bs", 1}, |
68 {"hr", 1}, | 30 {"hr", 1}, |
(...skipping 12 matching lines...) Expand all Loading... | |
81 } | 43 } |
82 | 44 |
83 // Well-known languages which often have wrong server configuration of | 45 // Well-known languages which often have wrong server configuration of |
84 // Content-Language: en. | 46 // Content-Language: en. |
85 // TODO(toyoshim): Move these static tables and caller functions to | 47 // TODO(toyoshim): Move these static tables and caller functions to |
86 // chrome/common/translate, and implement them as std::set<>. | 48 // chrome/common/translate, and implement them as std::set<>. |
87 const char* kWellKnownCodesOnWrongConfiguration[] = { | 49 const char* kWellKnownCodesOnWrongConfiguration[] = { |
88 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" | 50 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" |
89 }; | 51 }; |
90 | 52 |
91 } // namespace | 53 // Applies a series of language code modifications in proper order. |
54 void ApplyLanguageCodeCorrection(std::string* code) { | |
55 // Correct well-known format errors. | |
56 LanguageDetectionUtil::CorrectLanguageCodeTypo(code); | |
92 | 57 |
93 //////////////////////////////////////////////////////////////////////////////// | 58 if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) { |
94 // TranslateHelper, public: | 59 *code = std::string(); |
95 // | 60 return; |
96 TranslateHelper::TranslateHelper(content::RenderView* render_view) | 61 } |
97 : content::RenderViewObserver(render_view), | |
98 page_id_(-1), | |
99 translation_pending_(false), | |
100 weak_method_factory_(this) { | |
101 } | |
102 | 62 |
103 TranslateHelper::~TranslateHelper() { | 63 TranslateUtil::ToTranslateLanguageSynonym(code); |
104 CancelPendingTranslation(); | |
105 } | |
106 | |
107 void TranslateHelper::PageCaptured(int page_id, const string16& contents) { | |
108 // Get the document language as set by WebKit from the http-equiv | |
109 // meta tag for "content-language". This may or may not also | |
110 // have a value derived from the actual Content-Language HTTP | |
111 // header. The two actually have different meanings (despite the | |
112 // original intent of http-equiv to be an equivalent) with the former | |
113 // being the language of the document and the latter being the | |
114 // language of the intended audience (a distinction really only | |
115 // relevant for things like language textbooks). This distinction | |
116 // shouldn't affect translation. | |
117 WebFrame* main_frame = GetMainFrame(); | |
118 if (!main_frame || render_view()->GetPageId() != page_id) | |
119 return; | |
120 page_id_ = page_id; | |
121 WebDocument document = main_frame->document(); | |
122 std::string content_language = document.contentLanguage().utf8(); | |
123 WebElement html_element = document.documentElement(); | |
124 std::string html_lang; | |
125 // |html_element| can be null element, e.g. in | |
126 // BrowserTest.WindowOpenClose. | |
127 if (!html_element.isNull()) | |
128 html_lang = html_element.getAttribute("lang").utf8(); | |
129 std::string cld_language; | |
130 bool is_cld_reliable; | |
131 std::string language = DeterminePageLanguage( | |
132 content_language, html_lang, contents, &cld_language, &is_cld_reliable); | |
133 | |
134 if (language.empty()) | |
135 return; | |
136 | |
137 language_determined_time_ = base::TimeTicks::Now(); | |
138 | |
139 GURL url(document.url()); | |
140 LanguageDetectionDetails details; | |
141 details.time = base::Time::Now(); | |
142 details.url = url; | |
143 details.content_language = content_language; | |
144 details.cld_language = cld_language; | |
145 details.is_cld_reliable = is_cld_reliable; | |
146 details.html_root_language = html_lang; | |
147 details.adopted_language = language; | |
148 | |
149 // TODO(hajimehoshi): If this affects performance, it should be set only if | |
150 // translate-internals tab exists. | |
151 details.contents = contents; | |
152 | |
153 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | |
154 routing_id(), | |
155 details, | |
156 IsTranslationAllowed(&document) && !language.empty())); | |
157 } | |
158 | |
159 void TranslateHelper::CancelPendingTranslation() { | |
160 weak_method_factory_.InvalidateWeakPtrs(); | |
161 translation_pending_ = false; | |
162 source_lang_.clear(); | |
163 target_lang_.clear(); | |
164 } | 64 } |
165 | 65 |
166 #if defined(ENABLE_LANGUAGE_DETECTION) | 66 #if defined(ENABLE_LANGUAGE_DETECTION) |
167 // static | 67 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown' |
hajimehoshi
2013/07/11 07:34:30
DetermineTextLanguage could return a language code
| |
168 std::string TranslateHelper::DetermineTextLanguage(const string16& text, | 68 // if it failed. |
169 bool* is_cld_reliable) { | 69 // |is_cld_reliable| will be set as true if CLD says the detection is reliable. |
70 std::string DetermineTextLanguage(const base::string16& text, | |
71 bool* is_cld_reliable) { | |
170 std::string language = chrome::kUnknownLanguageCode; | 72 std::string language = chrome::kUnknownLanguageCode; |
171 int num_languages = 0; | 73 int num_languages = 0; |
172 int text_bytes = 0; | 74 int text_bytes = 0; |
173 bool is_reliable = false; | 75 bool is_reliable = false; |
174 Language cld_language = | 76 Language cld_language = |
175 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 77 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
176 &num_languages, NULL, &text_bytes); | 78 &num_languages, NULL, &text_bytes); |
177 if (is_cld_reliable != NULL) | 79 if (is_cld_reliable != NULL) |
178 *is_cld_reliable = is_reliable; | 80 *is_cld_reliable = is_reliable; |
179 | 81 |
(...skipping 12 matching lines...) Expand all Loading... | |
192 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN | 94 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN |
193 // for Simplified Chinese. | 95 // for Simplified Chinese. |
194 language = LanguageCodeWithDialects(cld_language); | 96 language = LanguageCodeWithDialects(cld_language); |
195 } | 97 } |
196 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text | 98 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text |
197 << "\n*************************************\n"; | 99 << "\n*************************************\n"; |
198 return language; | 100 return language; |
199 } | 101 } |
200 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 102 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
201 | 103 |
202 //////////////////////////////////////////////////////////////////////////////// | 104 // Checks if CLD can complement a sub code when the page language doesn't know |
203 // TranslateHelper, protected: | 105 // the sub code. |
204 // | 106 bool CanCLDComplementSubCode( |
205 bool TranslateHelper::IsTranslateLibAvailable() { | |
206 return ExecuteScriptAndGetBoolResult( | |
207 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && " | |
208 "typeof cr.googleTranslate.translate == 'function'", false); | |
209 } | |
210 | |
211 bool TranslateHelper::IsTranslateLibReady() { | |
212 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false); | |
213 } | |
214 | |
215 bool TranslateHelper::HasTranslationFinished() { | |
216 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true); | |
217 } | |
218 | |
219 bool TranslateHelper::HasTranslationFailed() { | |
220 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true); | |
221 } | |
222 | |
223 bool TranslateHelper::StartTranslation() { | |
224 std::string script = "cr.googleTranslate.translate('" + | |
225 source_lang_ + | |
226 "','" + | |
227 target_lang_ + | |
228 "')"; | |
229 return ExecuteScriptAndGetBoolResult(script, false); | |
230 } | |
231 | |
232 std::string TranslateHelper::GetOriginalPageLanguage() { | |
233 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang"); | |
234 } | |
235 | |
236 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) { | |
237 // Just converts |delayInMs| without any modification in practical cases. | |
238 // Tests will override this function to return modified value. | |
239 return base::TimeDelta::FromMilliseconds(delayInMs); | |
240 } | |
241 | |
242 void TranslateHelper::ExecuteScript(const std::string& script) { | |
243 WebFrame* main_frame = GetMainFrame(); | |
244 if (main_frame) | |
245 main_frame->executeScript(WebScriptSource(ASCIIToUTF16(script))); | |
246 } | |
247 | |
248 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script, | |
249 bool fallback) { | |
250 WebFrame* main_frame = GetMainFrame(); | |
251 if (!main_frame) | |
252 return fallback; | |
253 | |
254 v8::HandleScope handle_scope; | |
255 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
256 WebScriptSource(ASCIIToUTF16(script))); | |
257 if (v.IsEmpty() || !v->IsBoolean()) { | |
258 NOTREACHED(); | |
259 return fallback; | |
260 } | |
261 | |
262 return v->BooleanValue(); | |
263 } | |
264 | |
265 std::string TranslateHelper::ExecuteScriptAndGetStringResult( | |
266 const std::string& script) { | |
267 WebFrame* main_frame = GetMainFrame(); | |
268 if (!main_frame) | |
269 return std::string(); | |
270 | |
271 v8::HandleScope handle_scope; | |
272 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
273 WebScriptSource(ASCIIToUTF16(script))); | |
274 if (v.IsEmpty() || !v->IsString()) { | |
275 NOTREACHED(); | |
276 return std::string(); | |
277 } | |
278 | |
279 v8::Local<v8::String> v8_str = v->ToString(); | |
280 int length = v8_str->Utf8Length() + 1; | |
281 scoped_ptr<char[]> str(new char[length]); | |
282 v8_str->WriteUtf8(str.get(), length); | |
283 return std::string(str.get()); | |
284 } | |
285 | |
286 double TranslateHelper::ExecuteScriptAndGetDoubleResult( | |
287 const std::string& script) { | |
288 WebFrame* main_frame = GetMainFrame(); | |
289 if (!main_frame) | |
290 return 0.0; | |
291 | |
292 v8::HandleScope handle_scope; | |
293 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue( | |
294 WebScriptSource(ASCIIToUTF16(script))); | |
295 if (v.IsEmpty() || !v->IsNumber()) { | |
296 NOTREACHED(); | |
297 return 0.0; | |
298 } | |
299 | |
300 return v->NumberValue(); | |
301 } | |
302 | |
303 //////////////////////////////////////////////////////////////////////////////// | |
304 // TranslateHelper, private: | |
305 // | |
306 // static | |
307 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) { | |
308 DCHECK(code); | |
309 | |
310 size_t coma_index = code->find(','); | |
311 if (coma_index != std::string::npos) { | |
312 // There are more than 1 language specified, just keep the first one. | |
313 *code = code->substr(0, coma_index); | |
314 } | |
315 TrimWhitespaceASCII(*code, TRIM_ALL, code); | |
316 | |
317 // An underscore instead of a dash is a frequent mistake. | |
318 size_t underscore_index = code->find('_'); | |
319 if (underscore_index != std::string::npos) | |
320 (*code)[underscore_index] = '-'; | |
321 | |
322 // Change everything up to a dash to lower-case and everything after to upper. | |
323 size_t dash_index = code->find('-'); | |
324 if (dash_index != std::string::npos) { | |
325 *code = StringToLowerASCII(code->substr(0, dash_index)) + | |
326 StringToUpperASCII(code->substr(dash_index)); | |
327 } else { | |
328 *code = StringToLowerASCII(*code); | |
329 } | |
330 } | |
331 | |
332 // static | |
333 bool TranslateHelper::IsValidLanguageCode(const std::string& code) { | |
334 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. | |
335 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? | |
336 std::vector<std::string> chunks; | |
337 base::SplitString(code, '-', &chunks); | |
338 | |
339 if (chunks.size() < 1 || 2 < chunks.size()) | |
340 return false; | |
341 | |
342 const std::string& main_code = chunks[0]; | |
343 | |
344 if (main_code.size() < 1 || 3 < main_code.size()) | |
345 return false; | |
346 | |
347 for (std::string::const_iterator it = main_code.begin(); | |
348 it != main_code.end(); ++it) { | |
349 if (!IsAsciiAlpha(*it)) | |
350 return false; | |
351 } | |
352 | |
353 if (chunks.size() == 1) | |
354 return true; | |
355 | |
356 const std::string& sub_code = chunks[1]; | |
357 | |
358 if (sub_code.size() != 2) | |
359 return false; | |
360 | |
361 for (std::string::const_iterator it = sub_code.begin(); | |
362 it != sub_code.end(); ++it) { | |
363 if (!IsAsciiAlpha(*it)) | |
364 return false; | |
365 } | |
366 | |
367 return true; | |
368 } | |
369 | |
370 // static | |
371 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { | |
372 // Correct well-known format errors. | |
373 CorrectLanguageCodeTypo(code); | |
374 | |
375 if (!IsValidLanguageCode(*code)) { | |
376 *code = std::string(); | |
377 return; | |
378 } | |
379 | |
380 TranslateUtil::ToTranslateLanguageSynonym(code); | |
381 } | |
382 | |
383 // static | |
384 bool TranslateHelper::IsSameOrSimilarLanguages( | |
385 const std::string& page_language, const std::string& cld_language) { | |
386 // Language code part of |page_language| is matched to one of |cld_language|. | |
387 // Country code is ignored here. | |
388 if (page_language.size() >= 2 && | |
389 cld_language.find(page_language.c_str(), 0, 2) == 0) { | |
390 // Languages are matched strictly. Reports false to metrics, but returns | |
391 // true. | |
392 TranslateHelperMetrics::ReportSimilarLanguageMatch(false); | |
393 return true; | |
394 } | |
395 | |
396 // Check if |page_language| and |cld_language| are in the similar language | |
397 // list and belong to the same language group. | |
398 int page_code = GetSimilarLanguageGroupCode(page_language); | |
399 bool match = page_code != 0 && | |
400 page_code == GetSimilarLanguageGroupCode(cld_language); | |
401 | |
402 TranslateHelperMetrics::ReportSimilarLanguageMatch(match); | |
403 return match; | |
404 } | |
405 | |
406 // static | |
407 bool TranslateHelper::MaybeServerWrongConfiguration( | |
408 const std::string& page_language, const std::string& cld_language) { | |
409 // If |page_language| is not "en-*", respect it and just return false here. | |
410 if (!StartsWithASCII(page_language, "en", false)) | |
411 return false; | |
412 | |
413 // A server provides a language meta information representing "en-*". But it | |
414 // might be just a default value due to missing user configuration. | |
415 // Let's trust |cld_language| if the determined language is not difficult to | |
416 // distinguish from English, and the language is one of well-known languages | |
417 // which often provide "en-*" meta information mistakenly. | |
418 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { | |
419 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) | |
420 return true; | |
421 } | |
422 return false; | |
423 } | |
424 | |
425 // static | |
426 bool TranslateHelper::CanCLDComplementSubCode( | |
427 const std::string& page_language, const std::string& cld_language) { | 107 const std::string& page_language, const std::string& cld_language) { |
428 // Translate server cannot treat general Chinese. If Content-Language and | 108 // Translate server cannot treat general Chinese. If Content-Language and |
429 // CLD agree that the language is Chinese and Content-Language doesn't know | 109 // CLD agree that the language is Chinese and Content-Language doesn't know |
430 // which dialect is used, CLD language has priority. | 110 // which dialect is used, CLD language has priority. |
431 // TODO(hajimehoshi): How about the other dialects like zh-MO? | 111 // TODO(hajimehoshi): How about the other dialects like zh-MO? |
432 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); | 112 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); |
433 } | 113 } |
434 | 114 |
435 // static | 115 } // namespace |
436 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, | 116 |
437 const std::string& html_lang, | 117 namespace LanguageDetectionUtil { |
438 const string16& contents, | 118 |
439 std::string* cld_language_p, | 119 std::string DeterminePageLanguage(const std::string& code, |
440 bool* is_cld_reliable_p) { | 120 const std::string& html_lang, |
121 const base::string16& contents, | |
122 std::string* cld_language_p, | |
123 bool* is_cld_reliable_p) { | |
441 #if defined(ENABLE_LANGUAGE_DETECTION) | 124 #if defined(ENABLE_LANGUAGE_DETECTION) |
442 base::TimeTicks begin_time = base::TimeTicks::Now(); | 125 base::TimeTicks begin_time = base::TimeTicks::Now(); |
443 bool is_cld_reliable; | 126 bool is_cld_reliable; |
444 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); | 127 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); |
445 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, | 128 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, |
446 base::TimeTicks::Now()); | 129 base::TimeTicks::Now()); |
447 | 130 |
448 if (cld_language_p != NULL) | 131 if (cld_language_p != NULL) |
449 *cld_language_p = cld_language; | 132 *cld_language_p = cld_language; |
450 if (is_cld_reliable_p != NULL) | 133 if (is_cld_reliable_p != NULL) |
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
509 return std::string(chrome::kUnknownLanguageCode); | 192 return std::string(chrome::kUnknownLanguageCode); |
510 } | 193 } |
511 #else // defined(ENABLE_LANGUAGE_DETECTION) | 194 #else // defined(ENABLE_LANGUAGE_DETECTION) |
512 TranslateHelperMetrics::ReportLanguageVerification( | 195 TranslateHelperMetrics::ReportLanguageVerification( |
513 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); | 196 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); |
514 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 197 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
515 | 198 |
516 return language; | 199 return language; |
517 } | 200 } |
518 | 201 |
519 // static | 202 void CorrectLanguageCodeTypo(std::string* code) { |
520 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) { | 203 DCHECK(code); |
521 WebElement head = document->head(); | 204 |
522 if (head.isNull() || !head.hasChildNodes()) | 205 size_t coma_index = code->find(','); |
206 if (coma_index != std::string::npos) { | |
207 // There are more than 1 language specified, just keep the first one. | |
208 *code = code->substr(0, coma_index); | |
209 } | |
210 TrimWhitespaceASCII(*code, TRIM_ALL, code); | |
211 | |
212 // An underscore instead of a dash is a frequent mistake. | |
213 size_t underscore_index = code->find('_'); | |
214 if (underscore_index != std::string::npos) | |
215 (*code)[underscore_index] = '-'; | |
216 | |
217 // Change everything up to a dash to lower-case and everything after to upper. | |
218 size_t dash_index = code->find('-'); | |
219 if (dash_index != std::string::npos) { | |
220 *code = StringToLowerASCII(code->substr(0, dash_index)) + | |
221 StringToUpperASCII(code->substr(dash_index)); | |
222 } else { | |
223 *code = StringToLowerASCII(*code); | |
224 } | |
225 } | |
226 | |
227 bool IsValidLanguageCode(const std::string& code) { | |
228 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. | |
229 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? | |
230 std::vector<std::string> chunks; | |
231 base::SplitString(code, '-', &chunks); | |
232 | |
233 if (chunks.size() < 1 || 2 < chunks.size()) | |
234 return false; | |
235 | |
236 const std::string& main_code = chunks[0]; | |
237 | |
238 if (main_code.size() < 1 || 3 < main_code.size()) | |
239 return false; | |
240 | |
241 for (std::string::const_iterator it = main_code.begin(); | |
242 it != main_code.end(); ++it) { | |
243 if (!IsAsciiAlpha(*it)) | |
244 return false; | |
245 } | |
246 | |
247 if (chunks.size() == 1) | |
523 return true; | 248 return true; |
524 | 249 |
525 const WebString meta(ASCIIToUTF16("meta")); | 250 const std::string& sub_code = chunks[1]; |
526 const WebString name(ASCIIToUTF16("name")); | |
527 const WebString google(ASCIIToUTF16("google")); | |
528 const WebString value(ASCIIToUTF16("value")); | |
529 const WebString content(ASCIIToUTF16("content")); | |
530 | 251 |
531 WebNodeList children = head.childNodes(); | 252 if (sub_code.size() != 2) |
532 for (size_t i = 0; i < children.length(); ++i) { | 253 return false; |
533 WebNode node = children.item(i); | 254 |
534 if (!node.isElementNode()) | 255 for (std::string::const_iterator it = sub_code.begin(); |
535 continue; | 256 it != sub_code.end(); ++it) { |
536 WebElement element = node.to<WebElement>(); | 257 if (!IsAsciiAlpha(*it)) |
537 // Check if a tag is <meta>. | |
538 if (!element.hasTagName(meta)) | |
539 continue; | |
540 // Check if the tag contains name="google". | |
541 WebString attribute = element.getAttribute(name); | |
542 if (attribute.isNull() || attribute != google) | |
543 continue; | |
544 // Check if the tag contains value="notranslate", or content="notranslate". | |
545 attribute = element.getAttribute(value); | |
546 if (attribute.isNull()) | |
547 attribute = element.getAttribute(content); | |
548 if (attribute.isNull()) | |
549 continue; | |
550 if (LowerCaseEqualsASCII(attribute, "notranslate")) | |
551 return false; | 258 return false; |
552 } | 259 } |
260 | |
553 return true; | 261 return true; |
554 } | 262 } |
555 | 263 |
556 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) { | 264 bool IsSameOrSimilarLanguages(const std::string& page_language, |
557 bool handled = true; | 265 const std::string& cld_language) { |
558 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message) | 266 // Language code part of |page_language| is matched to one of |cld_language|. |
559 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage) | 267 // Country code is ignored here. |
560 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) | 268 if (page_language.size() >= 2 && |
561 IPC_MESSAGE_UNHANDLED(handled = false) | 269 cld_language.find(page_language.c_str(), 0, 2) == 0) { |
562 IPC_END_MESSAGE_MAP() | 270 // Languages are matched strictly. Reports false to metrics, but returns |
563 return handled; | 271 // true. |
272 TranslateHelperMetrics::ReportSimilarLanguageMatch(false); | |
273 return true; | |
274 } | |
275 | |
276 // Check if |page_language| and |cld_language| are in the similar language | |
277 // list and belong to the same language group. | |
278 int page_code = GetSimilarLanguageGroupCode(page_language); | |
279 bool match = page_code != 0 && | |
280 page_code == GetSimilarLanguageGroupCode(cld_language); | |
281 | |
282 TranslateHelperMetrics::ReportSimilarLanguageMatch(match); | |
283 return match; | |
564 } | 284 } |
565 | 285 |
566 void TranslateHelper::OnTranslatePage(int page_id, | 286 bool MaybeServerWrongConfiguration(const std::string& page_language, |
567 const std::string& translate_script, | 287 const std::string& cld_language) { |
568 const std::string& source_lang, | 288 // If |page_language| is not "en-*", respect it and just return false here. |
569 const std::string& target_lang) { | 289 if (!StartsWithASCII(page_language, "en", false)) |
570 WebFrame* main_frame = GetMainFrame(); | 290 return false; |
571 if (!main_frame || | |
572 page_id_ != page_id || | |
573 render_view()->GetPageId() != page_id) | |
574 return; // We navigated away, nothing to do. | |
575 | 291 |
576 // A similar translation is already under way, nothing to do. | 292 // A server provides a language meta information representing "en-*". But it |
577 if (translation_pending_ && target_lang_ == target_lang) | 293 // might be just a default value due to missing user configuration. |
578 return; | 294 // Let's trust |cld_language| if the determined language is not difficult to |
579 | 295 // distinguish from English, and the language is one of well-known languages |
580 // Any pending translation is now irrelevant. | 296 // which often provide "en-*" meta information mistakenly. |
581 CancelPendingTranslation(); | 297 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { |
582 | 298 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) |
583 // Set our states. | 299 return true; |
584 translation_pending_ = true; | |
585 | |
586 // If the source language is undetermined, we'll let the translate element | |
587 // detect it. | |
588 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ? | |
589 source_lang : kAutoDetectionLanguage; | |
590 target_lang_ = target_lang; | |
591 | |
592 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_, | |
593 base::TimeTicks::Now()); | |
594 | |
595 GURL url(main_frame->document().url()); | |
596 TranslateHelperMetrics::ReportPageScheme(url.scheme()); | |
597 | |
598 if (!IsTranslateLibAvailable()) { | |
599 // Evaluate the script to add the translation related method to the global | |
600 // context of the page. | |
601 ExecuteScript(translate_script); | |
602 DCHECK(IsTranslateLibAvailable()); | |
603 } | 300 } |
604 | 301 return false; |
605 TranslatePageImpl(0); | |
606 } | 302 } |
607 | 303 |
608 void TranslateHelper::OnRevertTranslation(int page_id) { | 304 } // namespace LanguageDetectionUtil |
609 if (page_id_ != page_id || render_view()->GetPageId() != page_id) | |
610 return; // We navigated away, nothing to do. | |
611 | |
612 if (!IsTranslateLibAvailable()) { | |
613 NOTREACHED(); | |
614 return; | |
615 } | |
616 | |
617 CancelPendingTranslation(); | |
618 | |
619 ExecuteScript("cr.googleTranslate.revert()"); | |
620 } | |
621 | |
622 void TranslateHelper::CheckTranslateStatus() { | |
623 // If this is not the same page, the translation has been canceled. If the | |
624 // view is gone, the page is closing. | |
625 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) | |
626 return; | |
627 | |
628 // First check if there was an error. | |
629 if (HasTranslationFailed()) { | |
630 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); | |
631 return; // There was an error. | |
632 } | |
633 | |
634 if (HasTranslationFinished()) { | |
635 std::string actual_source_lang; | |
636 // Translation was successful, if it was auto, retrieve the source | |
637 // language the Translate Element detected. | |
638 if (source_lang_ == kAutoDetectionLanguage) { | |
639 actual_source_lang = GetOriginalPageLanguage(); | |
640 if (actual_source_lang.empty()) { | |
641 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE); | |
642 return; | |
643 } else if (actual_source_lang == target_lang_) { | |
644 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES); | |
645 return; | |
646 } | |
647 } else { | |
648 actual_source_lang = source_lang_; | |
649 } | |
650 | |
651 if (!translation_pending_) { | |
652 NOTREACHED(); | |
653 return; | |
654 } | |
655 | |
656 translation_pending_ = false; | |
657 | |
658 // Check JavaScript performance counters for UMA reports. | |
659 TranslateHelperMetrics::ReportTimeToTranslate( | |
660 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime")); | |
661 | |
662 // Notify the browser we are done. | |
663 render_view()->Send(new ChromeViewHostMsg_PageTranslated( | |
664 render_view()->GetRoutingID(), render_view()->GetPageId(), | |
665 actual_source_lang, target_lang_, TranslateErrors::NONE)); | |
666 return; | |
667 } | |
668 | |
669 // The translation is still pending, check again later. | |
670 base::MessageLoop::current()->PostDelayedTask( | |
671 FROM_HERE, | |
672 base::Bind(&TranslateHelper::CheckTranslateStatus, | |
673 weak_method_factory_.GetWeakPtr()), | |
674 AdjustDelay(kTranslateStatusCheckDelayMs)); | |
675 } | |
676 | |
677 void TranslateHelper::TranslatePageImpl(int count) { | |
678 DCHECK_LT(count, kMaxTranslateInitCheckAttempts); | |
679 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView()) | |
680 return; | |
681 | |
682 if (!IsTranslateLibReady()) { | |
683 // The library is not ready, try again later, unless we have tried several | |
684 // times unsuccessfully already. | |
685 if (++count >= kMaxTranslateInitCheckAttempts) { | |
686 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR); | |
687 return; | |
688 } | |
689 base::MessageLoop::current()->PostDelayedTask( | |
690 FROM_HERE, | |
691 base::Bind(&TranslateHelper::TranslatePageImpl, | |
692 weak_method_factory_.GetWeakPtr(), | |
693 count), | |
694 AdjustDelay(count * kTranslateInitCheckDelayMs)); | |
695 return; | |
696 } | |
697 | |
698 // The library is loaded, and ready for translation now. | |
699 // Check JavaScript performance counters for UMA reports. | |
700 TranslateHelperMetrics::ReportTimeToBeReady( | |
701 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime")); | |
702 TranslateHelperMetrics::ReportTimeToLoad( | |
703 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime")); | |
704 | |
705 if (!StartTranslation()) { | |
706 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR); | |
707 return; | |
708 } | |
709 // Check the status of the translation. | |
710 base::MessageLoop::current()->PostDelayedTask( | |
711 FROM_HERE, | |
712 base::Bind(&TranslateHelper::CheckTranslateStatus, | |
713 weak_method_factory_.GetWeakPtr()), | |
714 AdjustDelay(kTranslateStatusCheckDelayMs)); | |
715 } | |
716 | |
717 void TranslateHelper::NotifyBrowserTranslationFailed( | |
718 TranslateErrors::Type error) { | |
719 translation_pending_ = false; | |
720 // Notify the browser there was an error. | |
721 render_view()->Send(new ChromeViewHostMsg_PageTranslated( | |
722 render_view()->GetRoutingID(), page_id_, source_lang_, | |
723 target_lang_, error)); | |
724 } | |
725 | |
726 WebFrame* TranslateHelper::GetMainFrame() { | |
727 WebView* web_view = render_view()->GetWebView(); | |
728 | |
729 // When the tab is going to be closed, the web_view can be NULL. | |
730 if (!web_view) | |
731 return NULL; | |
732 | |
733 return web_view->mainFrame(); | |
734 } | |
OLD | NEW |