Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(129)

Side by Side Diff: chrome/common/translate/language_detection_util.cc

Issue 18911002: Move language detection to chrome/common/. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Full test + rebase Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
hajimehoshi 2013/07/11 07:34:30 (c) is not needed.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate/translate_helper.h" 5 #include "chrome/common/translate/language_detection_util.h"
6 6
7 #include "base/bind.h"
8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 7 #include "base/logging.h"
10 #include "base/message_loop.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_split.h" 8 #include "base/strings/string_split.h"
13 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h" 10 #include "base/time/time.h"
15 #include "chrome/common/chrome_constants.h" 11 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/render_messages.h" 12 #include "chrome/common/translate/translate_helper_metrics.h"
17 #include "chrome/common/translate/translate_util.h" 13 #include "chrome/common/translate/translate_util.h"
18 #include "chrome/renderer/translate/translate_helper_metrics.h"
19 #include "content/public/renderer/render_view.h"
20 #include "third_party/WebKit/public/web/WebDocument.h"
21 #include "third_party/WebKit/public/web/WebElement.h"
22 #include "third_party/WebKit/public/web/WebFrame.h"
23 #include "third_party/WebKit/public/web/WebNode.h"
24 #include "third_party/WebKit/public/web/WebNodeList.h"
25 #include "third_party/WebKit/public/web/WebScriptSource.h"
26 #include "third_party/WebKit/public/web/WebView.h"
27 #include "v8/include/v8.h"
28 14
29 #if defined(ENABLE_LANGUAGE_DETECTION) 15 #if defined(ENABLE_LANGUAGE_DETECTION)
30 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" 16 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
31 #endif 17 #endif
32 18
33 using WebKit::WebDocument;
34 using WebKit::WebElement;
35 using WebKit::WebFrame;
36 using WebKit::WebNode;
37 using WebKit::WebNodeList;
38 using WebKit::WebScriptSource;
39 using WebKit::WebString;
40 using WebKit::WebView;
41
42 namespace { 19 namespace {
43 20
44 // The delay in milliseconds that we'll wait before checking to see if the
45 // translate library injected in the page is ready.
46 const int kTranslateInitCheckDelayMs = 150;
47
48 // The maximum number of times we'll check to see if the translate library
49 // injected in the page is ready.
50 const int kMaxTranslateInitCheckAttempts = 5;
51
52 // The delay we wait in milliseconds before checking whether the translation has
53 // finished.
54 const int kTranslateStatusCheckDelayMs = 400;
55
56 // Language name passed to the Translate element for it to detect the language.
57 const char kAutoDetectionLanguage[] = "auto";
58
59 // Similar language code list. Some languages are very similar and difficult 21 // Similar language code list. Some languages are very similar and difficult
60 // for CLD to distinguish. 22 // for CLD to distinguish.
61 struct SimilarLanguageCode { 23 struct SimilarLanguageCode {
62 const char* const code; 24 const char* const code;
63 int group; 25 int group;
64 }; 26 };
65 27
66 const SimilarLanguageCode kSimilarLanguageCodes[] = { 28 const SimilarLanguageCode kSimilarLanguageCodes[] = {
67 {"bs", 1}, 29 {"bs", 1},
68 {"hr", 1}, 30 {"hr", 1},
(...skipping 12 matching lines...) Expand all
81 } 43 }
82 44
83 // Well-known languages which often have wrong server configuration of 45 // Well-known languages which often have wrong server configuration of
84 // Content-Language: en. 46 // Content-Language: en.
85 // TODO(toyoshim): Remove these static tables and caller functions to 47 // TODO(toyoshim): Remove these static tables and caller functions to
86 // chrome/common/translate, and implement them as std::set<>. 48 // chrome/common/translate, and implement them as std::set<>.
87 const char* kWellKnownCodesOnWrongConfiguration[] = { 49 const char* kWellKnownCodesOnWrongConfiguration[] = {
88 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" 50 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th"
89 }; 51 };
90 52
91 } // namespace 53 // Applies a series of language code modifications in proper order.
54 void ApplyLanguageCodeCorrection(std::string* code) {
55 // Correct well-known format errors.
56 LanguageDetectionUtil::CorrectLanguageCodeTypo(code);
92 57
93 //////////////////////////////////////////////////////////////////////////////// 58 if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) {
94 // TranslateHelper, public: 59 *code = std::string();
95 // 60 return;
96 TranslateHelper::TranslateHelper(content::RenderView* render_view) 61 }
97 : content::RenderViewObserver(render_view),
98 page_id_(-1),
99 translation_pending_(false),
100 weak_method_factory_(this) {
101 }
102 62
103 TranslateHelper::~TranslateHelper() { 63 TranslateUtil::ToTranslateLanguageSynonym(code);
104 CancelPendingTranslation();
105 }
106
107 void TranslateHelper::PageCaptured(int page_id, const string16& contents) {
108 // Get the document language as set by WebKit from the http-equiv
109 // meta tag for "content-language". This may or may not also
110 // have a value derived from the actual Content-Language HTTP
111 // header. The two actually have different meanings (despite the
112 // original intent of http-equiv to be an equivalent) with the former
113 // being the language of the document and the latter being the
114 // language of the intended audience (a distinction really only
115 // relevant for things like language textbooks). This distinction
116 // shouldn't affect translation.
117 WebFrame* main_frame = GetMainFrame();
118 if (!main_frame || render_view()->GetPageId() != page_id)
119 return;
120 page_id_ = page_id;
121 WebDocument document = main_frame->document();
122 std::string content_language = document.contentLanguage().utf8();
123 WebElement html_element = document.documentElement();
124 std::string html_lang;
125 // |html_element| can be null element, e.g. in
126 // BrowserTest.WindowOpenClose.
127 if (!html_element.isNull())
128 html_lang = html_element.getAttribute("lang").utf8();
129 std::string cld_language;
130 bool is_cld_reliable;
131 std::string language = DeterminePageLanguage(
132 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
133
134 if (language.empty())
135 return;
136
137 language_determined_time_ = base::TimeTicks::Now();
138
139 GURL url(document.url());
140 LanguageDetectionDetails details;
141 details.time = base::Time::Now();
142 details.url = url;
143 details.content_language = content_language;
144 details.cld_language = cld_language;
145 details.is_cld_reliable = is_cld_reliable;
146 details.html_root_language = html_lang;
147 details.adopted_language = language;
148
149 // TODO(hajimehoshi): If this affects performance, it should be set only if
150 // translate-internals tab exists.
151 details.contents = contents;
152
153 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
154 routing_id(),
155 details,
156 IsTranslationAllowed(&document) && !language.empty()));
157 }
158
159 void TranslateHelper::CancelPendingTranslation() {
160 weak_method_factory_.InvalidateWeakPtrs();
161 translation_pending_ = false;
162 source_lang_.clear();
163 target_lang_.clear();
164 } 64 }
165 65
166 #if defined(ENABLE_LANGUAGE_DETECTION) 66 #if defined(ENABLE_LANGUAGE_DETECTION)
167 // static 67 // Returns the ISO 639-1 language code of the specified |text|, or 'unknown'
hajimehoshi 2013/07/11 07:34:30 DetermineTextLanguage could return a language code
168 std::string TranslateHelper::DetermineTextLanguage(const string16& text, 68 // if it failed.
169 bool* is_cld_reliable) { 69 // |is_cld_reliable| will be set as true if CLD says the detection is reliable.
70 std::string DetermineTextLanguage(const base::string16& text,
71 bool* is_cld_reliable) {
170 std::string language = chrome::kUnknownLanguageCode; 72 std::string language = chrome::kUnknownLanguageCode;
171 int num_languages = 0; 73 int num_languages = 0;
172 int text_bytes = 0; 74 int text_bytes = 0;
173 bool is_reliable = false; 75 bool is_reliable = false;
174 Language cld_language = 76 Language cld_language =
175 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, 77 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
176 &num_languages, NULL, &text_bytes); 78 &num_languages, NULL, &text_bytes);
177 if (is_cld_reliable != NULL) 79 if (is_cld_reliable != NULL)
178 *is_cld_reliable = is_reliable; 80 *is_cld_reliable = is_reliable;
179 81
(...skipping 12 matching lines...) Expand all
192 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN 94 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
193 // for Simplified Chinese. 95 // for Simplified Chinese.
194 language = LanguageCodeWithDialects(cld_language); 96 language = LanguageCodeWithDialects(cld_language);
195 } 97 }
196 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text 98 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
197 << "\n*************************************\n"; 99 << "\n*************************************\n";
198 return language; 100 return language;
199 } 101 }
200 #endif // defined(ENABLE_LANGUAGE_DETECTION) 102 #endif // defined(ENABLE_LANGUAGE_DETECTION)
201 103
202 //////////////////////////////////////////////////////////////////////////////// 104 // Checks if CLD can complement a sub code when the page language doesn't know
203 // TranslateHelper, protected: 105 // the sub code.
204 // 106 bool CanCLDComplementSubCode(
205 bool TranslateHelper::IsTranslateLibAvailable() {
206 return ExecuteScriptAndGetBoolResult(
207 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && "
208 "typeof cr.googleTranslate.translate == 'function'", false);
209 }
210
211 bool TranslateHelper::IsTranslateLibReady() {
212 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false);
213 }
214
215 bool TranslateHelper::HasTranslationFinished() {
216 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
217 }
218
219 bool TranslateHelper::HasTranslationFailed() {
220 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
221 }
222
223 bool TranslateHelper::StartTranslation() {
224 std::string script = "cr.googleTranslate.translate('" +
225 source_lang_ +
226 "','" +
227 target_lang_ +
228 "')";
229 return ExecuteScriptAndGetBoolResult(script, false);
230 }
231
232 std::string TranslateHelper::GetOriginalPageLanguage() {
233 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
234 }
235
236 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
237 // Just converts |delayInMs| without any modification in practical cases.
238 // Tests will override this function to return modified value.
239 return base::TimeDelta::FromMilliseconds(delayInMs);
240 }
241
242 void TranslateHelper::ExecuteScript(const std::string& script) {
243 WebFrame* main_frame = GetMainFrame();
244 if (main_frame)
245 main_frame->executeScript(WebScriptSource(ASCIIToUTF16(script)));
246 }
247
248 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
249 bool fallback) {
250 WebFrame* main_frame = GetMainFrame();
251 if (!main_frame)
252 return fallback;
253
254 v8::HandleScope handle_scope;
255 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
256 WebScriptSource(ASCIIToUTF16(script)));
257 if (v.IsEmpty() || !v->IsBoolean()) {
258 NOTREACHED();
259 return fallback;
260 }
261
262 return v->BooleanValue();
263 }
264
265 std::string TranslateHelper::ExecuteScriptAndGetStringResult(
266 const std::string& script) {
267 WebFrame* main_frame = GetMainFrame();
268 if (!main_frame)
269 return std::string();
270
271 v8::HandleScope handle_scope;
272 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
273 WebScriptSource(ASCIIToUTF16(script)));
274 if (v.IsEmpty() || !v->IsString()) {
275 NOTREACHED();
276 return std::string();
277 }
278
279 v8::Local<v8::String> v8_str = v->ToString();
280 int length = v8_str->Utf8Length() + 1;
281 scoped_ptr<char[]> str(new char[length]);
282 v8_str->WriteUtf8(str.get(), length);
283 return std::string(str.get());
284 }
285
286 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
287 const std::string& script) {
288 WebFrame* main_frame = GetMainFrame();
289 if (!main_frame)
290 return 0.0;
291
292 v8::HandleScope handle_scope;
293 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
294 WebScriptSource(ASCIIToUTF16(script)));
295 if (v.IsEmpty() || !v->IsNumber()) {
296 NOTREACHED();
297 return 0.0;
298 }
299
300 return v->NumberValue();
301 }
302
303 ////////////////////////////////////////////////////////////////////////////////
304 // TranslateHelper, private:
305 //
306 // static
307 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) {
308 DCHECK(code);
309
310 size_t coma_index = code->find(',');
311 if (coma_index != std::string::npos) {
312 // There is more than one language specified; just keep the first one.
313 *code = code->substr(0, coma_index);
314 }
315 TrimWhitespaceASCII(*code, TRIM_ALL, code);
316
317 // An underscore instead of a dash is a frequent mistake.
318 size_t underscore_index = code->find('_');
319 if (underscore_index != std::string::npos)
320 (*code)[underscore_index] = '-';
321
322 // Change everything up to a dash to lower-case and everything after to upper.
323 size_t dash_index = code->find('-');
324 if (dash_index != std::string::npos) {
325 *code = StringToLowerASCII(code->substr(0, dash_index)) +
326 StringToUpperASCII(code->substr(dash_index));
327 } else {
328 *code = StringToLowerASCII(*code);
329 }
330 }
331
332 // static
333 bool TranslateHelper::IsValidLanguageCode(const std::string& code) {
334 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
335 // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
336 std::vector<std::string> chunks;
337 base::SplitString(code, '-', &chunks);
338
339 if (chunks.size() < 1 || 2 < chunks.size())
340 return false;
341
342 const std::string& main_code = chunks[0];
343
344 if (main_code.size() < 1 || 3 < main_code.size())
345 return false;
346
347 for (std::string::const_iterator it = main_code.begin();
348 it != main_code.end(); ++it) {
349 if (!IsAsciiAlpha(*it))
350 return false;
351 }
352
353 if (chunks.size() == 1)
354 return true;
355
356 const std::string& sub_code = chunks[1];
357
358 if (sub_code.size() != 2)
359 return false;
360
361 for (std::string::const_iterator it = sub_code.begin();
362 it != sub_code.end(); ++it) {
363 if (!IsAsciiAlpha(*it))
364 return false;
365 }
366
367 return true;
368 }
369
370 // static
371 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) {
372 // Correct well-known format errors.
373 CorrectLanguageCodeTypo(code);
374
375 if (!IsValidLanguageCode(*code)) {
376 *code = std::string();
377 return;
378 }
379
380 TranslateUtil::ToTranslateLanguageSynonym(code);
381 }
382
383 // static
384 bool TranslateHelper::IsSameOrSimilarLanguages(
385 const std::string& page_language, const std::string& cld_language) {
386 // Language code part of |page_language| is matched to one of |cld_language|.
387 // Country code is ignored here.
388 if (page_language.size() >= 2 &&
389 cld_language.find(page_language.c_str(), 0, 2) == 0) {
390 // Languages are matched strictly. Reports false to metrics, but returns
391 // true.
392 TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
393 return true;
394 }
395
396 // Check if |page_language| and |cld_language| are in the similar language
397 // list and belong to the same language group.
398 int page_code = GetSimilarLanguageGroupCode(page_language);
399 bool match = page_code != 0 &&
400 page_code == GetSimilarLanguageGroupCode(cld_language);
401
402 TranslateHelperMetrics::ReportSimilarLanguageMatch(match);
403 return match;
404 }
405
406 // static
407 bool TranslateHelper::MaybeServerWrongConfiguration(
408 const std::string& page_language, const std::string& cld_language) {
409 // If |page_language| is not "en-*", respect it and just return false here.
410 if (!StartsWithASCII(page_language, "en", false))
411 return false;
412
413 // A server provides a language meta information representing "en-*". But it
414 // might be just a default value due to missing user configuration.
415 // Let's trust |cld_language| if the determined language is not difficult to
416 // distinguish from English, and the language is one of well-known languages
417 // which often provide "en-*" meta information mistakenly.
418 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
419 if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
420 return true;
421 }
422 return false;
423 }
424
425 // static
426 bool TranslateHelper::CanCLDComplementSubCode(
427 const std::string& page_language, const std::string& cld_language) { 107 const std::string& page_language, const std::string& cld_language) {
428 // Translate server cannot treat general Chinese. If Content-Language and 108 // Translate server cannot treat general Chinese. If Content-Language and
429 // CLD agree that the language is Chinese and Content-Language doesn't know 109 // CLD agree that the language is Chinese and Content-Language doesn't know
430 // which dialect is used, CLD language has priority. 110 // which dialect is used, CLD language has priority.
431 // TODO(hajimehoshi): How about the other dialects like zh-MO? 111 // TODO(hajimehoshi): How about the other dialects like zh-MO?
432 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false); 112 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false);
433 } 113 }
434 114
435 // static 115 } // namespace
436 std::string TranslateHelper::DeterminePageLanguage(const std::string& code, 116
437 const std::string& html_lang, 117 namespace LanguageDetectionUtil {
438 const string16& contents, 118
439 std::string* cld_language_p, 119 std::string DeterminePageLanguage(const std::string& code,
440 bool* is_cld_reliable_p) { 120 const std::string& html_lang,
121 const base::string16& contents,
122 std::string* cld_language_p,
123 bool* is_cld_reliable_p) {
441 #if defined(ENABLE_LANGUAGE_DETECTION) 124 #if defined(ENABLE_LANGUAGE_DETECTION)
442 base::TimeTicks begin_time = base::TimeTicks::Now(); 125 base::TimeTicks begin_time = base::TimeTicks::Now();
443 bool is_cld_reliable; 126 bool is_cld_reliable;
444 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable); 127 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
445 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time, 128 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time,
446 base::TimeTicks::Now()); 129 base::TimeTicks::Now());
447 130
448 if (cld_language_p != NULL) 131 if (cld_language_p != NULL)
449 *cld_language_p = cld_language; 132 *cld_language_p = cld_language;
450 if (is_cld_reliable_p != NULL) 133 if (is_cld_reliable_p != NULL)
(...skipping 58 matching lines...) Expand 10 before | Expand all | Expand 10 after
509 return std::string(chrome::kUnknownLanguageCode); 192 return std::string(chrome::kUnknownLanguageCode);
510 } 193 }
511 #else // defined(ENABLE_LANGUAGE_DETECTION) 194 #else // defined(ENABLE_LANGUAGE_DETECTION)
512 TranslateHelperMetrics::ReportLanguageVerification( 195 TranslateHelperMetrics::ReportLanguageVerification(
513 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED); 196 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
514 #endif // defined(ENABLE_LANGUAGE_DETECTION) 197 #endif // defined(ENABLE_LANGUAGE_DETECTION)
515 198
516 return language; 199 return language;
517 } 200 }
518 201
519 // static 202 void CorrectLanguageCodeTypo(std::string* code) {
520 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) { 203 DCHECK(code);
521 WebElement head = document->head(); 204
522 if (head.isNull() || !head.hasChildNodes()) 205 size_t coma_index = code->find(',');
206 if (coma_index != std::string::npos) {
207 // There is more than one language specified; just keep the first one.
208 *code = code->substr(0, coma_index);
209 }
210 TrimWhitespaceASCII(*code, TRIM_ALL, code);
211
212 // An underscore instead of a dash is a frequent mistake.
213 size_t underscore_index = code->find('_');
214 if (underscore_index != std::string::npos)
215 (*code)[underscore_index] = '-';
216
217 // Change everything up to a dash to lower-case and everything after to upper.
218 size_t dash_index = code->find('-');
219 if (dash_index != std::string::npos) {
220 *code = StringToLowerASCII(code->substr(0, dash_index)) +
221 StringToUpperASCII(code->substr(dash_index));
222 } else {
223 *code = StringToLowerASCII(*code);
224 }
225 }
226
227 bool IsValidLanguageCode(const std::string& code) {
228 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
229 // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
230 std::vector<std::string> chunks;
231 base::SplitString(code, '-', &chunks);
232
233 if (chunks.size() < 1 || 2 < chunks.size())
234 return false;
235
236 const std::string& main_code = chunks[0];
237
238 if (main_code.size() < 1 || 3 < main_code.size())
239 return false;
240
241 for (std::string::const_iterator it = main_code.begin();
242 it != main_code.end(); ++it) {
243 if (!IsAsciiAlpha(*it))
244 return false;
245 }
246
247 if (chunks.size() == 1)
523 return true; 248 return true;
524 249
525 const WebString meta(ASCIIToUTF16("meta")); 250 const std::string& sub_code = chunks[1];
526 const WebString name(ASCIIToUTF16("name"));
527 const WebString google(ASCIIToUTF16("google"));
528 const WebString value(ASCIIToUTF16("value"));
529 const WebString content(ASCIIToUTF16("content"));
530 251
531 WebNodeList children = head.childNodes(); 252 if (sub_code.size() != 2)
532 for (size_t i = 0; i < children.length(); ++i) { 253 return false;
533 WebNode node = children.item(i); 254
534 if (!node.isElementNode()) 255 for (std::string::const_iterator it = sub_code.begin();
535 continue; 256 it != sub_code.end(); ++it) {
536 WebElement element = node.to<WebElement>(); 257 if (!IsAsciiAlpha(*it))
537 // Check if a tag is <meta>.
538 if (!element.hasTagName(meta))
539 continue;
540 // Check if the tag contains name="google".
541 WebString attribute = element.getAttribute(name);
542 if (attribute.isNull() || attribute != google)
543 continue;
544 // Check if the tag contains value="notranslate", or content="notranslate".
545 attribute = element.getAttribute(value);
546 if (attribute.isNull())
547 attribute = element.getAttribute(content);
548 if (attribute.isNull())
549 continue;
550 if (LowerCaseEqualsASCII(attribute, "notranslate"))
551 return false; 258 return false;
552 } 259 }
260
553 return true; 261 return true;
554 } 262 }
555 263
556 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) { 264 bool IsSameOrSimilarLanguages(const std::string& page_language,
557 bool handled = true; 265 const std::string& cld_language) {
558 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message) 266 // Language code part of |page_language| is matched to one of |cld_language|.
559 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage) 267 // Country code is ignored here.
560 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation) 268 if (page_language.size() >= 2 &&
561 IPC_MESSAGE_UNHANDLED(handled = false) 269 cld_language.find(page_language.c_str(), 0, 2) == 0) {
562 IPC_END_MESSAGE_MAP() 270 // Languages are matched strictly. Reports false to metrics, but returns
563 return handled; 271 // true.
272 TranslateHelperMetrics::ReportSimilarLanguageMatch(false);
273 return true;
274 }
275
276 // Check if |page_language| and |cld_language| are in the similar language
277 // list and belong to the same language group.
278 int page_code = GetSimilarLanguageGroupCode(page_language);
279 bool match = page_code != 0 &&
280 page_code == GetSimilarLanguageGroupCode(cld_language);
281
282 TranslateHelperMetrics::ReportSimilarLanguageMatch(match);
283 return match;
564 } 284 }
565 285
566 void TranslateHelper::OnTranslatePage(int page_id, 286 bool MaybeServerWrongConfiguration(const std::string& page_language,
567 const std::string& translate_script, 287 const std::string& cld_language) {
568 const std::string& source_lang, 288 // If |page_language| is not "en-*", respect it and just return false here.
569 const std::string& target_lang) { 289 if (!StartsWithASCII(page_language, "en", false))
570 WebFrame* main_frame = GetMainFrame(); 290 return false;
571 if (!main_frame ||
572 page_id_ != page_id ||
573 render_view()->GetPageId() != page_id)
574 return; // We navigated away, nothing to do.
575 291
576 // A similar translation is already under way, nothing to do. 292 // A server provides a language meta information representing "en-*". But it
577 if (translation_pending_ && target_lang_ == target_lang) 293 // might be just a default value due to missing user configuration.
578 return; 294 // Let's trust |cld_language| if the determined language is not difficult to
579 295 // distinguish from English, and the language is one of well-known languages
580 // Any pending translation is now irrelevant. 296 // which often provide "en-*" meta information mistakenly.
581 CancelPendingTranslation(); 297 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
582 298 if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
583 // Set our states. 299 return true;
584 translation_pending_ = true;
585
586 // If the source language is undetermined, we'll let the translate element
587 // detect it.
588 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ?
589 source_lang : kAutoDetectionLanguage;
590 target_lang_ = target_lang;
591
592 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_,
593 base::TimeTicks::Now());
594
595 GURL url(main_frame->document().url());
596 TranslateHelperMetrics::ReportPageScheme(url.scheme());
597
598 if (!IsTranslateLibAvailable()) {
599 // Evaluate the script to add the translation related method to the global
600 // context of the page.
601 ExecuteScript(translate_script);
602 DCHECK(IsTranslateLibAvailable());
603 } 300 }
604 301 return false;
605 TranslatePageImpl(0);
606 } 302 }
607 303
608 void TranslateHelper::OnRevertTranslation(int page_id) { 304 } // namespace LanguageDetectionUtil
609 if (page_id_ != page_id || render_view()->GetPageId() != page_id)
610 return; // We navigated away, nothing to do.
611
612 if (!IsTranslateLibAvailable()) {
613 NOTREACHED();
614 return;
615 }
616
617 CancelPendingTranslation();
618
619 ExecuteScript("cr.googleTranslate.revert()");
620 }
621
622 void TranslateHelper::CheckTranslateStatus() {
623 // If this is not the same page, the translation has been canceled. If the
624 // view is gone, the page is closing.
625 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
626 return;
627
628 // First check if there was an error.
629 if (HasTranslationFailed()) {
630 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
631 return; // There was an error.
632 }
633
634 if (HasTranslationFinished()) {
635 std::string actual_source_lang;
636 // Translation was successful; if it was auto, retrieve the source
637 // language the Translate Element detected.
638 if (source_lang_ == kAutoDetectionLanguage) {
639 actual_source_lang = GetOriginalPageLanguage();
640 if (actual_source_lang.empty()) {
641 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
642 return;
643 } else if (actual_source_lang == target_lang_) {
644 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
645 return;
646 }
647 } else {
648 actual_source_lang = source_lang_;
649 }
650
651 if (!translation_pending_) {
652 NOTREACHED();
653 return;
654 }
655
656 translation_pending_ = false;
657
658 // Check JavaScript performance counters for UMA reports.
659 TranslateHelperMetrics::ReportTimeToTranslate(
660 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
661
662 // Notify the browser we are done.
663 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
664 render_view()->GetRoutingID(), render_view()->GetPageId(),
665 actual_source_lang, target_lang_, TranslateErrors::NONE));
666 return;
667 }
668
669 // The translation is still pending, check again later.
670 base::MessageLoop::current()->PostDelayedTask(
671 FROM_HERE,
672 base::Bind(&TranslateHelper::CheckTranslateStatus,
673 weak_method_factory_.GetWeakPtr()),
674 AdjustDelay(kTranslateStatusCheckDelayMs));
675 }
676
677 void TranslateHelper::TranslatePageImpl(int count) {
678 DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
679 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
680 return;
681
682 if (!IsTranslateLibReady()) {
683 // The library is not ready, try again later, unless we have tried several
684 // times unsuccessfully already.
685 if (++count >= kMaxTranslateInitCheckAttempts) {
686 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR);
687 return;
688 }
689 base::MessageLoop::current()->PostDelayedTask(
690 FROM_HERE,
691 base::Bind(&TranslateHelper::TranslatePageImpl,
692 weak_method_factory_.GetWeakPtr(),
693 count),
694 AdjustDelay(count * kTranslateInitCheckDelayMs));
695 return;
696 }
697
698 // The library is loaded, and ready for translation now.
699 // Check JavaScript performance counters for UMA reports.
700 TranslateHelperMetrics::ReportTimeToBeReady(
701 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
702 TranslateHelperMetrics::ReportTimeToLoad(
703 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
704
705 if (!StartTranslation()) {
706 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
707 return;
708 }
709 // Check the status of the translation.
710 base::MessageLoop::current()->PostDelayedTask(
711 FROM_HERE,
712 base::Bind(&TranslateHelper::CheckTranslateStatus,
713 weak_method_factory_.GetWeakPtr()),
714 AdjustDelay(kTranslateStatusCheckDelayMs));
715 }
716
717 void TranslateHelper::NotifyBrowserTranslationFailed(
718 TranslateErrors::Type error) {
719 translation_pending_ = false;
720 // Notify the browser there was an error.
721 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
722 render_view()->GetRoutingID(), page_id_, source_lang_,
723 target_lang_, error));
724 }
725
726 WebFrame* TranslateHelper::GetMainFrame() {
727 WebView* web_view = render_view()->GetWebView();
728
729 // When the tab is going to be closed, the web_view can be NULL.
730 if (!web_view)
731 return NULL;
732
733 return web_view->mainFrame();
734 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698