Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(885)

Side by Side Diff: chrome/common/translate/language_detection_util.cc

Issue 18911002: Move language detection to chrome/common/. (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Rebase Created 7 years, 5 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate/translate_helper.h" 5 #include "chrome/common/translate/language_detection_util.h"
6 6
7 #include "base/bind.h"
8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 7 #include "base/logging.h"
10 #include "base/message_loop.h"
11 #include "base/strings/string16.h"
12 #include "base/strings/string_split.h" 8 #include "base/strings/string_split.h"
13 #include "base/strings/string_util.h" 9 #include "base/strings/string_util.h"
14 #include "base/strings/utf_string_conversions.h" 10 #include "base/time/time.h"
15 #include "chrome/common/chrome_constants.h" 11 #include "chrome/common/chrome_constants.h"
16 #include "chrome/common/render_messages.h" 12 #include "chrome/common/translate/translate_common_metrics.h"
17 #include "chrome/common/translate/translate_util.h" 13 #include "chrome/common/translate/translate_util.h"
18 #include "chrome/renderer/translate/translate_helper_metrics.h"
19 #include "content/public/renderer/render_view.h"
20 #include "third_party/WebKit/public/web/WebDocument.h"
21 #include "third_party/WebKit/public/web/WebElement.h"
22 #include "third_party/WebKit/public/web/WebFrame.h"
23 #include "third_party/WebKit/public/web/WebNode.h"
24 #include "third_party/WebKit/public/web/WebNodeList.h"
25 #include "third_party/WebKit/public/web/WebScriptSource.h"
26 #include "third_party/WebKit/public/web/WebView.h"
27 #include "v8/include/v8.h"
28 14
29 #if defined(ENABLE_LANGUAGE_DETECTION) 15 #if defined(ENABLE_LANGUAGE_DETECTION)
30 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" 16 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
31 #endif 17 #endif
32 18
33 using WebKit::WebDocument;
34 using WebKit::WebElement;
35 using WebKit::WebFrame;
36 using WebKit::WebNode;
37 using WebKit::WebNodeList;
38 using WebKit::WebScriptSource;
39 using WebKit::WebString;
40 using WebKit::WebView;
41
42 namespace { 19 namespace {
43 20
44 // The delay in milliseconds that we'll wait before checking to see if the
45 // translate library injected in the page is ready.
46 const int kTranslateInitCheckDelayMs = 150;
47
48 // The maximum number of times we'll check to see if the translate library
49 // injected in the page is ready.
50 const int kMaxTranslateInitCheckAttempts = 5;
51
52 // The delay we wait in milliseconds before checking whether the translation has
53 // finished.
54 const int kTranslateStatusCheckDelayMs = 400;
55
56 // Language name passed to the Translate element for it to detect the language.
57 const char kAutoDetectionLanguage[] = "auto";
58
59 // Similar language code list. Some languages are very similar and difficult 21 // Similar language code list. Some languages are very similar and difficult
60 // for CLD to distinguish. 22 // for CLD to distinguish.
61 struct SimilarLanguageCode { 23 struct SimilarLanguageCode {
62 const char* const code; 24 const char* const code;
63 int group; 25 int group;
64 }; 26 };
65 27
66 const SimilarLanguageCode kSimilarLanguageCodes[] = { 28 const SimilarLanguageCode kSimilarLanguageCodes[] = {
67 {"bs", 1}, 29 {"bs", 1},
68 {"hr", 1}, 30 {"hr", 1},
(...skipping 12 matching lines...) Expand all
81 } 43 }
82 44
83 // Well-known languages which often have wrong server configuration of 45 // Well-known languages which often have wrong server configuration of
84 // Content-Language: en. 46 // Content-Language: en.
85 // TODO(toyoshim): Remove these static tables and caller functions to 47 // TODO(toyoshim): Remove these static tables and caller functions to
86 // chrome/common/translate, and implement them as std::set<>. 48 // chrome/common/translate, and implement them as std::set<>.
87 const char* kWellKnownCodesOnWrongConfiguration[] = { 49 const char* kWellKnownCodesOnWrongConfiguration[] = {
88 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th" 50 "es", "pt", "ja", "ru", "de", "zh-CN", "zh-TW", "ar", "id", "fr", "it", "th"
89 }; 51 };
90 52
91 } // namespace 53 // Applies a series of language code modifications in proper order.
54 void ApplyLanguageCodeCorrection(std::string* code) {
55 // Correct well-known format errors.
56 LanguageDetectionUtil::CorrectLanguageCodeTypo(code);
92 57
93 //////////////////////////////////////////////////////////////////////////////// 58 if (!LanguageDetectionUtil::IsValidLanguageCode(*code)) {
94 // TranslateHelper, public: 59 *code = std::string();
95 // 60 return;
96 TranslateHelper::TranslateHelper(content::RenderView* render_view) 61 }
97 : content::RenderViewObserver(render_view),
98 page_id_(-1),
99 translation_pending_(false),
100 weak_method_factory_(this) {
101 }
102 62
103 TranslateHelper::~TranslateHelper() { 63 TranslateUtil::ToTranslateLanguageSynonym(code);
104 CancelPendingTranslation();
105 }
106
107 void TranslateHelper::PageCaptured(int page_id, const string16& contents) {
108 // Get the document language as set by WebKit from the http-equiv
109 // meta tag for "content-language". This may or may not also
110 // have a value derived from the actual Content-Language HTTP
111 // header. The two actually have different meanings (despite the
112 // original intent of http-equiv to be an equivalent) with the former
113 // being the language of the document and the latter being the
114 // language of the intended audience (a distinction really only
115 // relevant for things like language textbooks). This distinction
116 // shouldn't affect translation.
117 WebFrame* main_frame = GetMainFrame();
118 if (!main_frame || render_view()->GetPageId() != page_id)
119 return;
120 page_id_ = page_id;
121 WebDocument document = main_frame->document();
122 std::string content_language = document.contentLanguage().utf8();
123 WebElement html_element = document.documentElement();
124 std::string html_lang;
125 // |html_element| can be null element, e.g. in
126 // BrowserTest.WindowOpenClose.
127 if (!html_element.isNull())
128 html_lang = html_element.getAttribute("lang").utf8();
129 std::string cld_language;
130 bool is_cld_reliable;
131 std::string language = DeterminePageLanguage(
132 content_language, html_lang, contents, &cld_language, &is_cld_reliable);
133
134 if (language.empty())
135 return;
136
137 language_determined_time_ = base::TimeTicks::Now();
138
139 GURL url(document.url());
140 LanguageDetectionDetails details;
141 details.time = base::Time::Now();
142 details.url = url;
143 details.content_language = content_language;
144 details.cld_language = cld_language;
145 details.is_cld_reliable = is_cld_reliable;
146 details.html_root_language = html_lang;
147 details.adopted_language = language;
148
149 // TODO(hajimehoshi): If this affects performance, it should be set only if
150 // translate-internals tab exists.
151 details.contents = contents;
152
153 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
154 routing_id(),
155 details,
156 IsTranslationAllowed(&document) && !language.empty()));
157 }
158
159 void TranslateHelper::CancelPendingTranslation() {
160 weak_method_factory_.InvalidateWeakPtrs();
161 translation_pending_ = false;
162 source_lang_.clear();
163 target_lang_.clear();
164 } 64 }
165 65
166 #if defined(ENABLE_LANGUAGE_DETECTION) 66 #if defined(ENABLE_LANGUAGE_DETECTION)
167 // static 67 // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it
168 std::string TranslateHelper::DetermineTextLanguage(const string16& text, 68 // failed.
169 bool* is_cld_reliable) { 69 // |is_cld_reliable| will be set as true if CLD says the detection is reliable.
70 std::string DetermineTextLanguage(const base::string16& text,
71 bool* is_cld_reliable) {
170 std::string language = chrome::kUnknownLanguageCode; 72 std::string language = chrome::kUnknownLanguageCode;
171 int num_languages = 0; 73 int num_languages = 0;
172 int text_bytes = 0; 74 int text_bytes = 0;
173 bool is_reliable = false; 75 bool is_reliable = false;
174 Language cld_language = 76 Language cld_language =
175 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, 77 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
176 &num_languages, NULL, &text_bytes); 78 &num_languages, NULL, &text_bytes);
177 if (is_cld_reliable != NULL) 79 if (is_cld_reliable != NULL)
178 *is_cld_reliable = is_reliable; 80 *is_cld_reliable = is_reliable;
179 81
(...skipping 12 matching lines...) Expand all
192 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN 94 // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN
193 // for Simplified Chinese. 95 // for Simplified Chinese.
194 language = LanguageCodeWithDialects(cld_language); 96 language = LanguageCodeWithDialects(cld_language);
195 } 97 }
196 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text 98 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
197 << "\n*************************************\n"; 99 << "\n*************************************\n";
198 return language; 100 return language;
199 } 101 }
200 #endif // defined(ENABLE_LANGUAGE_DETECTION) 102 #endif // defined(ENABLE_LANGUAGE_DETECTION)
201 103
202 //////////////////////////////////////////////////////////////////////////////// 104 // Checks if CLD can complement a sub code when the page language doesn't know
203 // TranslateHelper, protected: 105 // the sub code.
204 // 106 bool CanCLDComplementSubCode(
205 bool TranslateHelper::IsTranslateLibAvailable() { 107 const std::string& page_language, const std::string& cld_language) {
206 return ExecuteScriptAndGetBoolResult( 108 // Translate server cannot treat general Chinese. If Content-Language and
207 "typeof cr != 'undefined' && typeof cr.googleTranslate != 'undefined' && " 109 // CLD agree that the language is Chinese and Content-Language doesn't know
208 "typeof cr.googleTranslate.translate == 'function'", false); 110 // which dialect is used, CLD language has priority.
111 // TODO(hajimehoshi): How about the other dialects like zh-MO?
112 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false);
209 } 113 }
210 114
211 bool TranslateHelper::IsTranslateLibReady() { 115 } // namespace
212 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.libReady", false); 116
117 namespace LanguageDetectionUtil {
118
119 std::string DeterminePageLanguage(const std::string& code,
120 const std::string& html_lang,
121 const base::string16& contents,
122 std::string* cld_language_p,
123 bool* is_cld_reliable_p) {
124 #if defined(ENABLE_LANGUAGE_DETECTION)
125 base::TimeTicks begin_time = base::TimeTicks::Now();
126 bool is_cld_reliable;
127 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
128 TranslateCommonMetrics::ReportLanguageDetectionTime(begin_time,
129 base::TimeTicks::Now());
130
131 if (cld_language_p != NULL)
132 *cld_language_p = cld_language;
133 if (is_cld_reliable_p != NULL)
134 *is_cld_reliable_p = is_cld_reliable;
135 TranslateUtil::ToTranslateLanguageSynonym(&cld_language);
136 #endif // defined(ENABLE_LANGUAGE_DETECTION)
137
138 // Check if html lang attribute is valid.
139 std::string modified_html_lang;
140 if (!html_lang.empty()) {
141 modified_html_lang = html_lang;
142 ApplyLanguageCodeCorrection(&modified_html_lang);
143 TranslateCommonMetrics::ReportHtmlLang(html_lang, modified_html_lang);
144 VLOG(9) << "html lang based language code: " << modified_html_lang;
145 }
146
147 // Check if Content-Language is valid.
148 std::string modified_code;
149 if (!code.empty()) {
150 modified_code = code;
151 ApplyLanguageCodeCorrection(&modified_code);
152 TranslateCommonMetrics::ReportContentLanguage(code, modified_code);
153 }
154
155 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
156 // |modified_code|.
157 std::string language = modified_html_lang.empty() ? modified_code :
158 modified_html_lang;
159
160 #if defined(ENABLE_LANGUAGE_DETECTION)
161 // If |language| is empty, just use CLD result even though it might be
162 // chrome::kUnknownLanguageCode.
163 if (language.empty()) {
164 TranslateCommonMetrics::ReportLanguageVerification(
165 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
166 return cld_language;
167 }
168
169 if (cld_language == chrome::kUnknownLanguageCode) {
170 TranslateCommonMetrics::ReportLanguageVerification(
171 TranslateCommonMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
172 return language;
173 } else if (IsSameOrSimilarLanguages(language, cld_language)) {
174 TranslateCommonMetrics::ReportLanguageVerification(
175 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
176 return language;
177 } else if (MaybeServerWrongConfiguration(language, cld_language)) {
178 TranslateCommonMetrics::ReportLanguageVerification(
179 TranslateCommonMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
180 return cld_language;
181 } else if (CanCLDComplementSubCode(language, cld_language)) {
182 TranslateCommonMetrics::ReportLanguageVerification(
183 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
184 return cld_language;
185 } else {
186 TranslateCommonMetrics::ReportLanguageVerification(
187 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
188 // Content-Language value might be wrong because CLD says that this page
189 // is written in another language with confidence.
190 // In this case, Chrome doesn't rely on any of the language codes, and
191 // gives up suggesting a translation.
192 return std::string(chrome::kUnknownLanguageCode);
193 }
194 #else // defined(ENABLE_LANGUAGE_DETECTION)
195 TranslateCommonMetrics::ReportLanguageVerification(
196 TranslateCommonMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
197 #endif // defined(ENABLE_LANGUAGE_DETECTION)
198
199 return language;
213 } 200 }
214 201
215 bool TranslateHelper::HasTranslationFinished() { 202 void CorrectLanguageCodeTypo(std::string* code) {
216 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.finished", true);
217 }
218
219 bool TranslateHelper::HasTranslationFailed() {
220 return ExecuteScriptAndGetBoolResult("cr.googleTranslate.error", true);
221 }
222
223 bool TranslateHelper::StartTranslation() {
224 std::string script = "cr.googleTranslate.translate('" +
225 source_lang_ +
226 "','" +
227 target_lang_ +
228 "')";
229 return ExecuteScriptAndGetBoolResult(script, false);
230 }
231
232 std::string TranslateHelper::GetOriginalPageLanguage() {
233 return ExecuteScriptAndGetStringResult("cr.googleTranslate.sourceLang");
234 }
235
236 base::TimeDelta TranslateHelper::AdjustDelay(int delayInMs) {
237 // Just converts |delayInMs| without any modification in practical cases.
238 // Tests will override this function to return modified value.
239 return base::TimeDelta::FromMilliseconds(delayInMs);
240 }
241
242 void TranslateHelper::ExecuteScript(const std::string& script) {
243 WebFrame* main_frame = GetMainFrame();
244 if (main_frame)
245 main_frame->executeScript(WebScriptSource(ASCIIToUTF16(script)));
246 }
247
248 bool TranslateHelper::ExecuteScriptAndGetBoolResult(const std::string& script,
249 bool fallback) {
250 WebFrame* main_frame = GetMainFrame();
251 if (!main_frame)
252 return fallback;
253
254 v8::HandleScope handle_scope;
255 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
256 WebScriptSource(ASCIIToUTF16(script)));
257 if (v.IsEmpty() || !v->IsBoolean()) {
258 NOTREACHED();
259 return fallback;
260 }
261
262 return v->BooleanValue();
263 }
264
265 std::string TranslateHelper::ExecuteScriptAndGetStringResult(
266 const std::string& script) {
267 WebFrame* main_frame = GetMainFrame();
268 if (!main_frame)
269 return std::string();
270
271 v8::HandleScope handle_scope;
272 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
273 WebScriptSource(ASCIIToUTF16(script)));
274 if (v.IsEmpty() || !v->IsString()) {
275 NOTREACHED();
276 return std::string();
277 }
278
279 v8::Local<v8::String> v8_str = v->ToString();
280 int length = v8_str->Utf8Length() + 1;
281 scoped_ptr<char[]> str(new char[length]);
282 v8_str->WriteUtf8(str.get(), length);
283 return std::string(str.get());
284 }
285
286 double TranslateHelper::ExecuteScriptAndGetDoubleResult(
287 const std::string& script) {
288 WebFrame* main_frame = GetMainFrame();
289 if (!main_frame)
290 return 0.0;
291
292 v8::HandleScope handle_scope;
293 v8::Handle<v8::Value> v = main_frame->executeScriptAndReturnValue(
294 WebScriptSource(ASCIIToUTF16(script)));
295 if (v.IsEmpty() || !v->IsNumber()) {
296 NOTREACHED();
297 return 0.0;
298 }
299
300 return v->NumberValue();
301 }
302
303 ////////////////////////////////////////////////////////////////////////////////
304 // TranslateHelper, private:
305 //
306 // static
307 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) {
308 DCHECK(code); 203 DCHECK(code);
309 204
310 size_t coma_index = code->find(','); 205 size_t coma_index = code->find(',');
311 if (coma_index != std::string::npos) { 206 if (coma_index != std::string::npos) {
312 // There are more than 1 language specified, just keep the first one. 207 // There are more than 1 language specified, just keep the first one.
313 *code = code->substr(0, coma_index); 208 *code = code->substr(0, coma_index);
314 } 209 }
315 TrimWhitespaceASCII(*code, TRIM_ALL, code); 210 TrimWhitespaceASCII(*code, TRIM_ALL, code);
316 211
317 // An underscore instead of a dash is a frequent mistake. 212 // An underscore instead of a dash is a frequent mistake.
318 size_t underscore_index = code->find('_'); 213 size_t underscore_index = code->find('_');
319 if (underscore_index != std::string::npos) 214 if (underscore_index != std::string::npos)
320 (*code)[underscore_index] = '-'; 215 (*code)[underscore_index] = '-';
321 216
322 // Change everything up to a dash to lower-case and everything after to upper. 217 // Change everything up to a dash to lower-case and everything after to upper.
323 size_t dash_index = code->find('-'); 218 size_t dash_index = code->find('-');
324 if (dash_index != std::string::npos) { 219 if (dash_index != std::string::npos) {
325 *code = StringToLowerASCII(code->substr(0, dash_index)) + 220 *code = StringToLowerASCII(code->substr(0, dash_index)) +
326 StringToUpperASCII(code->substr(dash_index)); 221 StringToUpperASCII(code->substr(dash_index));
327 } else { 222 } else {
328 *code = StringToLowerASCII(*code); 223 *code = StringToLowerASCII(*code);
329 } 224 }
330 } 225 }
331 226
332 // static 227 bool IsValidLanguageCode(const std::string& code) {
333 bool TranslateHelper::IsValidLanguageCode(const std::string& code) {
334 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/. 228 // Roughly check if the language code follows /[a-zA-Z]{2,3}(-[a-zA-Z]{2})?/.
335 // TODO(hajimehoshi): How about es-419, which is used as an Accept language? 229 // TODO(hajimehoshi): How about es-419, which is used as an Accept language?
336 std::vector<std::string> chunks; 230 std::vector<std::string> chunks;
337 base::SplitString(code, '-', &chunks); 231 base::SplitString(code, '-', &chunks);
338 232
339 if (chunks.size() < 1 || 2 < chunks.size()) 233 if (chunks.size() < 1 || 2 < chunks.size())
340 return false; 234 return false;
341 235
342 const std::string& main_code = chunks[0]; 236 const std::string& main_code = chunks[0];
343 237
(...skipping 16 matching lines...) Expand all
360 254
361 for (std::string::const_iterator it = sub_code.begin(); 255 for (std::string::const_iterator it = sub_code.begin();
362 it != sub_code.end(); ++it) { 256 it != sub_code.end(); ++it) {
363 if (!IsAsciiAlpha(*it)) 257 if (!IsAsciiAlpha(*it))
364 return false; 258 return false;
365 } 259 }
366 260
367 return true; 261 return true;
368 } 262 }
369 263
370 // static 264 bool IsSameOrSimilarLanguages(const std::string& page_language,
371 void TranslateHelper::ApplyLanguageCodeCorrection(std::string* code) { 265 const std::string& cld_language) {
372 // Correct well-known format errors.
373 CorrectLanguageCodeTypo(code);
374
375 if (!IsValidLanguageCode(*code)) {
376 *code = std::string();
377 return;
378 }
379
380 TranslateUtil::ToTranslateLanguageSynonym(code);
381 }
382
383 // static
384 bool TranslateHelper::IsSameOrSimilarLanguages(
385 const std::string& page_language, const std::string& cld_language) {
386 // Language code part of |page_language| is matched to one of |cld_language|. 266 // Language code part of |page_language| is matched to one of |cld_language|.
387 // Country code is ignored here. 267 // Country code is ignored here.
388 if (page_language.size() >= 2 && 268 if (page_language.size() >= 2 &&
389 cld_language.find(page_language.c_str(), 0, 2) == 0) { 269 cld_language.find(page_language.c_str(), 0, 2) == 0) {
390 // Languages are matched strictly. Reports false to metrics, but returns 270 // Languages are matched strictly. Reports false to metrics, but returns
391 // true. 271 // true.
392 TranslateHelperMetrics::ReportSimilarLanguageMatch(false); 272 TranslateCommonMetrics::ReportSimilarLanguageMatch(false);
393 return true; 273 return true;
394 } 274 }
395 275
396 // Check if |page_language| and |cld_language| are in the similar language 276 // Check if |page_language| and |cld_language| are in the similar language
397 // list and belong to the same language group. 277 // list and belong to the same language group.
398 int page_code = GetSimilarLanguageGroupCode(page_language); 278 int page_code = GetSimilarLanguageGroupCode(page_language);
399 bool match = page_code != 0 && 279 bool match = page_code != 0 &&
400 page_code == GetSimilarLanguageGroupCode(cld_language); 280 page_code == GetSimilarLanguageGroupCode(cld_language);
401 281
402 TranslateHelperMetrics::ReportSimilarLanguageMatch(match); 282 TranslateCommonMetrics::ReportSimilarLanguageMatch(match);
403 return match; 283 return match;
404 } 284 }
405 285
406 // static 286 bool MaybeServerWrongConfiguration(const std::string& page_language,
407 bool TranslateHelper::MaybeServerWrongConfiguration( 287 const std::string& cld_language) {
408 const std::string& page_language, const std::string& cld_language) {
409 // If |page_language| is not "en-*", respect it and just return false here. 288 // If |page_language| is not "en-*", respect it and just return false here.
410 if (!StartsWithASCII(page_language, "en", false)) 289 if (!StartsWithASCII(page_language, "en", false))
411 return false; 290 return false;
412 291
413 // A server provides a language meta information representing "en-*". But it 292 // A server provides a language meta information representing "en-*". But it
414 // might be just a default value due to missing user configuration. 293 // might be just a default value due to missing user configuration.
415 // Let's trust |cld_language| if the determined language is not difficult to 294 // Let's trust |cld_language| if the determined language is not difficult to
416 // distinguish from English, and the language is one of well-known languages 295 // distinguish from English, and the language is one of well-known languages
417 // which often provide "en-*" meta information mistakenly. 296 // which often provide "en-*" meta information mistakenly.
418 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) { 297 for (size_t i = 0; i < arraysize(kWellKnownCodesOnWrongConfiguration); ++i) {
419 if (cld_language == kWellKnownCodesOnWrongConfiguration[i]) 298 if (cld_language == kWellKnownCodesOnWrongConfiguration[i])
420 return true; 299 return true;
421 } 300 }
422 return false; 301 return false;
423 } 302 }
424 303
425 // static 304 } // namespace LanguageDetectionUtil
426 bool TranslateHelper::CanCLDComplementSubCode(
427 const std::string& page_language, const std::string& cld_language) {
428 // Translate server cannot treat general Chinese. If Content-Language and
429 // CLD agree that the language is Chinese and Content-Language doesn't know
430 // which dialect is used, CLD language has priority.
431 // TODO(hajimehoshi): How about the other dialects like zh-MO?
432 return page_language == "zh" && StartsWithASCII(cld_language, "zh-", false);
433 }
434
435 // static
436 std::string TranslateHelper::DeterminePageLanguage(const std::string& code,
437 const std::string& html_lang,
438 const string16& contents,
439 std::string* cld_language_p,
440 bool* is_cld_reliable_p) {
441 #if defined(ENABLE_LANGUAGE_DETECTION)
442 base::TimeTicks begin_time = base::TimeTicks::Now();
443 bool is_cld_reliable;
444 std::string cld_language = DetermineTextLanguage(contents, &is_cld_reliable);
445 TranslateHelperMetrics::ReportLanguageDetectionTime(begin_time,
446 base::TimeTicks::Now());
447
448 if (cld_language_p != NULL)
449 *cld_language_p = cld_language;
450 if (is_cld_reliable_p != NULL)
451 *is_cld_reliable_p = is_cld_reliable;
452 TranslateUtil::ToTranslateLanguageSynonym(&cld_language);
453 #endif // defined(ENABLE_LANGUAGE_DETECTION)
454
455 // Check if html lang attribute is valid.
456 std::string modified_html_lang;
457 if (!html_lang.empty()) {
458 modified_html_lang = html_lang;
459 ApplyLanguageCodeCorrection(&modified_html_lang);
460 TranslateHelperMetrics::ReportHtmlLang(html_lang, modified_html_lang);
461 VLOG(9) << "html lang based language code: " << modified_html_lang;
462 }
463
464 // Check if Content-Language is valid.
465 std::string modified_code;
466 if (!code.empty()) {
467 modified_code = code;
468 ApplyLanguageCodeCorrection(&modified_code);
469 TranslateHelperMetrics::ReportContentLanguage(code, modified_code);
470 }
471
472 // Adopt |modified_html_lang| if it is valid. Otherwise, adopt
473 // |modified_code|.
474 std::string language = modified_html_lang.empty() ? modified_code :
475 modified_html_lang;
476
477 #if defined(ENABLE_LANGUAGE_DETECTION)
478 // If |language| is empty, just use CLD result even though it might be
479 // chrome::kUnknownLanguageCode.
480 if (language.empty()) {
481 TranslateHelperMetrics::ReportLanguageVerification(
482 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_ONLY);
483 return cld_language;
484 }
485
486 if (cld_language == chrome::kUnknownLanguageCode) {
487 TranslateHelperMetrics::ReportLanguageVerification(
488 TranslateHelperMetrics::LANGUAGE_VERIFICATION_UNKNOWN);
489 return language;
490 } else if (IsSameOrSimilarLanguages(language, cld_language)) {
491 TranslateHelperMetrics::ReportLanguageVerification(
492 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_AGREE);
493 return language;
494 } else if (MaybeServerWrongConfiguration(language, cld_language)) {
495 TranslateHelperMetrics::ReportLanguageVerification(
496 TranslateHelperMetrics::LANGUAGE_VERIFICATION_TRUST_CLD);
497 return cld_language;
498 } else if (CanCLDComplementSubCode(language, cld_language)) {
499 TranslateHelperMetrics::ReportLanguageVerification(
500 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_COMPLEMENT_SUB_CODE);
501 return cld_language;
502 } else {
503 TranslateHelperMetrics::ReportLanguageVerification(
504 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISAGREE);
505 // Content-Language value might be wrong because CLD says that this page
506 // is written in another language with confidence.
507 // In this case, Chrome doesn't rely on any of the language codes, and
508 // gives up suggesting a translation.
509 return std::string(chrome::kUnknownLanguageCode);
510 }
511 #else // defined(ENABLE_LANGUAGE_DETECTION)
512 TranslateHelperMetrics::ReportLanguageVerification(
513 TranslateHelperMetrics::LANGUAGE_VERIFICATION_CLD_DISABLED);
514 #endif // defined(ENABLE_LANGUAGE_DETECTION)
515
516 return language;
517 }
518
519 // static
520 bool TranslateHelper::IsTranslationAllowed(WebDocument* document) {
521 WebElement head = document->head();
522 if (head.isNull() || !head.hasChildNodes())
523 return true;
524
525 const WebString meta(ASCIIToUTF16("meta"));
526 const WebString name(ASCIIToUTF16("name"));
527 const WebString google(ASCIIToUTF16("google"));
528 const WebString value(ASCIIToUTF16("value"));
529 const WebString content(ASCIIToUTF16("content"));
530
531 WebNodeList children = head.childNodes();
532 for (size_t i = 0; i < children.length(); ++i) {
533 WebNode node = children.item(i);
534 if (!node.isElementNode())
535 continue;
536 WebElement element = node.to<WebElement>();
537 // Check if a tag is <meta>.
538 if (!element.hasTagName(meta))
539 continue;
540 // Check if the tag contains name="google".
541 WebString attribute = element.getAttribute(name);
542 if (attribute.isNull() || attribute != google)
543 continue;
544 // Check if the tag contains value="notranslate", or content="notranslate".
545 attribute = element.getAttribute(value);
546 if (attribute.isNull())
547 attribute = element.getAttribute(content);
548 if (attribute.isNull())
549 continue;
550 if (LowerCaseEqualsASCII(attribute, "notranslate"))
551 return false;
552 }
553 return true;
554 }
555
556 bool TranslateHelper::OnMessageReceived(const IPC::Message& message) {
557 bool handled = true;
558 IPC_BEGIN_MESSAGE_MAP(TranslateHelper, message)
559 IPC_MESSAGE_HANDLER(ChromeViewMsg_TranslatePage, OnTranslatePage)
560 IPC_MESSAGE_HANDLER(ChromeViewMsg_RevertTranslation, OnRevertTranslation)
561 IPC_MESSAGE_UNHANDLED(handled = false)
562 IPC_END_MESSAGE_MAP()
563 return handled;
564 }
565
566 void TranslateHelper::OnTranslatePage(int page_id,
567 const std::string& translate_script,
568 const std::string& source_lang,
569 const std::string& target_lang) {
570 WebFrame* main_frame = GetMainFrame();
571 if (!main_frame ||
572 page_id_ != page_id ||
573 render_view()->GetPageId() != page_id)
574 return; // We navigated away, nothing to do.
575
576 // A similar translation is already under way, nothing to do.
577 if (translation_pending_ && target_lang_ == target_lang)
578 return;
579
580 // Any pending translation is now irrelevant.
581 CancelPendingTranslation();
582
583 // Set our states.
584 translation_pending_ = true;
585
586 // If the source language is undetermined, we'll let the translate element
587 // detect it.
588 source_lang_ = (source_lang != chrome::kUnknownLanguageCode) ?
589 source_lang : kAutoDetectionLanguage;
590 target_lang_ = target_lang;
591
592 TranslateHelperMetrics::ReportUserActionDuration(language_determined_time_,
593 base::TimeTicks::Now());
594
595 GURL url(main_frame->document().url());
596 TranslateHelperMetrics::ReportPageScheme(url.scheme());
597
598 if (!IsTranslateLibAvailable()) {
599 // Evaluate the script to add the translation related method to the global
600 // context of the page.
601 ExecuteScript(translate_script);
602 DCHECK(IsTranslateLibAvailable());
603 }
604
605 TranslatePageImpl(0);
606 }
607
608 void TranslateHelper::OnRevertTranslation(int page_id) {
609 if (page_id_ != page_id || render_view()->GetPageId() != page_id)
610 return; // We navigated away, nothing to do.
611
612 if (!IsTranslateLibAvailable()) {
613 NOTREACHED();
614 return;
615 }
616
617 CancelPendingTranslation();
618
619 ExecuteScript("cr.googleTranslate.revert()");
620 }
621
622 void TranslateHelper::CheckTranslateStatus() {
623 // If this is not the same page, the translation has been canceled. If the
624 // view is gone, the page is closing.
625 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
626 return;
627
628 // First check if there was an error.
629 if (HasTranslationFailed()) {
630 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
631 return; // There was an error.
632 }
633
634 if (HasTranslationFinished()) {
635 std::string actual_source_lang;
636 // Translation was successfull, if it was auto, retrieve the source
637 // language the Translate Element detected.
638 if (source_lang_ == kAutoDetectionLanguage) {
639 actual_source_lang = GetOriginalPageLanguage();
640 if (actual_source_lang.empty()) {
641 NotifyBrowserTranslationFailed(TranslateErrors::UNKNOWN_LANGUAGE);
642 return;
643 } else if (actual_source_lang == target_lang_) {
644 NotifyBrowserTranslationFailed(TranslateErrors::IDENTICAL_LANGUAGES);
645 return;
646 }
647 } else {
648 actual_source_lang = source_lang_;
649 }
650
651 if (!translation_pending_) {
652 NOTREACHED();
653 return;
654 }
655
656 translation_pending_ = false;
657
658 // Check JavaScript performance counters for UMA reports.
659 TranslateHelperMetrics::ReportTimeToTranslate(
660 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.translationTime"));
661
662 // Notify the browser we are done.
663 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
664 render_view()->GetRoutingID(), render_view()->GetPageId(),
665 actual_source_lang, target_lang_, TranslateErrors::NONE));
666 return;
667 }
668
669 // The translation is still pending, check again later.
670 base::MessageLoop::current()->PostDelayedTask(
671 FROM_HERE,
672 base::Bind(&TranslateHelper::CheckTranslateStatus,
673 weak_method_factory_.GetWeakPtr()),
674 AdjustDelay(kTranslateStatusCheckDelayMs));
675 }
676
677 void TranslateHelper::TranslatePageImpl(int count) {
678 DCHECK_LT(count, kMaxTranslateInitCheckAttempts);
679 if (page_id_ != render_view()->GetPageId() || !render_view()->GetWebView())
680 return;
681
682 if (!IsTranslateLibReady()) {
683 // The library is not ready, try again later, unless we have tried several
684 // times unsucessfully already.
685 if (++count >= kMaxTranslateInitCheckAttempts) {
686 NotifyBrowserTranslationFailed(TranslateErrors::INITIALIZATION_ERROR);
687 return;
688 }
689 base::MessageLoop::current()->PostDelayedTask(
690 FROM_HERE,
691 base::Bind(&TranslateHelper::TranslatePageImpl,
692 weak_method_factory_.GetWeakPtr(),
693 count),
694 AdjustDelay(count * kTranslateInitCheckDelayMs));
695 return;
696 }
697
698 // The library is loaded, and ready for translation now.
699 // Check JavaScript performance counters for UMA reports.
700 TranslateHelperMetrics::ReportTimeToBeReady(
701 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.readyTime"));
702 TranslateHelperMetrics::ReportTimeToLoad(
703 ExecuteScriptAndGetDoubleResult("cr.googleTranslate.loadTime"));
704
705 if (!StartTranslation()) {
706 NotifyBrowserTranslationFailed(TranslateErrors::TRANSLATION_ERROR);
707 return;
708 }
709 // Check the status of the translation.
710 base::MessageLoop::current()->PostDelayedTask(
711 FROM_HERE,
712 base::Bind(&TranslateHelper::CheckTranslateStatus,
713 weak_method_factory_.GetWeakPtr()),
714 AdjustDelay(kTranslateStatusCheckDelayMs));
715 }
716
717 void TranslateHelper::NotifyBrowserTranslationFailed(
718 TranslateErrors::Type error) {
719 translation_pending_ = false;
720 // Notify the browser there was an error.
721 render_view()->Send(new ChromeViewHostMsg_PageTranslated(
722 render_view()->GetRoutingID(), page_id_, source_lang_,
723 target_lang_, error));
724 }
725
726 WebFrame* TranslateHelper::GetMainFrame() {
727 WebView* web_view = render_view()->GetWebView();
728
729 // When the tab is going to be closed, the web_view can be NULL.
730 if (!web_view)
731 return NULL;
732
733 return web_view->mainFrame();
734 }
OLDNEW
« no previous file with comments | « chrome/common/translate/language_detection_util.h ('k') | chrome/common/translate/language_detection_util_unittest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698