OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate_helper.h" | 5 #include "chrome/renderer/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
11 #include "base/metrics/histogram.h" | 11 #include "base/metrics/histogram.h" |
| 12 #include "base/string16.h" |
12 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
13 #include "chrome/common/chrome_constants.h" | 14 #include "chrome/common/chrome_constants.h" |
14 #include "chrome/common/render_messages.h" | 15 #include "chrome/common/render_messages.h" |
15 #include "content/public/renderer/render_view.h" | 16 #include "content/public/renderer/render_view.h" |
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | 17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" |
18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" | 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" |
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
21 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" | 22 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
53 page_id_(-1), | 54 page_id_(-1), |
54 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { | 55 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { |
55 } | 56 } |
56 | 57 |
57 TranslateHelper::~TranslateHelper() { | 58 TranslateHelper::~TranslateHelper() { |
58 CancelPendingTranslation(); | 59 CancelPendingTranslation(); |
59 } | 60 } |
60 | 61 |
61 void TranslateHelper::PageCaptured(const string16& contents) { | 62 void TranslateHelper::PageCaptured(const string16& contents) { |
62 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); | 63 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); |
63 // If the page explicitly specifies a language, use it, otherwise we'll | 64 |
64 // determine it based on the text content using the CLD. | 65 // Get the document language as set by WebKit from the http-equiv |
65 std::string language = GetPageLanguageFromMetaTag(&document); | 66 // meta tag for "content-language". This may or may not also |
| 67 // have a value derived from the actual Content-Language HTTP |
| 68 // header. The two actually have different meanings (despite the |
| 69 // original intent of http-equiv to be an equivalent) with the former |
| 70 // being the language of the document and the latter being the |
| 71 // language of the intended audience (a distinction really only |
| 72 // relevant for things like language textbooks). This distinction |
| 73 // shouldn't affect translation. |
| 74 std::string language = document.contentLanguage().utf8(); |
| 75 size_t coma_index = language.find(','); |
| 76 if (coma_index != std::string::npos) { |
| 77 // More than one language is specified; just keep the first one. |
| 78 language = language.substr(0, coma_index); |
| 79 } |
| 80 TrimWhitespaceASCII(language, TRIM_ALL, &language); |
| 81 language = StringToLowerASCII(language); |
| 82 |
66 if (language.empty()) { | 83 if (language.empty()) { |
67 base::TimeTicks begin_time = base::TimeTicks::Now(); | 84 base::TimeTicks begin_time = base::TimeTicks::Now(); |
68 language = DetermineTextLanguage(contents); | 85 language = DetermineTextLanguage(contents); |
69 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 86 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
70 base::TimeTicks::Now() - begin_time); | 87 base::TimeTicks::Now() - begin_time); |
71 } else { | 88 } else { |
72 VLOG(9) << "PageLanguageFromMetaTag: " << language; | 89 VLOG(9) << "PageLanguageFromMetaTag: " << language; |
73 } | 90 } |
74 | 91 |
75 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 92 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
(...skipping 22 matching lines...) Expand all Loading... |
98 attribute = iter->getAttribute("content"); | 115 attribute = iter->getAttribute("content"); |
99 if (attribute.isNull()) | 116 if (attribute.isNull()) |
100 continue; | 117 continue; |
101 if (LowerCaseEqualsASCII(attribute, "notranslate")) | 118 if (LowerCaseEqualsASCII(attribute, "notranslate")) |
102 return false; | 119 return false; |
103 } | 120 } |
104 return true; | 121 return true; |
105 } | 122 } |
106 | 123 |
107 // static | 124 // static |
108 std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) { | |
109 // The META language tag looks like: | |
110 // <meta http-equiv="content-language" content="en"> | |
111 // It can contain more than one language: | |
112 // <meta http-equiv="content-language" content="en, fr"> | |
113 std::vector<WebElement> meta_elements; | |
114 webkit_glue::GetMetaElementsWithAttribute(document, | |
115 ASCIIToUTF16("http-equiv"), | |
116 ASCIIToUTF16("content-language"), | |
117 &meta_elements); | |
118 if (meta_elements.empty()) | |
119 return std::string(); | |
120 | |
121 // We don't expect more than one such tag. If there are several, just use the | |
122 // first one. | |
123 WebString attribute = meta_elements[0].getAttribute("content"); | |
124 if (attribute.isEmpty()) | |
125 return std::string(); | |
126 | |
127 // The value is supposed to be ASCII. | |
128 if (!IsStringASCII(attribute)) | |
129 return std::string(); | |
130 | |
131 std::string language = StringToLowerASCII(UTF16ToASCII(attribute)); | |
132 size_t coma_index = language.find(','); | |
133 if (coma_index != std::string::npos) { | |
134 // There are more than 1 language specified, just keep the first one. | |
135 language = language.substr(0, coma_index); | |
136 } | |
137 TrimWhitespaceASCII(language, TRIM_ALL, &language); | |
138 return language; | |
139 } | |
140 | |
141 // static | |
142 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 125 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { |
143 std::string language = chrome::kUnknownLanguageCode; | 126 std::string language = chrome::kUnknownLanguageCode; |
144 int num_languages = 0; | 127 int num_languages = 0; |
145 int text_bytes = 0; | 128 int text_bytes = 0; |
146 bool is_reliable = false; | 129 bool is_reliable = false; |
147 Language cld_language = | 130 Language cld_language = |
148 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 131 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
149 &num_languages, NULL, &text_bytes); | 132 &num_languages, NULL, &text_bytes); |
150 // We don't trust the result if the CLD reports that the detection is not | 133 // We don't trust the result if the CLD reports that the detection is not |
151 // reliable, or if the actual text used to detect the language was less than | 134 // reliable, or if the actual text used to detect the language was less than |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
443 WebView* web_view = render_view()->GetWebView(); | 426 WebView* web_view = render_view()->GetWebView(); |
444 if (!web_view) { | 427 if (!web_view) { |
445 // When the WebView is going away, the render view should have called | 428 // When the WebView is going away, the render view should have called |
446 // CancelPendingTranslation() which should have stopped any pending work, so | 429 // CancelPendingTranslation() which should have stopped any pending work, so |
447 // that case should not happen. | 430 // that case should not happen. |
448 NOTREACHED(); | 431 NOTREACHED(); |
449 return NULL; | 432 return NULL; |
450 } | 433 } |
451 return web_view->mainFrame(); | 434 return web_view->mainFrame(); |
452 } | 435 } |
OLD | NEW |