OLD | NEW |
---|---|
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate_helper.h" | 5 #include "chrome/renderer/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
11 #include "base/metrics/histogram.h" | 11 #include "base/metrics/histogram.h" |
12 #include "base/string16.h" | |
12 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
13 #include "chrome/common/chrome_constants.h" | 14 #include "chrome/common/chrome_constants.h" |
14 #include "chrome/common/render_messages.h" | 15 #include "chrome/common/render_messages.h" |
15 #include "content/public/renderer/render_view.h" | 16 #include "content/public/renderer/render_view.h" |
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | 17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" |
18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" | 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" |
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
21 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" | 22 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" |
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
53 page_id_(-1), | 54 page_id_(-1), |
54 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { | 55 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { |
55 } | 56 } |
56 | 57 |
57 TranslateHelper::~TranslateHelper() { | 58 TranslateHelper::~TranslateHelper() { |
58 CancelPendingTranslation(); | 59 CancelPendingTranslation(); |
59 } | 60 } |
60 | 61 |
61 void TranslateHelper::PageCaptured(const string16& contents) { | 62 void TranslateHelper::PageCaptured(const string16& contents) { |
62 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); | 63 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); |
63 // If the page explicitly specifies a language, use it, otherwise we'll | 64 |
64 // determine it based on the text content using the CLD. | 65 // determine it based on the text content using the CLD. |
MAD
2012/10/02 20:15:26
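"determine it"??? The first line of this comment ("If the page explicitly specifies a language, use it, otherwise we'll") was removed in the new version, so the remaining line starts mid-sentence and "it" no longer refers to anything.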
| |
65 std::string language = GetPageLanguageFromMetaTag(&document); | 66 std::string language = document.contentLanguage().utf8(); |
67 size_t coma_index = language.find(','); | |
68 if (coma_index != std::string::npos) { | |
69 // There are more than 1 language specified, just keep the first one. | |
70 language = language.substr(0, coma_index); | |
71 } | |
72 TrimWhitespaceASCII(language, TRIM_ALL, &language); | |
73 language = StringToLowerASCII(language); | |
74 | |
66 if (language.empty()) { | 75 if (language.empty()) { |
67 base::TimeTicks begin_time = base::TimeTicks::Now(); | 76 base::TimeTicks begin_time = base::TimeTicks::Now(); |
68 language = DetermineTextLanguage(contents); | 77 language = DetermineTextLanguage(contents); |
69 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 78 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
70 base::TimeTicks::Now() - begin_time); | 79 base::TimeTicks::Now() - begin_time); |
71 } else { | 80 } else { |
72 VLOG(9) << "PageLanguageFromMetaTag: " << language; | 81 VLOG(9) << "PageLanguageFromMetaTag: " << language; |
73 } | 82 } |
74 | 83 |
75 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 84 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
(...skipping 22 matching lines...) Expand all Loading... | |
98 attribute = iter->getAttribute("content"); | 107 attribute = iter->getAttribute("content"); |
99 if (attribute.isNull()) | 108 if (attribute.isNull()) |
100 continue; | 109 continue; |
101 if (LowerCaseEqualsASCII(attribute, "notranslate")) | 110 if (LowerCaseEqualsASCII(attribute, "notranslate")) |
102 return false; | 111 return false; |
103 } | 112 } |
104 return true; | 113 return true; |
105 } | 114 } |
106 | 115 |
107 // static | 116 // static |
108 std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) { | |
109 // The META language tag looks like: | |
110 // <meta http-equiv="content-language" content="en"> | |
111 // It can contain more than one language: | |
112 // <meta http-equiv="content-language" content="en, fr"> | |
113 std::vector<WebElement> meta_elements; | |
114 webkit_glue::GetMetaElementsWithAttribute(document, | |
115 ASCIIToUTF16("http-equiv"), | |
116 ASCIIToUTF16("content-language"), | |
117 &meta_elements); | |
118 if (meta_elements.empty()) | |
119 return std::string(); | |
120 | |
121 // We don't expect more than one such tag. If there are several, just use the | |
122 // first one. | |
123 WebString attribute = meta_elements[0].getAttribute("content"); | |
124 if (attribute.isEmpty()) | |
125 return std::string(); | |
126 | |
127 // The value is supposed to be ASCII. | |
128 if (!IsStringASCII(attribute)) | |
129 return std::string(); | |
130 | |
131 std::string language = StringToLowerASCII(UTF16ToASCII(attribute)); | |
132 size_t coma_index = language.find(','); | |
133 if (coma_index != std::string::npos) { | |
134 // There are more than 1 language specified, just keep the first one. | |
135 language = language.substr(0, coma_index); | |
136 } | |
137 TrimWhitespaceASCII(language, TRIM_ALL, &language); | |
138 return language; | |
139 } | |
140 | |
141 // static | |
142 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 117 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { |
143 std::string language = chrome::kUnknownLanguageCode; | 118 std::string language = chrome::kUnknownLanguageCode; |
144 int num_languages = 0; | 119 int num_languages = 0; |
145 int text_bytes = 0; | 120 int text_bytes = 0; |
146 bool is_reliable = false; | 121 bool is_reliable = false; |
147 Language cld_language = | 122 Language cld_language = |
148 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 123 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
149 &num_languages, NULL, &text_bytes); | 124 &num_languages, NULL, &text_bytes); |
150 // We don't trust the result if the CLD reports that the detection is not | 125 // We don't trust the result if the CLD reports that the detection is not |
151 // reliable, or if the actual text used to detect the language was less than | 126 // reliable, or if the actual text used to detect the language was less than |
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
443 WebView* web_view = render_view()->GetWebView(); | 418 WebView* web_view = render_view()->GetWebView(); |
444 if (!web_view) { | 419 if (!web_view) { |
445 // When the WebView is going away, the render view should have called | 420 // When the WebView is going away, the render view should have called |
446 // CancelPendingTranslation() which should have stopped any pending work, so | 421 // CancelPendingTranslation() which should have stopped any pending work, so |
447 // that case should not happen. | 422 // that case should not happen. |
448 NOTREACHED(); | 423 NOTREACHED(); |
449 return NULL; | 424 return NULL; |
450 } | 425 } |
451 return web_view->mainFrame(); | 426 return web_view->mainFrame(); |
452 } | 427 } |
OLD | NEW |