Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(641)

Side by Side Diff: chrome/renderer/translate_helper.cc

Issue 11052002: Get the document language directly from WebKit (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Reduced patch that only expects language from http-equiv setting. Created 8 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « chrome/renderer/translate_helper.h ('k') | chrome/renderer/translate_helper_browsertest.cc » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate_helper.h" 5 #include "chrome/renderer/translate_helper.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/message_loop.h" 10 #include "base/message_loop.h"
11 #include "base/metrics/histogram.h" 11 #include "base/metrics/histogram.h"
12 #include "base/string16.h"
12 #include "base/utf_string_conversions.h" 13 #include "base/utf_string_conversions.h"
13 #include "chrome/common/chrome_constants.h" 14 #include "chrome/common/chrome_constants.h"
14 #include "chrome/common/render_messages.h" 15 #include "chrome/common/render_messages.h"
15 #include "content/public/renderer/render_view.h" 16 #include "content/public/renderer/render_view.h"
16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" 17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h"
17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h"
18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h"
19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h"
20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h"
21 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" 22 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h"
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
53 page_id_(-1), 54 page_id_(-1),
54 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { 55 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) {
55 } 56 }
56 57
57 TranslateHelper::~TranslateHelper() { 58 TranslateHelper::~TranslateHelper() {
58 CancelPendingTranslation(); 59 CancelPendingTranslation();
59 } 60 }
60 61
61 void TranslateHelper::PageCaptured(const string16& contents) { 62 void TranslateHelper::PageCaptured(const string16& contents) {
62 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); 63 WebDocument document = render_view()->GetWebView()->mainFrame()->document();
63 // If the page explicitly specifies a language, use it, otherwise we'll 64
64 // determine it based on the text content using the CLD. 65 // Get the document language as set by WebKit from the http-equiv
65 std::string language = GetPageLanguageFromMetaTag(&document); 66 // meta tag for "content-language". This may or may not also
67 // have a value derived from the actual Content-Language HTTP
68 // header. The two actually have different meanings (despite the
69 // original intent of http-equiv to be an equivalent) with the former
70 // being the language of the document and the latter being the
71 // language of the intended audience (a distinction really only
72 // relevant for things like language textbooks). This distinction
73 // shouldn't affect translation.
74 std::string language = document.contentLanguage().utf8();
75 size_t coma_index = language.find(',');
76 if (coma_index != std::string::npos) {
77 // There are more than 1 language specified, just keep the first one.
78 language = language.substr(0, coma_index);
79 }
80 TrimWhitespaceASCII(language, TRIM_ALL, &language);
81 language = StringToLowerASCII(language);
82
66 if (language.empty()) { 83 if (language.empty()) {
67 base::TimeTicks begin_time = base::TimeTicks::Now(); 84 base::TimeTicks begin_time = base::TimeTicks::Now();
68 language = DetermineTextLanguage(contents); 85 language = DetermineTextLanguage(contents);
69 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", 86 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection",
70 base::TimeTicks::Now() - begin_time); 87 base::TimeTicks::Now() - begin_time);
71 } else { 88 } else {
72 VLOG(9) << "PageLanguageFromMetaTag: " << language; 89 VLOG(9) << "PageLanguageFromMetaTag: " << language;
73 } 90 }
74 91
75 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( 92 Send(new ChromeViewHostMsg_TranslateLanguageDetermined(
(...skipping 22 matching lines...) Expand all
98 attribute = iter->getAttribute("content"); 115 attribute = iter->getAttribute("content");
99 if (attribute.isNull()) 116 if (attribute.isNull())
100 continue; 117 continue;
101 if (LowerCaseEqualsASCII(attribute, "notranslate")) 118 if (LowerCaseEqualsASCII(attribute, "notranslate"))
102 return false; 119 return false;
103 } 120 }
104 return true; 121 return true;
105 } 122 }
106 123
107 // static 124 // static
108 std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) {
109 // The META language tag looks like:
110 // <meta http-equiv="content-language" content="en">
111 // It can contain more than one language:
112 // <meta http-equiv="content-language" content="en, fr">
113 std::vector<WebElement> meta_elements;
114 webkit_glue::GetMetaElementsWithAttribute(document,
115 ASCIIToUTF16("http-equiv"),
116 ASCIIToUTF16("content-language"),
117 &meta_elements);
118 if (meta_elements.empty())
119 return std::string();
120
121 // We don't expect more than one such tag. If there are several, just use the
122 // first one.
123 WebString attribute = meta_elements[0].getAttribute("content");
124 if (attribute.isEmpty())
125 return std::string();
126
127 // The value is supposed to be ASCII.
128 if (!IsStringASCII(attribute))
129 return std::string();
130
131 std::string language = StringToLowerASCII(UTF16ToASCII(attribute));
132 size_t coma_index = language.find(',');
133 if (coma_index != std::string::npos) {
134 // There are more than 1 language specified, just keep the first one.
135 language = language.substr(0, coma_index);
136 }
137 TrimWhitespaceASCII(language, TRIM_ALL, &language);
138 return language;
139 }
140
141 // static
142 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { 125 std::string TranslateHelper::DetermineTextLanguage(const string16& text) {
143 std::string language = chrome::kUnknownLanguageCode; 126 std::string language = chrome::kUnknownLanguageCode;
144 int num_languages = 0; 127 int num_languages = 0;
145 int text_bytes = 0; 128 int text_bytes = 0;
146 bool is_reliable = false; 129 bool is_reliable = false;
147 Language cld_language = 130 Language cld_language =
148 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, 131 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
149 &num_languages, NULL, &text_bytes); 132 &num_languages, NULL, &text_bytes);
150 // We don't trust the result if the CLD reports that the detection is not 133 // We don't trust the result if the CLD reports that the detection is not
151 // reliable, or if the actual text used to detect the language was less than 134 // reliable, or if the actual text used to detect the language was less than
(...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after
443 WebView* web_view = render_view()->GetWebView(); 426 WebView* web_view = render_view()->GetWebView();
444 if (!web_view) { 427 if (!web_view) {
445 // When the WebView is going away, the render view should have called 428 // When the WebView is going away, the render view should have called
446 // CancelPendingTranslation() which should have stopped any pending work, so 429 // CancelPendingTranslation() which should have stopped any pending work, so
447 // that case should not happen. 430 // that case should not happen.
448 NOTREACHED(); 431 NOTREACHED();
449 return NULL; 432 return NULL;
450 } 433 }
451 return web_view->mainFrame(); 434 return web_view->mainFrame();
452 } 435 }
OLDNEW
« no previous file with comments | « chrome/renderer/translate_helper.h ('k') | chrome/renderer/translate_helper_browsertest.cc » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698