| OLD | NEW |
| 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/renderer/translate_helper.h" | 5 #include "chrome/renderer/translate_helper.h" |
| 6 | 6 |
| 7 #include "base/bind.h" | 7 #include "base/bind.h" |
| 8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
| 9 #include "base/logging.h" | 9 #include "base/logging.h" |
| 10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
| 11 #include "base/metrics/histogram.h" | 11 #include "base/metrics/histogram.h" |
| 12 #include "base/string16.h" |
| 12 #include "base/utf_string_conversions.h" | 13 #include "base/utf_string_conversions.h" |
| 13 #include "chrome/common/chrome_constants.h" | 14 #include "chrome/common/chrome_constants.h" |
| 14 #include "chrome/common/render_messages.h" | 15 #include "chrome/common/render_messages.h" |
| 15 #include "content/public/renderer/render_view.h" | 16 #include "content/public/renderer/render_view.h" |
| 16 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" | 17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebDocument.h" |
| 17 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" | 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebElement.h" |
| 18 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" | 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebFrame.h" |
| 19 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" | 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebScriptSource.h" |
| 20 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" | 21 #include "third_party/WebKit/Source/WebKit/chromium/public/WebView.h" |
| 21 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" | 22 #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" |
| (...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 53 page_id_(-1), | 54 page_id_(-1), |
| 54 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { | 55 ALLOW_THIS_IN_INITIALIZER_LIST(weak_method_factory_(this)) { |
| 55 } | 56 } |
| 56 | 57 |
| 57 TranslateHelper::~TranslateHelper() { | 58 TranslateHelper::~TranslateHelper() { |
| 58 CancelPendingTranslation(); | 59 CancelPendingTranslation(); |
| 59 } | 60 } |
| 60 | 61 |
| 61 void TranslateHelper::PageCaptured(const string16& contents) { | 62 void TranslateHelper::PageCaptured(const string16& contents) { |
| 62 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); | 63 WebDocument document = render_view()->GetWebView()->mainFrame()->document(); |
| 63 // If the page explicitly specifies a language, use it, otherwise we'll | 64 |
| 64 // determine it based on the text content using the CLD. | 65 // Get the document language as set by WebKit from the http-equiv |
| 65 std::string language = GetPageLanguageFromMetaTag(&document); | 66 // meta tag for "content-language". This may or may not also |
| 67 // have a value derived from the actual Content-Language HTTP |
| 68 // header. The two actually have different meanings (despite the |
| 69 // original intent of http-equiv to be an equivalent) with the former |
| 70 // being the language of the document and the latter being the |
| 71 // language of the intended audience (a distinction really only |
| 72 // relevant for things like language textbooks). This distinction |
| 73 // shouldn't affect translation. |
| 74 std::string language = document.contentLanguage().utf8(); |
| 75 size_t coma_index = language.find(','); |
| 76 if (coma_index != std::string::npos) { |
| 77 // More than one language is specified; keep only the first one. |
| 78 language = language.substr(0, coma_index); |
| 79 } |
| 80 TrimWhitespaceASCII(language, TRIM_ALL, &language); |
| 81 language = StringToLowerASCII(language); |
| 82 |
| 66 if (language.empty()) { | 83 if (language.empty()) { |
| 67 base::TimeTicks begin_time = base::TimeTicks::Now(); | 84 base::TimeTicks begin_time = base::TimeTicks::Now(); |
| 68 language = DetermineTextLanguage(contents); | 85 language = DetermineTextLanguage(contents); |
| 69 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 86 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
| 70 base::TimeTicks::Now() - begin_time); | 87 base::TimeTicks::Now() - begin_time); |
| 71 } else { | 88 } else { |
| 72 VLOG(9) << "PageLanguageFromMetaTag: " << language; | 89 VLOG(9) << "PageLanguageFromMetaTag: " << language; |
| 73 } | 90 } |
| 74 | 91 |
| 75 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( | 92 Send(new ChromeViewHostMsg_TranslateLanguageDetermined( |
| (...skipping 22 matching lines...) Expand all Loading... |
| 98 attribute = iter->getAttribute("content"); | 115 attribute = iter->getAttribute("content"); |
| 99 if (attribute.isNull()) | 116 if (attribute.isNull()) |
| 100 continue; | 117 continue; |
| 101 if (LowerCaseEqualsASCII(attribute, "notranslate")) | 118 if (LowerCaseEqualsASCII(attribute, "notranslate")) |
| 102 return false; | 119 return false; |
| 103 } | 120 } |
| 104 return true; | 121 return true; |
| 105 } | 122 } |
| 106 | 123 |
| 107 // static | 124 // static |
| 108 std::string TranslateHelper::GetPageLanguageFromMetaTag(WebDocument* document) { | |
| 109 // The META language tag looks like: | |
| 110 // <meta http-equiv="content-language" content="en"> | |
| 111 // It can contain more than one language: | |
| 112 // <meta http-equiv="content-language" content="en, fr"> | |
| 113 std::vector<WebElement> meta_elements; | |
| 114 webkit_glue::GetMetaElementsWithAttribute(document, | |
| 115 ASCIIToUTF16("http-equiv"), | |
| 116 ASCIIToUTF16("content-language"), | |
| 117 &meta_elements); | |
| 118 if (meta_elements.empty()) | |
| 119 return std::string(); | |
| 120 | |
| 121 // We don't expect more than one such tag. If there are several, just use the | |
| 122 // first one. | |
| 123 WebString attribute = meta_elements[0].getAttribute("content"); | |
| 124 if (attribute.isEmpty()) | |
| 125 return std::string(); | |
| 126 | |
| 127 // The value is supposed to be ASCII. | |
| 128 if (!IsStringASCII(attribute)) | |
| 129 return std::string(); | |
| 130 | |
| 131 std::string language = StringToLowerASCII(UTF16ToASCII(attribute)); | |
| 132 size_t coma_index = language.find(','); | |
| 133 if (coma_index != std::string::npos) { | |
| 134 // There are more than 1 language specified, just keep the first one. | |
| 135 language = language.substr(0, coma_index); | |
| 136 } | |
| 137 TrimWhitespaceASCII(language, TRIM_ALL, &language); | |
| 138 return language; | |
| 139 } | |
| 140 | |
| 141 // static | |
| 142 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { | 125 std::string TranslateHelper::DetermineTextLanguage(const string16& text) { |
| 143 std::string language = chrome::kUnknownLanguageCode; | 126 std::string language = chrome::kUnknownLanguageCode; |
| 144 int num_languages = 0; | 127 int num_languages = 0; |
| 145 int text_bytes = 0; | 128 int text_bytes = 0; |
| 146 bool is_reliable = false; | 129 bool is_reliable = false; |
| 147 Language cld_language = | 130 Language cld_language = |
| 148 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, | 131 DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, |
| 149 &num_languages, NULL, &text_bytes); | 132 &num_languages, NULL, &text_bytes); |
| 150 // We don't trust the result if the CLD reports that the detection is not | 133 // We don't trust the result if the CLD reports that the detection is not |
| 151 // reliable, or if the actual text used to detect the language was less than | 134 // reliable, or if the actual text used to detect the language was less than |
| (...skipping 291 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 443 WebView* web_view = render_view()->GetWebView(); | 426 WebView* web_view = render_view()->GetWebView(); |
| 444 if (!web_view) { | 427 if (!web_view) { |
| 445 // When the WebView is going away, the render view should have called | 428 // When the WebView is going away, the render view should have called |
| 446 // CancelPendingTranslation() which should have stopped any pending work, so | 429 // CancelPendingTranslation() which should have stopped any pending work, so |
| 447 // that case should not happen. | 430 // that case should not happen. |
| 448 NOTREACHED(); | 431 NOTREACHED(); |
| 449 return NULL; | 432 return NULL; |
| 450 } | 433 } |
| 451 return web_view->mainFrame(); | 434 return web_view->mainFrame(); |
| 452 } | 435 } |
| OLD | NEW |