OLD | NEW |
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. | 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/renderer/translate_helper.h" | 5 #include "chrome/renderer/translate_helper.h" |
6 | 6 |
7 #include "base/bind.h" | 7 #include "base/bind.h" |
8 #include "base/compiler_specific.h" | 8 #include "base/compiler_specific.h" |
9 #include "base/logging.h" | 9 #include "base/logging.h" |
10 #include "base/message_loop.h" | 10 #include "base/message_loop.h" |
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
86 // Get the document language as set by WebKit from the http-equiv | 86 // Get the document language as set by WebKit from the http-equiv |
87 // meta tag for "content-language". This may or may not also | 87 // meta tag for "content-language". This may or may not also |
88 // have a value derived from the actual Content-Language HTTP | 88 // have a value derived from the actual Content-Language HTTP |
89 // header. The two actually have different meanings (despite the | 89 // header. The two actually have different meanings (despite the |
90 // original intent of http-equiv to be an equivalent) with the former | 90 // original intent of http-equiv to be an equivalent) with the former |
91 // being the language of the document and the latter being the | 91 // being the language of the document and the latter being the |
92 // language of the intended audience (a distinction really only | 92 // language of the intended audience (a distinction really only |
93 // relevant for things like language textbooks). This distinction | 93 // relevant for things like language textbooks). This distinction |
94 // shouldn't affect translation. | 94 // shouldn't affect translation. |
95 std::string language = document.contentLanguage().utf8(); | 95 std::string language = document.contentLanguage().utf8(); |
96 size_t coma_index = language.find(','); | 96 CorrectLanguageCodeTypo(&language); |
97 if (coma_index != std::string::npos) { | |
98 // More than one language is specified; just keep the first one. | |
99 language = language.substr(0, coma_index); | |
100 } | |
101 TrimWhitespaceASCII(language, TRIM_ALL, &language); | |
102 | |
103 // An underscore instead of a dash is a frequent mistake. | |
104 size_t underscore_index = language.find('_'); | |
105 if (underscore_index != std::string::npos) | |
106 language[underscore_index] = '-'; | |
107 | |
108 // Change everything up to a dash to lower-case and everything after to upper. | |
109 size_t dash_index = language.find('-'); | |
110 if (dash_index != std::string::npos) { | |
111 language = StringToLowerASCII(language.substr(0, dash_index)) + | |
112 StringToUpperASCII(language.substr(dash_index)); | |
113 } else { | |
114 language = StringToLowerASCII(language); | |
115 } | |
116 | 97 |
117 #if defined(ENABLE_LANGUAGE_DETECTION) | 98 #if defined(ENABLE_LANGUAGE_DETECTION) |
118 if (language.empty()) { | 99 if (language.empty()) { |
119 base::TimeTicks begin_time = base::TimeTicks::Now(); | 100 base::TimeTicks begin_time = base::TimeTicks::Now(); |
120 language = DetermineTextLanguage(contents); | 101 language = DetermineTextLanguage(contents); |
121 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", | 102 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", |
122 base::TimeTicks::Now() - begin_time); | 103 base::TimeTicks::Now() - begin_time); |
123 } else { | 104 } else { |
124 VLOG(9) << "PageLanguageFromMetaTag: " << language; | 105 VLOG(9) << "PageLanguageFromMetaTag: " << language; |
125 } | 106 } |
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
188 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text | 169 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text |
189 << "\n*************************************\n"; | 170 << "\n*************************************\n"; |
190 return language; | 171 return language; |
191 } | 172 } |
192 #endif // defined(ENABLE_LANGUAGE_DETECTION) | 173 #endif // defined(ENABLE_LANGUAGE_DETECTION) |
193 | 174 |
194 //////////////////////////////////////////////////////////////////////////////// | 175 //////////////////////////////////////////////////////////////////////////////// |
195 // TranslateHelper, protected: | 176 // TranslateHelper, protected: |
196 // | 177 // |
197 // static | 178 // static |
| 179 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) { |
| 180 DCHECK(code); |
| 181 |
| 182 size_t coma_index = code->find(','); |
| 183 if (coma_index != std::string::npos) { |
| 184 // More than one language is specified; just keep the first one. |
| 185 *code = code->substr(0, coma_index); |
| 186 } |
| 187 TrimWhitespaceASCII(*code, TRIM_ALL, code); |
| 188 |
| 189 // An underscore instead of a dash is a frequent mistake. |
| 190 size_t underscore_index = code->find('_'); |
| 191 if (underscore_index != std::string::npos) |
| 192 (*code)[underscore_index] = '-'; |
| 193 |
| 194 // Change everything up to a dash to lower-case and everything after to upper. |
| 195 size_t dash_index = code->find('-'); |
| 196 if (dash_index != std::string::npos) { |
| 197 *code = StringToLowerASCII(code->substr(0, dash_index)) + |
| 198 StringToUpperASCII(code->substr(dash_index)); |
| 199 } else { |
| 200 *code = StringToLowerASCII(*code); |
| 201 } |
| 202 } |
| 203 |
| 204 // static |
198 void TranslateHelper::ConvertLanguageCodeSynonym(std::string* code) { | 205 void TranslateHelper::ConvertLanguageCodeSynonym(std::string* code) { |
| 206 DCHECK(code); |
| 207 |
199 // Apply linear search here because the number of items in the list is just four. | 208 // Apply linear search here because the number of items in the list is just four. |
200 for (size_t i = 0; i < arraysize(kLanguageCodeSynonyms); ++i) { | 209 for (size_t i = 0; i < arraysize(kLanguageCodeSynonyms); ++i) { |
201 if (code->compare(kLanguageCodeSynonyms[i].from) == 0) { | 210 if (code->compare(kLanguageCodeSynonyms[i].from) == 0) { |
202 *code = std::string(kLanguageCodeSynonyms[i].to); | 211 *code = std::string(kLanguageCodeSynonyms[i].to); |
203 break; | 212 break; |
204 } | 213 } |
205 } | 214 } |
206 } | 215 } |
207 | 216 |
208 bool TranslateHelper::IsTranslateLibAvailable() { | 217 bool TranslateHelper::IsTranslateLibAvailable() { |
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
479 WebView* web_view = render_view()->GetWebView(); | 488 WebView* web_view = render_view()->GetWebView(); |
480 if (!web_view) { | 489 if (!web_view) { |
481 // When the WebView is going away, the render view should have called | 490 // When the WebView is going away, the render view should have called |
482 // CancelPendingTranslation() which should have stopped any pending work, so | 491 // CancelPendingTranslation() which should have stopped any pending work, so |
483 // that case should not happen. | 492 // that case should not happen. |
484 NOTREACHED(); | 493 NOTREACHED(); |
485 return NULL; | 494 return NULL; |
486 } | 495 } |
487 return web_view->mainFrame(); | 496 return web_view->mainFrame(); |
488 } | 497 } |
OLD | NEW |