Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(516)

Side by Side Diff: chrome/renderer/translate_helper.cc

Issue 12221085: Translate: split language code typo correction to apply unit tests. (Closed) Base URL: svn://svn.chromium.org/chrome/trunk/src
Patch Set: Created 7 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved. 1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/renderer/translate_helper.h" 5 #include "chrome/renderer/translate_helper.h"
6 6
7 #include "base/bind.h" 7 #include "base/bind.h"
8 #include "base/compiler_specific.h" 8 #include "base/compiler_specific.h"
9 #include "base/logging.h" 9 #include "base/logging.h"
10 #include "base/message_loop.h" 10 #include "base/message_loop.h"
(...skipping 75 matching lines...) Expand 10 before | Expand all | Expand 10 after
86 // Get the document language as set by WebKit from the http-equiv 86 // Get the document language as set by WebKit from the http-equiv
87 // meta tag for "content-language". This may or may not also 87 // meta tag for "content-language". This may or may not also
88 // have a value derived from the actual Content-Language HTTP 88 // have a value derived from the actual Content-Language HTTP
89 // header. The two actually have different meanings (despite the 89 // header. The two actually have different meanings (despite the
90 // original intent of http-equiv to be an equivalent) with the former 90 // original intent of http-equiv to be an equivalent) with the former
91 // being the language of the document and the latter being the 91 // being the language of the document and the latter being the
92 // language of the intended audience (a distinction really only 92 // language of the intended audience (a distinction really only
93 // relevant for things like language textbooks). This distinction 93 // relevant for things like language textbooks). This distinction
94 // shouldn't affect translation. 94 // shouldn't affect translation.
95 std::string language = document.contentLanguage().utf8(); 95 std::string language = document.contentLanguage().utf8();
96 size_t coma_index = language.find(','); 96 CorrectLanguageCodeTypo(&language);
97 if (coma_index != std::string::npos) {
98 // There are more than 1 language specified, just keep the first one.
99 language = language.substr(0, coma_index);
100 }
101 TrimWhitespaceASCII(language, TRIM_ALL, &language);
102
103 // An underscore instead of a dash is a frequent mistake.
104 size_t underscore_index = language.find('_');
105 if (underscore_index != std::string::npos)
106 language[underscore_index] = '-';
107
108 // Change everything up to a dash to lower-case and everything after to upper.
109 size_t dash_index = language.find('-');
110 if (dash_index != std::string::npos) {
111 language = StringToLowerASCII(language.substr(0, dash_index)) +
112 StringToUpperASCII(language.substr(dash_index));
113 } else {
114 language = StringToLowerASCII(language);
115 }
116 97
117 #if defined(ENABLE_LANGUAGE_DETECTION) 98 #if defined(ENABLE_LANGUAGE_DETECTION)
118 if (language.empty()) { 99 if (language.empty()) {
119 base::TimeTicks begin_time = base::TimeTicks::Now(); 100 base::TimeTicks begin_time = base::TimeTicks::Now();
120 language = DetermineTextLanguage(contents); 101 language = DetermineTextLanguage(contents);
121 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection", 102 UMA_HISTOGRAM_MEDIUM_TIMES("Renderer4.LanguageDetection",
122 base::TimeTicks::Now() - begin_time); 103 base::TimeTicks::Now() - begin_time);
123 } else { 104 } else {
124 VLOG(9) << "PageLanguageFromMetaTag: " << language; 105 VLOG(9) << "PageLanguageFromMetaTag: " << language;
125 } 106 }
(...skipping 62 matching lines...) Expand 10 before | Expand all | Expand 10 after
188 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text 169 VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text
189 << "\n*************************************\n"; 170 << "\n*************************************\n";
190 return language; 171 return language;
191 } 172 }
192 #endif // defined(ENABLE_LANGUAGE_DETECTION) 173 #endif // defined(ENABLE_LANGUAGE_DETECTION)
193 174
194 //////////////////////////////////////////////////////////////////////////////// 175 ////////////////////////////////////////////////////////////////////////////////
195 // TranslateHelper, protected: 176 // TranslateHelper, protected:
196 // 177 //
197 // static 178 // static
179 void TranslateHelper::CorrectLanguageCodeTypo(std::string* code) {
180 DCHECK(code);
181
182 size_t coma_index = code->find(',');
183 if (coma_index != std::string::npos) {
184 // There are more than 1 language specified, just keep the first one.
185 *code = code->substr(0, coma_index);
186 }
187 TrimWhitespaceASCII(*code, TRIM_ALL, code);
188
189 // An underscore instead of a dash is a frequent mistake.
190 size_t underscore_index = code->find('_');
191 if (underscore_index != std::string::npos)
192 (*code)[underscore_index] = '-';
193
194 // Change everything up to a dash to lower-case and everything after to upper.
195 size_t dash_index = code->find('-');
196 if (dash_index != std::string::npos) {
197 *code = StringToLowerASCII(code->substr(0, dash_index)) +
198 StringToUpperASCII(code->substr(dash_index));
199 } else {
200 *code = StringToLowerASCII(*code);
201 }
202 }
203
204 // static
198 void TranslateHelper::ConvertLanguageCodeSynonym(std::string* code) { 205 void TranslateHelper::ConvertLanguageCodeSynonym(std::string* code) {
206 DCHECK(code);
207
199 // Apply linear search here because number of items in the list is just four. 208 // Apply linear search here because number of items in the list is just four.
200 for (size_t i = 0; i < arraysize(kLanguageCodeSynonyms); ++i) { 209 for (size_t i = 0; i < arraysize(kLanguageCodeSynonyms); ++i) {
201 if (code->compare(kLanguageCodeSynonyms[i].from) == 0) { 210 if (code->compare(kLanguageCodeSynonyms[i].from) == 0) {
202 *code = std::string(kLanguageCodeSynonyms[i].to); 211 *code = std::string(kLanguageCodeSynonyms[i].to);
203 break; 212 break;
204 } 213 }
205 } 214 }
206 } 215 }
207 216
208 bool TranslateHelper::IsTranslateLibAvailable() { 217 bool TranslateHelper::IsTranslateLibAvailable() {
(...skipping 270 matching lines...) Expand 10 before | Expand all | Expand 10 after
479 WebView* web_view = render_view()->GetWebView(); 488 WebView* web_view = render_view()->GetWebView();
480 if (!web_view) { 489 if (!web_view) {
481 // When the WebView is going away, the render view should have called 490 // When the WebView is going away, the render view should have called
482 // CancelPendingTranslation() which should have stopped any pending work, so 491 // CancelPendingTranslation() which should have stopped any pending work, so
483 // that case should not happen. 492 // that case should not happen.
484 NOTREACHED(); 493 NOTREACHED();
485 return NULL; 494 return NULL;
486 } 495 }
487 return web_view->mainFrame(); 496 return web_view->mainFrame();
488 } 497 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698