Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1)

Side by Side Diff: chrome/browser/autofill/autofill_locale_model_unittest.cc

Issue 3226001: Detecting form locale (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: Unit test for top websites Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
(Empty)
1 // Copyright (c) 2010 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
4
5 #include "chrome/browser/autofill/autofill_locale_model.h"
6
7 #include <string>
8 #include <vector>
9
10 #include "chrome/browser/autofill/form_structure.h"
11 #include "chrome/browser/renderer_host/test/test_render_view_host.h"
12 #include "chrome/browser/tab_contents/language_state.h"
13 #include "chrome/browser/tab_contents/tab_contents.h"
14 #include "chrome/browser/tab_contents/test_tab_contents.h"
15 #include "chrome/common/chrome_constants.h"
16 #include "testing/gtest/include/gtest/gtest.h"
17 #include "webkit/glue/form_data.h"
18
19 namespace {
20
21 // Mocks out the original_language() function to return a pre-set value.
22 class MockLanguageState : public LanguageState {
23 public:
24 explicit MockLanguageState(const std::string& locale)
25 : LanguageState(NULL),
26 original_language_(locale) {}
27 virtual ~MockLanguageState() {}
28
29 virtual const std::string& original_language() const {
30 return original_language_;
31 }
32
33 private:
34 const std::string& original_language_;
35
36 DISALLOW_COPY_AND_ASSIGN(MockLanguageState);
37 };
38
39 // Testing version of AutoFillLocaleModel that exposes access to the model's
40 // registry map.
41 class TestAutoFillLocaleModel : public AutoFillLocaleModel {
42 public:
43 TestAutoFillLocaleModel() : AutoFillLocaleModel(NULL) {}
44
45 // Verifies that the regions and languages in the registry map are valid.
46 void ValidateRegistryMap() {
47 for (AutoFillRegistryMap::const_iterator it = registries()->begin();
48 it != registries()->end(); ++it) {
49 SCOPED_TRACE("Region: \"" + it->second.region + "\"");
50 EXPECT_TRUE(AutoFillLocaleModel::IsValidRegionTag(it->second.region));
51
52 const std::vector<std::string>& languages = it->second.languages;
53 for (std::vector<std::string>::const_iterator it2 = languages.begin();
54 it2 != languages.end(); ++it2) {
55 SCOPED_TRACE("Language: \"" + *it2 + "\"");
56 EXPECT_TRUE(AutoFillLocaleModel::IsValidLanguageTag(*it2));
57 }
58 }
59 }
60
61 private:
62 DISALLOW_COPY_AND_ASSIGN(TestAutoFillLocaleModel);
63 };
64
65 class AutoFillLocaleModelTest : public RenderViewHostTestHarness {
66 public:
67 AutoFillLocaleModelTest() {}
68 virtual ~AutoFillLocaleModelTest() {}
69
70 // Mocks out the GetURL() and language_state() functions to return pre-set
71 // values.
72 class MockTabContents : public TestTabContents {
73 public:
74 MockTabContents(const TestTabContents* contents,
75 const std::string& detected_locale,
76 const GURL& url)
77 : TestTabContents(contents->profile(), contents->GetSiteInstance()),
78 language_state_(detected_locale),
79 url_(url) {}
80 virtual ~MockTabContents() {}
81
82 virtual const GURL& GetURL() const { return url_; }
83 virtual LanguageState& language_state() { return language_state_; }
84 virtual const LanguageState& language_state() const {
85 return language_state_;
86 }
87
88 private:
89 MockLanguageState language_state_;
90 const GURL url_;
91 };
92
93 void RunTest(const std::string& dom_locale,
94 const std::string& detected_locale,
95 const std::string& source_registry,
96 const std::string& target_registry,
97 const std::string& expected_locale) {
98 SCOPED_TRACE("\n\tDOM: \"" + dom_locale + "\""
99 "\n\tCLD: \"" + detected_locale + "\""
100 "\n\tSource registry: \"" + source_registry + "\""
101 "\n\tTarget registry: \"" + target_registry + "\"");
102
103 const GURL source_url("http://www.foo." + source_registry + "/form.html");
104 MockTabContents tab_contents(contents(), detected_locale, source_url);
105
106 AutoFillLocaleModel model(&tab_contents);
107 model.set_tab_language_determined(true);
108
109 webkit_glue::FormData form_data;
110 form_data.origin = source_url;
111 form_data.action = GURL("http://www.foo." + target_registry + "/go.html");
112 form_data.locale = dom_locale;
113 FormStructure form(form_data);
114 model.UpdateLocale(&form);
115
116 EXPECT_EQ(expected_locale, form.locale());
117 }
118
119 void RunWebsiteTest(const std::string& dom_locale,
120 const std::string& detected_locale,
121 const std::string& website,
122 const std::string& expected_locale) {
123 SCOPED_TRACE("\n\tDOM: \"" + dom_locale + "\""
124 "\n\tCLD: \"" + detected_locale + "\""
125 "\n\tSite: \"" + website + "\"");
126
127 const GURL source_url(website);
128 MockTabContents tab_contents(contents(), detected_locale, source_url);
129
130 AutoFillLocaleModel model(&tab_contents);
131 model.set_tab_language_determined(true);
132
133 webkit_glue::FormData form_data;
134 form_data.origin = source_url;
135 form_data.action = source_url;
136 form_data.locale = dom_locale;
137 FormStructure form(form_data);
138 model.UpdateLocale(&form);
139
140 EXPECT_EQ(expected_locale, form.locale());
141 }
142
143 private:
144 DISALLOW_COPY_AND_ASSIGN(AutoFillLocaleModelTest);
145 };
146
147 // TODO(isherman): This test currently assumes that a different function will be
148 // responsible for determining the best region for the locale, and that
149 // DetermineFormLocale() does not promote e.g. "fr" to "fr-FR".
150 //
151 // Make sure that we correctly prioritize our various locale indicators:
152 // * the form's locale according to the DOM, e.g. via the HTML "lang" attribute
153 // * the page's locale according to CLD
154 // * the page's registry
155 TEST_F(AutoFillLocaleModelTest, DetermineFormLocale) {
156 // If everything matches, this should be easy.
157 RunTest("en", "en", "com", "com", "en");
158 RunTest("en-US", "en-US", "org", "org", "en-US");
159 RunTest("fr", "fr", "net", "net", "fr");
160 RunTest("fr-CA", "fr-CA", "ca", "ca", "fr-CA");
161
162 // Match should be as specific as possible.
163 RunTest("en", "en-US", "com", "com", "en-US");
164 RunTest("en-US", "en", "com", "com", "en-US");
165 RunTest("fr", "fr-CA", "com", "com", "fr-CA");
166 RunTest("fr-CA", "fr", "com", "com", "fr-CA");
167
168 // Trust the HTML locale attribute, unless it is "en", "en-US", or empty.
169 // Otherwise trust the page language, unless it is unknown.
170 RunTest("fr", "en", "com", "com", "fr");
171 RunTest("fr-CA", "en-US", "com", "com", "fr-CA");
172 RunTest("en-GB", "fr", "com", "com", "en-GB");
173 RunTest("en", "fr", "com", "com", "fr");
174 RunTest("en-US", "fr", "com", "com", "fr");
175 RunTest("", "fr", "com", "com", "fr");
176 RunTest("en-US", chrome::kUnknownLanguageCode, "com", "com", "en-US");
177
178 // The registry can help us choose a region, and sometimes even the language.
179 RunTest("en", "en", "co.uk", "co.uk", "en-GB");
180 RunTest("", "en", "co.uk", "co.uk", "en-GB");
181 RunTest("", chrome::kUnknownLanguageCode, "ca", "ca", "fr-CA");
182
183 // Canadian pages can be in either English or French.
184 RunTest("fr", "fr", "ca", "ca", "fr-CA");
185 RunTest("en", "en", "ca", "ca", "en-CA");
186
187 // We trust the source url's registry more than the target url's registry.
188 RunTest("fr", "fr", "com", "ca", "fr-CA");
189 RunTest("fr", "fr", "fr", "ca", "fr-FR");
190
191 // Only use the registry to determine the region when the other indicators
192 // are empty or match the registry's predictions -- consider "bit.ly" and
193 // other such cutely named websites.
194 RunTest("", "en", "fr", "fr", "en");
195 RunTest("fr", chrome::kUnknownLanguageCode, "co.uk", "co.uk", "fr");
196
197 // Case should not affect the match.
198 RunTest("Fr-cA", "Fr-cA", "ca", "ca", "fr-CA");
199 RunTest("RU", chrome::kUnknownLanguageCode, "com", "com", "ru");
200 RunTest("EN-us", "fr", "com", "com", "fr");
201 RunTest("en", "en", "CO.UK", "CO.UK", "en-GB");
202
203 // We should trim whitespace.
204 RunTest(" en-GB ", chrome::kUnknownLanguageCode, "com", "com", "en-GB");
205
206 // If there are no signals at all, guess "en-US".
207 RunTest("", chrome::kUnknownLanguageCode, "com", "com", "en-US");
208
209 // Test a few invalid locales as well, just to make sure nothing breaks.
210 RunTest("xx-XX", chrome::kUnknownLanguageCode, "com", "com", "en-US");
211 RunTest(" x ", chrome::kUnknownLanguageCode, "com", "com", "en-US");
212 }
213
214 // Make sure that our registry map maps to valid regions and languages.
215 TEST_F(AutoFillLocaleModelTest, ValidateRegistryMap) {
216 TestAutoFillLocaleModel model;
217 model.ValidateRegistryMap();
218 }
219
220 // Tests that we correctly detect the locale for top international websites.
221 // To add a case to this list, run Chromium in debug mode with the code snippet
222 // below added to AutoFillLocaleModel::UpdateLocale(), then copy in the output.
223 // printf("UpdateLocale()\n"
224 // "\tDOM: \"%s\"\n"
225 // "\tCLD: \"%s\"\n"
226 // "\tsource: \"%s\"\n"
227 // "\ttarget: \"%s\"\n"
228 // "\tpage: \"%s\"\n",
229 // form->locale().c_str(),
230 // tab_contents_->language_state().original_language().c_str(),
231 // form->source_url().spec().c_str(),
232 // form->target_url().spec().c_str(),
233 // tab_contents_->GetURL().spec().c_str());
234 TEST_F(AutoFillLocaleModelTest, TopWebsiteLocales) {
235 // Chinese (Simplified): Amazon China
236 RunWebsiteTest("", "zh-CN", "https://www.amazon.cn/", "zh-CN");
237
238 // Chinese (Traditional): PChome
239 RunWebsiteTest("", "zh-TW", "https://ecssl.pchome.com.tw/", "zh-TW");
240
241 // English/AU: Bookworm
242 RunWebsiteTest("", "en", "https://www.bookworm.com.au", "en-AU");
243
244 // English/CA: Chapters/Indigo
245 RunWebsiteTest("", "en", "https://shop.chapters.indigo.ca", "en-CA");
246
247 // English/NZ: Fishpond
248 RunWebsiteTest("", "en", "https://www.fishpond.co.nz/", "en-NZ");
249
250 // Hebrew
251 RunWebsiteTest("he", "he", "https://book4book.co.il/", "he-IL");
252
253 // Italian: bol.it
254 RunWebsiteTest("", "it", "https://www.bol.it/", "it-IT");
255
256 // Japanese: rakuten
257 RunWebsiteTest("", "ja", "https://order.step.rakuten.co.jp/", "ja-JP");
258
259 // Portuguese/BR:
260 RunWebsiteTest("", "pt", "https://www.livrariasaraiva.com.br/", "pt-BR");
261
262 // Spanish/MX:
263 RunWebsiteTest("", "es", "http://www.librerialeo.com.mx/create_account.php",
264 "es-MX");
265 }
266
267 // TODO(isherman): for two of these, we can get the right answer just by
268 // guessing the most common region; but for the other two, that would still
269 // give the wrong answer.
270 // Tracks top websites where we detect the form locale incorrectly.
271 TEST_F(AutoFillLocaleModelTest, FAILS_TopWebsiteLocalesThatCurrentlyFail) {
272 // English/IN: rediff books
273 RunWebsiteTest("", "en", "http://commerce.rediff.com/", "en-IN");
274
275 // French: fnac
276 RunWebsiteTest("", "fr", "https://secure.fnac.com/", "fr-FR");
277
278 // Japanese/US: Fujisan
279 RunWebsiteTest("", "ja", "https://www.fujisan.com/control/newcustomer",
280 "ja-US");
281
282 // Japanese/US: rakuten, after selecting German as a langauge...
283 RunWebsiteTest("", "en", "https://en.order.step.rakuten.co.jp/", "en-US");
284 }
285
286 // TODO(isherman): unit test that verifies mapping a locale to a region
287
288 } // namespace
OLDNEW
« no previous file with comments | « chrome/browser/autofill/autofill_locale_model.cc ('k') | chrome/browser/autofill/autofill_manager.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698