Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(331)

Unified Diff: chrome/browser/autofill/autofill_locale_model_unittest.cc

Issue 3226001: Detecting form locale (Closed) Base URL: http://src.chromium.org/git/chromium.git
Patch Set: Unit test for top websites Created 10 years, 3 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « chrome/browser/autofill/autofill_locale_model.cc ('k') | chrome/browser/autofill/autofill_manager.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: chrome/browser/autofill/autofill_locale_model_unittest.cc
diff --git a/chrome/browser/autofill/autofill_locale_model_unittest.cc b/chrome/browser/autofill/autofill_locale_model_unittest.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a83603455a2b5756be88fd67f5c7a674d966a769
--- /dev/null
+++ b/chrome/browser/autofill/autofill_locale_model_unittest.cc
@@ -0,0 +1,288 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "chrome/browser/autofill/autofill_locale_model.h"
+
+#include <string>
+#include <vector>
+
+#include "chrome/browser/autofill/form_structure.h"
+#include "chrome/browser/renderer_host/test/test_render_view_host.h"
+#include "chrome/browser/tab_contents/language_state.h"
+#include "chrome/browser/tab_contents/tab_contents.h"
+#include "chrome/browser/tab_contents/test_tab_contents.h"
+#include "chrome/common/chrome_constants.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "webkit/glue/form_data.h"
+
+namespace {
+
+// Mocks out the original_language() function to return a pre-set value.
+//
+// The locale is copied into the mock rather than held by reference, so the
+// mock stays valid even when it is constructed from a temporary string or
+// outlives the string it was constructed from.
+class MockLanguageState : public LanguageState {
+ public:
+  // |locale| is the value that original_language() will report.
+  explicit MockLanguageState(const std::string& locale)
+      : LanguageState(NULL),
+        original_language_(locale) {}
+  virtual ~MockLanguageState() {}
+
+  // Returns the locale supplied at construction time.
+  virtual const std::string& original_language() const {
+    return original_language_;
+  }
+
+ private:
+  // Owned copy of the locale passed to the constructor.
+  const std::string original_language_;
+
+  DISALLOW_COPY_AND_ASSIGN(MockLanguageState);
+};
+
+// Test-only subclass of AutoFillLocaleModel, constructed with a NULL argument,
+// that can check the contents of the model's registry map.
+class TestAutoFillLocaleModel : public AutoFillLocaleModel {
+ public:
+  TestAutoFillLocaleModel() : AutoFillLocaleModel(NULL) {}
+
+  // Walks every entry returned by registries() and expects that its region
+  // passes IsValidRegionTag() and that each of its languages passes
+  // IsValidLanguageTag().  The region and language being checked are added to
+  // the gtest trace so that a failure identifies the offending entry.
+  void ValidateRegistryMap() {
+    typedef std::vector<std::string> LanguageList;
+
+    AutoFillRegistryMap::const_iterator entry = registries()->begin();
+    for (; entry != registries()->end(); ++entry) {
+      const std::string& region = entry->second.region;
+      SCOPED_TRACE("Region: \"" + region + "\"");
+      EXPECT_TRUE(AutoFillLocaleModel::IsValidRegionTag(region));
+
+      const LanguageList& languages = entry->second.languages;
+      LanguageList::const_iterator language = languages.begin();
+      for (; language != languages.end(); ++language) {
+        SCOPED_TRACE("Language: \"" + *language + "\"");
+        EXPECT_TRUE(AutoFillLocaleModel::IsValidLanguageTag(*language));
+      }
+    }
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(TestAutoFillLocaleModel);
+};
+
+// Test fixture that runs AutoFillLocaleModel::UpdateLocale() against a mocked
+// tab and a form built from test-supplied values, and checks the locale that
+// ends up on the form.
+class AutoFillLocaleModelTest : public RenderViewHostTestHarness {
+ public:
+  AutoFillLocaleModelTest() {}
+  virtual ~AutoFillLocaleModelTest() {}
+
+  // Mocks out the GetURL() and language_state() functions to return pre-set
+  // values.
+  class MockTabContents : public TestTabContents {
+   public:
+    // |contents| supplies the profile and site instance handed to the base
+    // class.  |detected_locale| is forwarded to the mocked language state, and
+    // |url| is the value returned by GetURL().
+    MockTabContents(const TestTabContents* contents,
+                    const std::string& detected_locale,
+                    const GURL& url)
+        : TestTabContents(contents->profile(), contents->GetSiteInstance()),
+          language_state_(detected_locale),
+          url_(url) {}
+    virtual ~MockTabContents() {}
+
+    virtual const GURL& GetURL() const { return url_; }
+    virtual LanguageState& language_state() { return language_state_; }
+    virtual const LanguageState& language_state() const {
+      return language_state_;
+    }
+
+   private:
+    MockLanguageState language_state_;
+    const GURL url_;
+  };
+
+  // Checks locale detection for a form whose origin is
+  // "http://www.foo.<source_registry>/form.html" and whose action is
+  // "http://www.foo.<target_registry>/go.html".
+  //   |dom_locale|       locale set on the form data.
+  //   |detected_locale|  language reported by the tab's language state.
+  //   |expected_locale|  locale the form should have after UpdateLocale().
+  void RunTest(const std::string& dom_locale,
+               const std::string& detected_locale,
+               const std::string& source_registry,
+               const std::string& target_registry,
+               const std::string& expected_locale) {
+    // The trace stays active for the duration of the helper call below, so a
+    // failed expectation is reported together with these inputs.
+    SCOPED_TRACE("\n\tDOM: \"" + dom_locale + "\""
+                 "\n\tCLD: \"" + detected_locale + "\""
+                 "\n\tSource registry: \"" + source_registry + "\""
+                 "\n\tTarget registry: \"" + target_registry + "\"");
+
+    const GURL source_url("http://www.foo." + source_registry + "/form.html");
+    const GURL target_url("http://www.foo." + target_registry + "/go.html");
+    RunTestForUrls(dom_locale, detected_locale, source_url, target_url,
+                   expected_locale);
+  }
+
+  // Checks locale detection for a form on a concrete site.  |website| is used
+  // as the tab's URL, the form's origin and the form's action.  The remaining
+  // parameters have the same meaning as in RunTest().
+  void RunWebsiteTest(const std::string& dom_locale,
+                      const std::string& detected_locale,
+                      const std::string& website,
+                      const std::string& expected_locale) {
+    SCOPED_TRACE("\n\tDOM: \"" + dom_locale + "\""
+                 "\n\tCLD: \"" + detected_locale + "\""
+                 "\n\tSite: \"" + website + "\"");
+
+    const GURL site_url(website);
+    RunTestForUrls(dom_locale, detected_locale, site_url, site_url,
+                   expected_locale);
+  }
+
+ private:
+  // Shared body of RunTest() and RunWebsiteTest().  Builds a mock tab at
+  // |source_url| reporting |detected_locale|, builds a form with origin
+  // |source_url|, action |target_url| and locale |dom_locale|, runs
+  // UpdateLocale() on it, and expects the form's locale to equal
+  // |expected_locale|.
+  void RunTestForUrls(const std::string& dom_locale,
+                      const std::string& detected_locale,
+                      const GURL& source_url,
+                      const GURL& target_url,
+                      const std::string& expected_locale) {
+    MockTabContents tab_contents(contents(), detected_locale, source_url);
+
+    AutoFillLocaleModel model(&tab_contents);
+    model.set_tab_language_determined(true);
+
+    webkit_glue::FormData form_data;
+    form_data.origin = source_url;
+    form_data.action = target_url;
+    form_data.locale = dom_locale;
+    FormStructure form(form_data);
+    model.UpdateLocale(&form);
+
+    EXPECT_EQ(expected_locale, form.locale());
+  }
+
+  DISALLOW_COPY_AND_ASSIGN(AutoFillLocaleModelTest);
+};
+
+// TODO(isherman): This test currently assumes that a different function will be
+// responsible for determining the best region for the locale, and that
+// DetermineFormLocale() does not promote e.g. "fr" to "fr-FR".
+//
+// Checks the relative priority of the locale indicators:
+//   * the form's locale according to the DOM, e.g. via the HTML "lang"
+//     attribute
+//   * the page's locale according to CLD
+//   * the page's registry
+// Each row of the table below is passed to RunTest() in order.
+TEST_F(AutoFillLocaleModelTest, DetermineFormLocale) {
+  struct Case {
+    std::string dom_locale;
+    std::string detected_locale;
+    std::string source_registry;
+    std::string target_registry;
+    std::string expected_locale;
+  };
+
+  const Case kCases[] = {
+    // If everything matches, this should be easy.
+    { "en", "en", "com", "com", "en" },
+    { "en-US", "en-US", "org", "org", "en-US" },
+    { "fr", "fr", "net", "net", "fr" },
+    { "fr-CA", "fr-CA", "ca", "ca", "fr-CA" },
+
+    // Match should be as specific as possible.
+    { "en", "en-US", "com", "com", "en-US" },
+    { "en-US", "en", "com", "com", "en-US" },
+    { "fr", "fr-CA", "com", "com", "fr-CA" },
+    { "fr-CA", "fr", "com", "com", "fr-CA" },
+
+    // Trust the HTML locale attribute, unless it is "en", "en-US", or empty.
+    // Otherwise trust the page language, unless it is unknown.
+    { "fr", "en", "com", "com", "fr" },
+    { "fr-CA", "en-US", "com", "com", "fr-CA" },
+    { "en-GB", "fr", "com", "com", "en-GB" },
+    { "en", "fr", "com", "com", "fr" },
+    { "en-US", "fr", "com", "com", "fr" },
+    { "", "fr", "com", "com", "fr" },
+    { "en-US", chrome::kUnknownLanguageCode, "com", "com", "en-US" },
+
+    // The registry can help us choose a region, and sometimes even the
+    // language.
+    { "en", "en", "co.uk", "co.uk", "en-GB" },
+    { "", "en", "co.uk", "co.uk", "en-GB" },
+    { "", chrome::kUnknownLanguageCode, "ca", "ca", "fr-CA" },
+
+    // Canadian pages can be in either English or French.
+    { "fr", "fr", "ca", "ca", "fr-CA" },
+    { "en", "en", "ca", "ca", "en-CA" },
+
+    // We trust the source url's registry more than the target url's registry.
+    { "fr", "fr", "com", "ca", "fr-CA" },
+    { "fr", "fr", "fr", "ca", "fr-FR" },
+
+    // Only use the registry to determine the region when the other indicators
+    // are empty or match the registry's predictions -- consider "bit.ly" and
+    // other such cutely named websites.
+    { "", "en", "fr", "fr", "en" },
+    { "fr", chrome::kUnknownLanguageCode, "co.uk", "co.uk", "fr" },
+
+    // Case should not affect the match.
+    { "Fr-cA", "Fr-cA", "ca", "ca", "fr-CA" },
+    { "RU", chrome::kUnknownLanguageCode, "com", "com", "ru" },
+    { "EN-us", "fr", "com", "com", "fr" },
+    { "en", "en", "CO.UK", "CO.UK", "en-GB" },
+
+    // We should trim whitespace.
+    { " en-GB ", chrome::kUnknownLanguageCode, "com", "com", "en-GB" },
+
+    // If there are no signals at all, guess "en-US".
+    { "", chrome::kUnknownLanguageCode, "com", "com", "en-US" },
+
+    // Test a few invalid locales as well, just to make sure nothing breaks.
+    { "xx-XX", chrome::kUnknownLanguageCode, "com", "com", "en-US" },
+    { " x ", chrome::kUnknownLanguageCode, "com", "com", "en-US" },
+  };
+
+  const size_t case_count = sizeof(kCases) / sizeof(kCases[0]);
+  for (size_t i = 0; i < case_count; ++i) {
+    const Case& test_case = kCases[i];
+    RunTest(test_case.dom_locale,
+            test_case.detected_locale,
+            test_case.source_registry,
+            test_case.target_registry,
+            test_case.expected_locale);
+  }
+}
+
+// Checks that every region and language in the registry map is valid.
+TEST_F(AutoFillLocaleModelTest, ValidateRegistryMap) {
+  TestAutoFillLocaleModel locale_model;
+  locale_model.ValidateRegistryMap();
+}
+
+// Checks locale detection against inputs recorded from top international
+// websites.  Each row of the table below is passed to RunWebsiteTest() in
+// order.
+//
+// To add a case to this list, run Chromium in debug mode with the code snippet
+// below added to AutoFillLocaleModel::UpdateLocale(), then copy in the output.
+// printf("UpdateLocale()\n"
+// "\tDOM: \"%s\"\n"
+// "\tCLD: \"%s\"\n"
+// "\tsource: \"%s\"\n"
+// "\ttarget: \"%s\"\n"
+// "\tpage: \"%s\"\n",
+// form->locale().c_str(),
+// tab_contents_->language_state().original_language().c_str(),
+// form->source_url().spec().c_str(),
+// form->target_url().spec().c_str(),
+// tab_contents_->GetURL().spec().c_str());
+TEST_F(AutoFillLocaleModelTest, TopWebsiteLocales) {
+  struct Site {
+    std::string dom_locale;
+    std::string detected_locale;
+    std::string website;
+    std::string expected_locale;
+  };
+
+  const Site kSites[] = {
+    // Chinese (Simplified): Amazon China
+    { "", "zh-CN", "https://www.amazon.cn/", "zh-CN" },
+
+    // Chinese (Traditional): PChome
+    { "", "zh-TW", "https://ecssl.pchome.com.tw/", "zh-TW" },
+
+    // English/AU: Bookworm
+    { "", "en", "https://www.bookworm.com.au", "en-AU" },
+
+    // English/CA: Chapters/Indigo
+    { "", "en", "https://shop.chapters.indigo.ca", "en-CA" },
+
+    // English/NZ: Fishpond
+    { "", "en", "https://www.fishpond.co.nz/", "en-NZ" },
+
+    // Hebrew
+    { "he", "he", "https://book4book.co.il/", "he-IL" },
+
+    // Italian: bol.it
+    { "", "it", "https://www.bol.it/", "it-IT" },
+
+    // Japanese: rakuten
+    { "", "ja", "https://order.step.rakuten.co.jp/", "ja-JP" },
+
+    // Portuguese/BR:
+    { "", "pt", "https://www.livrariasaraiva.com.br/", "pt-BR" },
+
+    // Spanish/MX:
+    { "", "es", "http://www.librerialeo.com.mx/create_account.php", "es-MX" },
+  };
+
+  const size_t site_count = sizeof(kSites) / sizeof(kSites[0]);
+  for (size_t i = 0; i < site_count; ++i) {
+    const Site& site = kSites[i];
+    RunWebsiteTest(site.dom_locale,
+                   site.detected_locale,
+                   site.website,
+                   site.expected_locale);
+  }
+}
+
+// TODO(isherman): for two of these, we can get the right answer just by
+// guessing the most common region; but for the other two, that would still
+// give the wrong answer.
+//
+// Tracks top websites where we detect the form locale incorrectly.  Each row
+// of the table below is passed to RunWebsiteTest() in order.
+TEST_F(AutoFillLocaleModelTest, FAILS_TopWebsiteLocalesThatCurrentlyFail) {
+  struct Site {
+    std::string dom_locale;
+    std::string detected_locale;
+    std::string website;
+    std::string expected_locale;
+  };
+
+  const Site kFailingSites[] = {
+    // English/IN: rediff books
+    { "", "en", "http://commerce.rediff.com/", "en-IN" },
+
+    // French: fnac
+    { "", "fr", "https://secure.fnac.com/", "fr-FR" },
+
+    // Japanese/US: Fujisan
+    { "", "ja", "https://www.fujisan.com/control/newcustomer", "ja-US" },
+
+    // Japanese/US: rakuten, after selecting German as a language...
+    // NOTE(review): the "en" inputs and "en-US" expectation suggest this was
+    // meant to say English rather than German -- confirm.
+    { "", "en", "https://en.order.step.rakuten.co.jp/", "en-US" },
+  };
+
+  const size_t site_count = sizeof(kFailingSites) / sizeof(kFailingSites[0]);
+  for (size_t i = 0; i < site_count; ++i) {
+    const Site& site = kFailingSites[i];
+    RunWebsiteTest(site.dom_locale,
+                   site.detected_locale,
+                   site.website,
+                   site.expected_locale);
+  }
+}
+
+// TODO(isherman): unit test that verifies mapping a locale to a region
+
+} // namespace
« no previous file with comments | « chrome/browser/autofill/autofill_locale_model.cc ('k') | chrome/browser/autofill/autofill_manager.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698