Index: components/search_engines/template_url_unittest.cc |
diff --git a/components/search_engines/template_url_unittest.cc b/components/search_engines/template_url_unittest.cc |
index 6d09bf89c38f3db31adc90576fe14af1150ab690..861ce7935438887337c55db68eec943e405c9f68 100644 |
--- a/components/search_engines/template_url_unittest.cc |
+++ b/components/search_engines/template_url_unittest.cc |
@@ -1142,6 +1142,81 @@ TEST_F(TemplateURLTest, ExtractSearchTermsFromURLPath) { |
EXPECT_EQ(base::string16(), result); |
} |
+// Verifies ExtractSearchTermsFromURL() for non-Latin terms given as raw UTF-8 |
+// bytes in the query, fragment and path; '+' and %20 are expected as spaces. |
+TEST_F(TemplateURLTest, ExtractSearchTermsFromUTF8URL) { |
+ TemplateURLData data; |
+ data.SetURL("http://utf-8.ru/?q={searchTerms}"); |
+ data.alternate_urls.push_back("http://utf-8.ru/#q={searchTerms}"); |
+ data.alternate_urls.push_back("http://utf-8.ru/path/{searchTerms}"); |
+ TemplateURL url(data); |
+ base::string16 result; |
+ |
+ // All inputs below are Russian text passed as raw (unescaped) UTF-8 bytes. |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://utf-8.ru/?q=\xD0\x97\xD0\xB4\xD1\x80\xD0\xB0\xD0\xB2\xD1\x81" |
+ "\xD1\x82\xD0\xB2\xD1\x83\xD0\xB9,+\xD0\xBC\xD0\xB8\xD1\x80!"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16( |
+ L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, " |
+ L"\x043C\x0438\x0440!"), |
+ result); |
+ |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://utf-8.ru/#q=\xD0\xB4\xD0\xB2\xD0\xB0+\xD1\x81\xD0\xBB" |
+ "\xD0\xBE\xD0\xB2\xD0\xB0"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"), |
+ result); |
+ |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://utf-8.ru/path/\xD0\xB1\xD1\x83\xD0\xBA\xD0\xB2\xD1\x8B%20" |
+ "\xD0\x90%20\xD0\xB8%20A"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"), |
+ result); |
+} |
+ |
+// Verifies ExtractSearchTermsFromURL() for non-Latin terms percent-escaped in a |
+// non-UTF-8 charset (windows-1251), which the template lists as input encoding. |
+TEST_F(TemplateURLTest, ExtractSearchTermsFromNonUTF8URL) { |
+ TemplateURLData data; |
+ data.SetURL("http://windows-1251.ru/?q={searchTerms}"); |
+ data.alternate_urls.push_back("http://windows-1251.ru/#q={searchTerms}"); |
+ data.alternate_urls.push_back("http://windows-1251.ru/path/{searchTerms}"); |
+ data.input_encodings.push_back("windows-1251"); |
+ TemplateURL url(data); |
+ base::string16 result; |
+ |
+ // All inputs below are Russian text as %XX-escaped windows-1251 bytes. |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://windows-1251.ru/?q=%C7%E4%F0%E0%E2%F1%F2%E2%F3%E9%2C+" |
+ "%EC%E8%F0!"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16( |
+ L"\x0417\x0434\x0440\x0430\x0432\x0441\x0442\x0432\x0443\x0439, " |
+ L"\x043C\x0438\x0440!"), |
+ result); |
+ |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://windows-1251.ru/#q=%E4%E2%E0+%F1%EB%EE%E2%E0"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16(L"\x0434\x0432\x0430 \x0441\x043B\x043E\x0432\x0430"), |
+ result); |
+ |
+ EXPECT_TRUE(url.ExtractSearchTermsFromURL( |
+ GURL("http://windows-1251.ru/path/%E1%F3%EA%E2%FB%20%C0%20%E8%20A"), |
+ search_terms_data_, &result)); |
+ EXPECT_EQ( |
+ base::WideToUTF16(L"\x0431\x0443\x043A\x0432\x044B \x0410 \x0438 A"), |
+ result); |
+} |
+ |
TEST_F(TemplateURLTest, HasSearchTermsReplacementKey) { |
TemplateURLData data; |
data.SetURL("http://google.com/?q={searchTerms}"); |