Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(109)

Side by Side Diff: components/translate/language_detection/language_detection_util_unittest.cc

Issue 25531002: Move language detection to a component (Closed) Base URL: http://git.chromium.org/chromium/src.git@master
Patch Set: Fix compilation Created 7 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLDNEW
1 // Copyright 2013 The Chromium Authors. All rights reserved. 1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "chrome/common/translate/language_detection_util.h" 5 #include "components/translate/language_detection/language_detection_util.h"
6 6
7 #include "base/strings/string16.h" 7 #include "base/strings/string16.h"
8 #include "base/strings/utf_string_conversions.h" 8 #include "base/strings/utf_string_conversions.h"
9 #include "chrome/common/chrome_constants.h" 9 #include "components/translate/common/translate_constants.h"
10 #include "testing/gtest/include/gtest/gtest.h" 10 #include "testing/gtest/include/gtest/gtest.h"
11 11
12 typedef testing::Test LanguageDetectionUtilTest; 12 typedef testing::Test LanguageDetectionUtilTest;
13 13
14 // Tests that well-known language code typos are fixed. 14 // Tests that well-known language code typos are fixed.
15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { 15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) {
16 std::string language; 16 std::string language;
17 17
18 // Strip the second and later codes. 18 // Strip the second and later codes.
19 language = std::string("ja,en"); 19 language = std::string("ja,en");
20 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); 20 translate::CorrectLanguageCodeTypo(&language);
21 EXPECT_EQ("ja", language); 21 EXPECT_EQ("ja", language);
22 22
23 // Replace underscore with hyphen. 23 // Replace underscore with hyphen.
24 language = std::string("ja_JP"); 24 language = std::string("ja_JP");
25 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); 25 translate::CorrectLanguageCodeTypo(&language);
26 EXPECT_EQ("ja-JP", language); 26 EXPECT_EQ("ja-JP", language);
27 27
28 // Correct wrong cases. 28 // Correct wrong cases.
29 language = std::string("JA-jp"); 29 language = std::string("JA-jp");
30 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); 30 translate::CorrectLanguageCodeTypo(&language);
31 EXPECT_EQ("ja-JP", language); 31 EXPECT_EQ("ja-JP", language);
32 } 32 }
33 33
34 // Tests whether the language codes' format is valid. 34 // Tests whether the language codes' format is valid.
35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { 35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) {
36 std::string language; 36 std::string language;
37 37
38 language = std::string("ja"); 38 language = std::string("ja");
39 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); 39 EXPECT_TRUE(translate::IsValidLanguageCode(language));
40 40
41 language = std::string("ja-JP"); 41 language = std::string("ja-JP");
42 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); 42 EXPECT_TRUE(translate::IsValidLanguageCode(language));
43 43
44 language = std::string("ceb"); 44 language = std::string("ceb");
45 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); 45 EXPECT_TRUE(translate::IsValidLanguageCode(language));
46 46
47 language = std::string("ceb-XX"); 47 language = std::string("ceb-XX");
48 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); 48 EXPECT_TRUE(translate::IsValidLanguageCode(language));
49 49
50 // Invalid because the sub code consists of a number. 50 // Invalid because the sub code consists of a number.
51 language = std::string("utf-8"); 51 language = std::string("utf-8");
52 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); 52 EXPECT_FALSE(translate::IsValidLanguageCode(language));
53 53
54 // Invalid because of six characters after hyphen. 54 // Invalid because of six characters after hyphen.
55 language = std::string("ja-YUKARI"); 55 language = std::string("ja-YUKARI");
56 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); 56 EXPECT_FALSE(translate::IsValidLanguageCode(language));
57 57
58 // Invalid because of four characters. 58 // Invalid because of four characters.
59 language = std::string("DHMO"); 59 language = std::string("DHMO");
60 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); 60 EXPECT_FALSE(translate::IsValidLanguageCode(language));
61 } 61 }
62 62
63 // Tests that the similar language table works. 63 // Tests that the similar language table works.
64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { 64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) {
65 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en")); 65 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("en", "en"));
66 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja")); 66 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("en", "ja"));
67 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr")); 67 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("bs", "hr"));
68 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr")); 68 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr-ME", "sr"));
69 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi")); 69 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("ne", "hi"));
70 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi")); 70 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("bs", "hi"));
71 } 71 }
72 72
73 // Tests that well-known languages which often have wrong server configuration 73 // Tests that well-known languages which often have wrong server configuration
74 // are handled. 74 // are handled.
75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { 75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) {
76 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja")); 76 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "ja"));
77 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US", 77 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en-US", "ja"));
78 "ja")); 78 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "zh-CN"));
79 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", 79 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("ja", "en"));
80 "zh-CN")); 80 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("en", "he"));
81 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja",
82 "en"));
83 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en",
84 "he"));
85 } 81 }
86 82
87 // Tests that the language meta tag providing wrong information is ignored by 83 // Tests that the language meta tag providing wrong information is ignored by
88 // LanguageDetectionUtil due to disagreement between meta tag and CLD. 84 // LanguageDetectionUtil due to disagreement between meta tag and CLD.
89 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { 85 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) {
90 base::string16 contents = ASCIIToUTF16( 86 base::string16 contents = ASCIIToUTF16(
91 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" 87 "<html><head><meta http-equiv='Content-Language' content='ja'></head>"
92 "<body>This is a page apparently written in English. Even though " 88 "<body>This is a page apparently written in English. Even though "
93 "content-language is provided, the value will be ignored if the value " 89 "content-language is provided, the value will be ignored if the value "
94 "is suspicious.</body></html>"); 90 "is suspicious.</body></html>");
95 std::string cld_language; 91 std::string cld_language;
96 bool is_cld_reliable; 92 bool is_cld_reliable;
97 std::string language = LanguageDetectionUtil::DeterminePageLanguage( 93 std::string language = translate::DeterminePageLanguage(std::string("ja"),
98 std::string("ja"), std::string(), contents, &cld_language, 94 std::string(),
99 &is_cld_reliable); 95 contents,
100 EXPECT_EQ(chrome::kUnknownLanguageCode, language); 96 &cld_language,
97 &is_cld_reliable);
98 EXPECT_EQ(translate::kUnknownLanguageCode, language);
101 EXPECT_EQ("en", cld_language); 99 EXPECT_EQ("en", cld_language);
102 EXPECT_TRUE(is_cld_reliable); 100 EXPECT_TRUE(is_cld_reliable);
103 } 101 }
104 102
105 // Tests that the language meta tag providing "en-US" style information is 103 // Tests that the language meta tag providing "en-US" style information is
106 // adopted when CLD agrees. 104 // adopted when CLD agrees.
107 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { 105 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) {
108 base::string16 contents = ASCIIToUTF16( 106 base::string16 contents = ASCIIToUTF16(
109 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" 107 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>"
110 "<body>This is a page apparently written in English. Even though " 108 "<body>This is a page apparently written in English. Even though "
111 "content-language is provided, the value will be ignored if the value " 109 "content-language is provided, the value will be ignored if the value "
112 "is suspicious.</body></html>"); 110 "is suspicious.</body></html>");
113 std::string cld_language; 111 std::string cld_language;
114 bool is_cld_reliable; 112 bool is_cld_reliable;
115 std::string language = LanguageDetectionUtil::DeterminePageLanguage( 113 std::string language = translate::DeterminePageLanguage(std::string("en-US"),
116 std::string("en-US"), std::string(), contents, &cld_language, 114 std::string(),
117 &is_cld_reliable); 115 contents,
116 &cld_language,
117 &is_cld_reliable);
118 EXPECT_EQ("en-US", language); 118 EXPECT_EQ("en-US", language);
119 EXPECT_EQ("en", cld_language); 119 EXPECT_EQ("en", cld_language);
120 EXPECT_TRUE(is_cld_reliable); 120 EXPECT_TRUE(is_cld_reliable);
121 } 121 }
122 122
123 // Tests that the language meta tag providing wrong information is ignored and 123 // Tests that the language meta tag providing wrong information is ignored and
124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid 124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid
125 // meta tag. 125 // meta tag.
126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { 126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) {
127 base::string16 contents = ASCIIToUTF16( 127 base::string16 contents = ASCIIToUTF16(
128 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" 128 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>"
129 "<body>This is a page apparently written in English. Even though " 129 "<body>This is a page apparently written in English. Even though "
130 "content-language is provided, the value will be ignored and CLD's" 130 "content-language is provided, the value will be ignored and CLD's"
131 " language will be adopted if the value is invalid.</body></html>"); 131 " language will be adopted if the value is invalid.</body></html>");
132 std::string cld_language; 132 std::string cld_language;
133 bool is_cld_reliable; 133 bool is_cld_reliable;
134 std::string language = LanguageDetectionUtil::DeterminePageLanguage( 134 std::string language = translate::DeterminePageLanguage(std::string("utf-8"),
135 std::string("utf-8"), std::string(), contents, &cld_language, 135 std::string(),
136 &is_cld_reliable); 136 contents,
137 &cld_language,
138 &is_cld_reliable);
137 EXPECT_EQ("en", language); 139 EXPECT_EQ("en", language);
138 EXPECT_EQ("en", cld_language); 140 EXPECT_EQ("en", cld_language);
139 EXPECT_TRUE(is_cld_reliable); 141 EXPECT_TRUE(is_cld_reliable);
140 } 142 }
141 143
142 // Tests that the language meta tag providing wrong information is ignored 144 // Tests that the language meta tag providing wrong information is ignored
143 // because of valid html lang attribute. 145 // because of valid html lang attribute.
144 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { 146 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) {
145 base::string16 contents = ASCIIToUTF16( 147 base::string16 contents = ASCIIToUTF16(
146 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" 148 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>"
147 "</head><body>This is a page apparently written in English. Even though " 149 "</head><body>This is a page apparently written in English. Even though "
148 "content-language is provided, the value will be ignored if the value " 150 "content-language is provided, the value will be ignored if the value "
149 "is suspicious.</body></html>"); 151 "is suspicious.</body></html>");
150 std::string cld_language; 152 std::string cld_language;
151 bool is_cld_reliable; 153 bool is_cld_reliable;
152 std::string language = LanguageDetectionUtil::DeterminePageLanguage( 154 std::string language = translate::DeterminePageLanguage(std::string("ja"),
153 std::string("ja"), std::string("en"), contents, &cld_language, 155 std::string("en"),
154 &is_cld_reliable); 156 contents,
157 &cld_language,
158 &is_cld_reliable);
155 EXPECT_EQ("en", language); 159 EXPECT_EQ("en", language);
156 EXPECT_EQ("en", cld_language); 160 EXPECT_EQ("en", cld_language);
157 EXPECT_TRUE(is_cld_reliable); 161 EXPECT_TRUE(is_cld_reliable);
158 } 162 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698