OLD | NEW |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 #include "chrome/common/translate/language_detection_util.h" | 5 #include "components/translate/language_detection/language_detection_util.h" |
6 | 6 |
7 #include "base/strings/string16.h" | 7 #include "base/strings/string16.h" |
8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
9 #include "chrome/common/chrome_constants.h" | 9 #include "components/translate/common/translate_constants.h" |
10 #include "testing/gtest/include/gtest/gtest.h" | 10 #include "testing/gtest/include/gtest/gtest.h" |
11 | 11 |
12 typedef testing::Test LanguageDetectionUtilTest; | 12 typedef testing::Test LanguageDetectionUtilTest; |
13 | 13 |
14 // Tests that well-known language code typos are fixed. | 14 // Tests that well-known language code typos are fixed. |
15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { | 15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { |
16 std::string language; | 16 std::string language; |
17 | 17 |
18 // Strip the second and later codes of a comma-separated list. | 18 // Strip the second and later codes of a comma-separated list. |
19 language = std::string("ja,en"); | 19 language = std::string("ja,en"); |
20 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 20 translate::CorrectLanguageCodeTypo(&language); |
21 EXPECT_EQ("ja", language); | 21 EXPECT_EQ("ja", language); |
22 | 22 |
23 // Replace underscore with hyphen. | 23 // Replace underscore with hyphen. |
24 language = std::string("ja_JP"); | 24 language = std::string("ja_JP"); |
25 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 25 translate::CorrectLanguageCodeTypo(&language); |
26 EXPECT_EQ("ja-JP", language); | 26 EXPECT_EQ("ja-JP", language); |
27 | 27 |
28 // Correct wrong letter case (lowercase main code, uppercase sub code). | 28 // Correct wrong letter case (lowercase main code, uppercase sub code). |
29 language = std::string("JA-jp"); | 29 language = std::string("JA-jp"); |
30 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 30 translate::CorrectLanguageCodeTypo(&language); |
31 EXPECT_EQ("ja-JP", language); | 31 EXPECT_EQ("ja-JP", language); |
32 } | 32 } |
33 | 33 |
34 // Tests that valid and invalid language code formats are told apart. | 34 // Tests that valid and invalid language code formats are told apart. |
35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { | 35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { |
36 std::string language; | 36 std::string language; |
37 | 37 |
38 language = std::string("ja"); | 38 language = std::string("ja"); |
39 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 39 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
40 | 40 |
41 language = std::string("ja-JP"); | 41 language = std::string("ja-JP"); |
42 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 42 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
43 | 43 |
44 language = std::string("ceb"); | 44 language = std::string("ceb"); |
45 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 45 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
46 | 46 |
47 language = std::string("ceb-XX"); | 47 language = std::string("ceb-XX"); |
48 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 48 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
49 | 49 |
50 // Invalid because the sub code consists of a number. | 50 // Invalid because the sub code consists of a number. |
51 language = std::string("utf-8"); | 51 language = std::string("utf-8"); |
52 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 52 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
53 | 53 |
54 // Invalid because of six characters after the hyphen. | 54 // Invalid because of six characters after the hyphen. |
55 language = std::string("ja-YUKARI"); | 55 language = std::string("ja-YUKARI"); |
56 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 56 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
57 | 57 |
58 // Invalid because the code has four characters and no sub code. | 58 // Invalid because the code has four characters and no sub code. |
59 language = std::string("DHMO"); | 59 language = std::string("DHMO"); |
60 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 60 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
61 } | 61 } |
62 | 62 |
63 // Tests that the similar-language table works. | 63 // Tests that the similar-language table works. |
64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { | 64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { |
65 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en")); | 65 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("en", "en")); |
66 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja")); | 66 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("en", "ja")); |
67 | 67 |
68 // Language codes are the same if the main parts are the same. Synonyms should | 68 // Language codes are the same if the main parts are the same. Synonyms should |
69 // be taken into account (ex: 'iw' and 'he'). | 69 // be taken into account (ex: 'iw' and 'he'). |
70 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr")); | 70 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr-ME", "sr")); |
71 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr", "sr-ME")); | 71 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr", "sr-ME")); |
72 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("he", "he-IL")); | 72 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("he", "he-IL")); |
73 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("eng", "eng-US")); | 73 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("eng", "eng-US")); |
74 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("eng-US", "eng")); | 74 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("eng-US", "eng")); |
75 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("eng", "enm")); | 75 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("eng", "enm")); |
76 | 76 |
77 // Even though the main parts are different, some special language pairs are | 77 // Even though the main parts are different, some special language pairs are |
78 // recognized as the same language. | 78 // recognized as the same language. |
79 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr")); | 79 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("bs", "hr")); |
80 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi")); | 80 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("ne", "hi")); |
81 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi")); | 81 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("bs", "hi")); |
82 } | 82 } |
83 | 83 |
84 // Tests that well-known languages which often have wrong server configuration | 84 // Tests that well-known languages which often have wrong server configuration |
85 // are handled. | 85 // are handled. |
86 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { | 86 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { |
87 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja")); | 87 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "ja")); |
88 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US", | 88 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en-US", "ja")); |
89 "ja")); | 89 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "zh-CN")); |
90 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | 90 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("ja", "en")); |
91 "zh-CN")); | 91 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("en", "he")); |
92 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja", | |
93 "en")); | |
94 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | |
95 "he")); | |
96 } | 92 } |
97 | 93 |
98 // Tests that a language meta tag providing wrong information is ignored by | 94 // Tests that a language meta tag providing wrong information is ignored by |
99 // LanguageDetectionUtil due to disagreement between the meta tag and CLD. | 95 // LanguageDetectionUtil due to disagreement between the meta tag and CLD. |
100 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { | 96 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { |
101 base::string16 contents = ASCIIToUTF16( | 97 base::string16 contents = ASCIIToUTF16( |
102 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" | 98 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" |
103 "<body>This is a page apparently written in English. Even though " | 99 "<body>This is a page apparently written in English. Even though " |
104 "content-language is provided, the value will be ignored if the value " | 100 "content-language is provided, the value will be ignored if the value " |
105 "is suspicious.</body></html>"); | 101 "is suspicious.</body></html>"); |
106 std::string cld_language; | 102 std::string cld_language; |
107 bool is_cld_reliable; | 103 bool is_cld_reliable; |
108 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 104 std::string language = translate::DeterminePageLanguage(std::string("ja"), |
109 std::string("ja"), std::string(), contents, &cld_language, | 105 std::string(), |
110 &is_cld_reliable); | 106 contents, |
111 EXPECT_EQ(chrome::kUnknownLanguageCode, language); | 107 &cld_language, |
| 108 &is_cld_reliable); |
| 109 EXPECT_EQ(translate::kUnknownLanguageCode, language); |
112 EXPECT_EQ("en", cld_language); | 110 EXPECT_EQ("en", cld_language); |
113 EXPECT_TRUE(is_cld_reliable); | 111 EXPECT_TRUE(is_cld_reliable); |
114 } | 112 } |
115 | 113 |
116 // Tests that a language meta tag providing "en-US" style information is | 114 // Tests that a language meta tag providing "en-US" style information is |
117 // adopted when CLD agrees with it. | 115 // adopted when CLD agrees with it. |
118 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { | 116 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { |
119 base::string16 contents = ASCIIToUTF16( | 117 base::string16 contents = ASCIIToUTF16( |
120 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" | 118 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" |
121 "<body>This is a page apparently written in English. Even though " | 119 "<body>This is a page apparently written in English. Even though " |
122 "content-language is provided, the value will be ignored if the value " | 120 "content-language is provided, the value will be ignored if the value " |
123 "is suspicious.</body></html>"); | 121 "is suspicious.</body></html>"); |
124 std::string cld_language; | 122 std::string cld_language; |
125 bool is_cld_reliable; | 123 bool is_cld_reliable; |
126 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 124 std::string language = translate::DeterminePageLanguage(std::string("en-US"), |
127 std::string("en-US"), std::string(), contents, &cld_language, | 125 std::string(), |
128 &is_cld_reliable); | 126 contents, |
| 127 &cld_language, |
| 128 &is_cld_reliable); |
129 EXPECT_EQ("en-US", language); | 129 EXPECT_EQ("en-US", language); |
130 EXPECT_EQ("en", cld_language); | 130 EXPECT_EQ("en", cld_language); |
131 EXPECT_TRUE(is_cld_reliable); | 131 EXPECT_TRUE(is_cld_reliable); |
132 } | 132 } |
133 | 133 |
134 // Tests that a language meta tag providing invalid information is ignored and | 134 // Tests that a language meta tag providing invalid information is ignored and |
135 // CLD's language is adopted by LanguageDetectionUtil because the meta tag | 135 // CLD's language is adopted by LanguageDetectionUtil because the meta tag |
136 // value is not a valid language code. | 136 // value is not a valid language code. |
137 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { | 137 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { |
138 base::string16 contents = ASCIIToUTF16( | 138 base::string16 contents = ASCIIToUTF16( |
139 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" | 139 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" |
140 "<body>This is a page apparently written in English. Even though " | 140 "<body>This is a page apparently written in English. Even though " |
141 "content-language is provided, the value will be ignored and CLD's" | 141 "content-language is provided, the value will be ignored and CLD's" |
142 " language will be adopted if the value is invalid.</body></html>"); | 142 " language will be adopted if the value is invalid.</body></html>"); |
143 std::string cld_language; | 143 std::string cld_language; |
144 bool is_cld_reliable; | 144 bool is_cld_reliable; |
145 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 145 std::string language = translate::DeterminePageLanguage(std::string("utf-8"), |
146 std::string("utf-8"), std::string(), contents, &cld_language, | 146 std::string(), |
147 &is_cld_reliable); | 147 contents, |
| 148 &cld_language, |
| 149 &is_cld_reliable); |
148 EXPECT_EQ("en", language); | 150 EXPECT_EQ("en", language); |
149 EXPECT_EQ("en", cld_language); | 151 EXPECT_EQ("en", cld_language); |
150 EXPECT_TRUE(is_cld_reliable); | 152 EXPECT_TRUE(is_cld_reliable); |
151 } | 153 } |
152 | 154 |
153 // Tests that a language meta tag providing wrong information is ignored | 155 // Tests that a language meta tag providing wrong information is ignored |
154 // because of a valid html lang attribute. | 156 // because of a valid html lang attribute. |
155 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { | 157 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { |
156 base::string16 contents = ASCIIToUTF16( | 158 base::string16 contents = ASCIIToUTF16( |
157 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" | 159 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" |
158 "</head><body>This is a page apparently written in English. Even though " | 160 "</head><body>This is a page apparently written in English. Even though " |
159 "content-language is provided, the value will be ignored if the value " | 161 "content-language is provided, the value will be ignored if the value " |
160 "is suspicious.</body></html>"); | 162 "is suspicious.</body></html>"); |
161 std::string cld_language; | 163 std::string cld_language; |
162 bool is_cld_reliable; | 164 bool is_cld_reliable; |
163 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 165 std::string language = translate::DeterminePageLanguage(std::string("ja"), |
164 std::string("ja"), std::string("en"), contents, &cld_language, | 166 std::string("en"), |
165 &is_cld_reliable); | 167 contents, |
| 168 &cld_language, |
| 169 &is_cld_reliable); |
166 EXPECT_EQ("en", language); | 170 EXPECT_EQ("en", language); |
167 EXPECT_EQ("en", cld_language); | 171 EXPECT_EQ("en", cld_language); |
168 EXPECT_TRUE(is_cld_reliable); | 172 EXPECT_TRUE(is_cld_reliable); |
169 } | 173 } |
OLD | NEW |