OLD | NEW |
| (Empty) |
1 // Copyright 2013 The Chromium Authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
5 #include "chrome/common/translate/language_detection_util.h" | |
6 | |
7 #include "base/strings/string16.h" | |
8 #include "base/strings/utf_string_conversions.h" | |
9 #include "chrome/common/chrome_constants.h" | |
10 #include "testing/gtest/include/gtest/gtest.h" | |
11 | |
// Alias of the plain gtest fixture; the TEST_F cases in this file are
// registered under this name.
12 typedef testing::Test LanguageDetectionUtilTest; | |
13 | |
14 // Tests that well-known language code typos are fixed. | |
15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { | |
16 std::string language; | |
17 | |
18 // Strip the second and later codes. | |
19 language = std::string("ja,en"); | |
20 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | |
21 EXPECT_EQ("ja", language); | |
22 | |
23 // Replace dash with hyphen. | |
24 language = std::string("ja_JP"); | |
25 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | |
26 EXPECT_EQ("ja-JP", language); | |
27 | |
28 // Correct wrong cases. | |
29 language = std::string("JA-jp"); | |
30 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | |
31 EXPECT_EQ("ja-JP", language); | |
32 } | |
33 | |
34 // Tests if the language codes' format is invalid. | |
35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { | |
36 std::string language; | |
37 | |
38 language = std::string("ja"); | |
39 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
40 | |
41 language = std::string("ja-JP"); | |
42 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
43 | |
44 language = std::string("ceb"); | |
45 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
46 | |
47 language = std::string("ceb-XX"); | |
48 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
49 | |
50 // Invalid because the sub code consists of a number. | |
51 language = std::string("utf-8"); | |
52 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
53 | |
54 // Invalid because of six characters after hyphen. | |
55 language = std::string("ja-YUKARI"); | |
56 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
57 | |
58 // Invalid because of four characters. | |
59 language = std::string("DHMO"); | |
60 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | |
61 } | |
62 | |
63 // Tests that similar language table works. | |
64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { | |
65 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en")); | |
66 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja")); | |
67 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr")); | |
68 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr")); | |
69 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi")); | |
70 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi")); | |
71 } | |
72 | |
73 // Tests that well-known languages which often have wrong server configuration | |
74 // are handles. | |
75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { | |
76 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja")); | |
77 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US", | |
78 "ja")); | |
79 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | |
80 "zh-CN")); | |
81 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja", | |
82 "en")); | |
83 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | |
84 "he")); | |
85 } | |
86 | |
87 // Tests that the language meta tag providing wrong information is ignored by | |
88 // LanguageDetectionUtil due to disagreement between meta tag and CLD. | |
89 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { | |
90 base::string16 contents = ASCIIToUTF16( | |
91 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" | |
92 "<body>This is a page apparently written in English. Even though " | |
93 "content-language is provided, the value will be ignored if the value " | |
94 "is suspicious.</body></html>"); | |
95 std::string cld_language; | |
96 bool is_cld_reliable; | |
97 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | |
98 std::string("ja"), std::string(), contents, &cld_language, | |
99 &is_cld_reliable); | |
100 EXPECT_EQ(chrome::kUnknownLanguageCode, language); | |
101 EXPECT_EQ("en", cld_language); | |
102 EXPECT_TRUE(is_cld_reliable); | |
103 } | |
104 | |
105 // Tests that the language meta tag providing "en-US" style information is | |
106 // agreed by CLD. | |
107 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { | |
108 base::string16 contents = ASCIIToUTF16( | |
109 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" | |
110 "<body>This is a page apparently written in English. Even though " | |
111 "content-language is provided, the value will be ignored if the value " | |
112 "is suspicious.</body></html>"); | |
113 std::string cld_language; | |
114 bool is_cld_reliable; | |
115 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | |
116 std::string("en-US"), std::string(), contents, &cld_language, | |
117 &is_cld_reliable); | |
118 EXPECT_EQ("en-US", language); | |
119 EXPECT_EQ("en", cld_language); | |
120 EXPECT_TRUE(is_cld_reliable); | |
121 } | |
122 | |
123 // Tests that the language meta tag providing wrong information is ignored and | |
124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid | |
125 // meta tag. | |
126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { | |
127 base::string16 contents = ASCIIToUTF16( | |
128 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" | |
129 "<body>This is a page apparently written in English. Even though " | |
130 "content-language is provided, the value will be ignored and CLD's" | |
131 " language will be adopted if the value is invalid.</body></html>"); | |
132 std::string cld_language; | |
133 bool is_cld_reliable; | |
134 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | |
135 std::string("utf-8"), std::string(), contents, &cld_language, | |
136 &is_cld_reliable); | |
137 EXPECT_EQ("en", language); | |
138 EXPECT_EQ("en", cld_language); | |
139 EXPECT_TRUE(is_cld_reliable); | |
140 } | |
141 | |
142 // Tests that the language meta tag providing wrong information is ignored | |
143 // because of valid html lang attribute. | |
144 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { | |
145 base::string16 contents = ASCIIToUTF16( | |
146 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" | |
147 "</head><body>This is a page apparently written in English. Even though " | |
148 "content-language is provided, the value will be ignored if the value " | |
149 "is suspicious.</body></html>"); | |
150 std::string cld_language; | |
151 bool is_cld_reliable; | |
152 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | |
153 std::string("ja"), std::string("en"), contents, &cld_language, | |
154 &is_cld_reliable); | |
155 EXPECT_EQ("en", language); | |
156 EXPECT_EQ("en", cld_language); | |
157 EXPECT_TRUE(is_cld_reliable); | |
158 } | |
OLD | NEW |