| OLD | NEW |
| 1 // Copyright 2013 The Chromium Authors. All rights reserved. | 1 // Copyright 2013 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "chrome/common/translate/language_detection_util.h" | 5 #include "components/translate/language_detection/language_detection_util.h" |
| 6 | 6 |
| 7 #include "base/strings/string16.h" | 7 #include "base/strings/string16.h" |
| 8 #include "base/strings/utf_string_conversions.h" | 8 #include "base/strings/utf_string_conversions.h" |
| 9 #include "chrome/common/chrome_constants.h" | 9 #include "components/translate/common/translate_constants.h" |
| 10 #include "testing/gtest/include/gtest/gtest.h" | 10 #include "testing/gtest/include/gtest/gtest.h" |
| 11 | 11 |
| 12 typedef testing::Test LanguageDetectionUtilTest; | 12 typedef testing::Test LanguageDetectionUtilTest; |
| 13 | 13 |
| 14 // Tests that well-known language code typos are fixed. | 14 // Tests that well-known language code typos are fixed. |
| 15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { | 15 TEST_F(LanguageDetectionUtilTest, LanguageCodeTypoCorrection) { |
| 16 std::string language; | 16 std::string language; |
| 17 | 17 |
| 18 // Strip the second and later codes. | 18 // Strip the second and later codes. |
| 19 language = std::string("ja,en"); | 19 language = std::string("ja,en"); |
| 20 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 20 translate::CorrectLanguageCodeTypo(&language); |
| 21 EXPECT_EQ("ja", language); | 21 EXPECT_EQ("ja", language); |
| 22 | 22 |
| 23 // Replace underscore with hyphen. | 23 // Replace underscore with hyphen. |
| 24 language = std::string("ja_JP"); | 24 language = std::string("ja_JP"); |
| 25 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 25 translate::CorrectLanguageCodeTypo(&language); |
| 26 EXPECT_EQ("ja-JP", language); | 26 EXPECT_EQ("ja-JP", language); |
| 27 | 27 |
| 28 // Correct wrong cases. | 28 // Correct wrong cases. |
| 29 language = std::string("JA-jp"); | 29 language = std::string("JA-jp"); |
| 30 LanguageDetectionUtil::CorrectLanguageCodeTypo(&language); | 30 translate::CorrectLanguageCodeTypo(&language); |
| 31 EXPECT_EQ("ja-JP", language); | 31 EXPECT_EQ("ja-JP", language); |
| 32 } | 32 } |
| 33 | 33 |
| 34 // Tests that the format of language codes is validated correctly. | 34 // Tests that the format of language codes is validated correctly. |
| 35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { | 35 TEST_F(LanguageDetectionUtilTest, IsValidLanguageCode) { |
| 36 std::string language; | 36 std::string language; |
| 37 | 37 |
| 38 language = std::string("ja"); | 38 language = std::string("ja"); |
| 39 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 39 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
| 40 | 40 |
| 41 language = std::string("ja-JP"); | 41 language = std::string("ja-JP"); |
| 42 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 42 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
| 43 | 43 |
| 44 language = std::string("ceb"); | 44 language = std::string("ceb"); |
| 45 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 45 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
| 46 | 46 |
| 47 language = std::string("ceb-XX"); | 47 language = std::string("ceb-XX"); |
| 48 EXPECT_TRUE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 48 EXPECT_TRUE(translate::IsValidLanguageCode(language)); |
| 49 | 49 |
| 50 // Invalid because the sub code consists of a number. | 50 // Invalid because the sub code consists of a number. |
| 51 language = std::string("utf-8"); | 51 language = std::string("utf-8"); |
| 52 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 52 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
| 53 | 53 |
| 54 // Invalid because of six characters after hyphen. | 54 // Invalid because of six characters after hyphen. |
| 55 language = std::string("ja-YUKARI"); | 55 language = std::string("ja-YUKARI"); |
| 56 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 56 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
| 57 | 57 |
| 58 // Invalid because of four characters. | 58 // Invalid because of four characters. |
| 59 language = std::string("DHMO"); | 59 language = std::string("DHMO"); |
| 60 EXPECT_FALSE(LanguageDetectionUtil::IsValidLanguageCode(language)); | 60 EXPECT_FALSE(translate::IsValidLanguageCode(language)); |
| 61 } | 61 } |
| 62 | 62 |
| 63 // Tests that the similar-language table works. | 63 // Tests that the similar-language table works. |
| 64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { | 64 TEST_F(LanguageDetectionUtilTest, SimilarLanguageCode) { |
| 65 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "en")); | 65 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("en", "en")); |
| 66 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("en", "ja")); | 66 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("en", "ja")); |
| 67 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hr")); | 67 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("bs", "hr")); |
| 68 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("sr-ME", "sr")); | 68 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("sr-ME", "sr")); |
| 69 EXPECT_TRUE(LanguageDetectionUtil::IsSameOrSimilarLanguages("ne", "hi")); | 69 EXPECT_TRUE(translate::IsSameOrSimilarLanguages("ne", "hi")); |
| 70 EXPECT_FALSE(LanguageDetectionUtil::IsSameOrSimilarLanguages("bs", "hi")); | 70 EXPECT_FALSE(translate::IsSameOrSimilarLanguages("bs", "hi")); |
| 71 } | 71 } |
| 72 | 72 |
| 73 // Tests that well-known languages which often have wrong server configuration | 73 // Tests that well-known languages which often have wrong server configuration |
| 74 // are handled. | 74 // are handled. |
| 75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { | 75 TEST_F(LanguageDetectionUtilTest, WellKnownWrongConfiguration) { |
| 76 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", "ja")); | 76 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "ja")); |
| 77 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en-US", | 77 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en-US", "ja")); |
| 78 "ja")); | 78 EXPECT_TRUE(translate::MaybeServerWrongConfiguration("en", "zh-CN")); |
| 79 EXPECT_TRUE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | 79 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("ja", "en")); |
| 80 "zh-CN")); | 80 EXPECT_FALSE(translate::MaybeServerWrongConfiguration("en", "he")); |
| 81 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("ja", | |
| 82 "en")); | |
| 83 EXPECT_FALSE(LanguageDetectionUtil::MaybeServerWrongConfiguration("en", | |
| 84 "he")); | |
| 85 } | 81 } |
| 86 | 82 |
| 87 // Tests that the language meta tag providing wrong information is ignored by | 83 // Tests that the language meta tag providing wrong information is ignored by |
| 88 // LanguageDetectionUtil due to disagreement between meta tag and CLD. | 84 // LanguageDetectionUtil due to disagreement between meta tag and CLD. |
| 89 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { | 85 TEST_F(LanguageDetectionUtilTest, CLDDisagreeWithWrongLanguageCode) { |
| 90 base::string16 contents = ASCIIToUTF16( | 86 base::string16 contents = ASCIIToUTF16( |
| 91 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" | 87 "<html><head><meta http-equiv='Content-Language' content='ja'></head>" |
| 92 "<body>This is a page apparently written in English. Even though " | 88 "<body>This is a page apparently written in English. Even though " |
| 93 "content-language is provided, the value will be ignored if the value " | 89 "content-language is provided, the value will be ignored if the value " |
| 94 "is suspicious.</body></html>"); | 90 "is suspicious.</body></html>"); |
| 95 std::string cld_language; | 91 std::string cld_language; |
| 96 bool is_cld_reliable; | 92 bool is_cld_reliable; |
| 97 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 93 std::string language = translate::DeterminePageLanguage(std::string("ja"), |
| 98 std::string("ja"), std::string(), contents, &cld_language, | 94 std::string(), |
| 99 &is_cld_reliable); | 95 contents, |
| 100 EXPECT_EQ(chrome::kUnknownLanguageCode, language); | 96 &cld_language, |
| 97 &is_cld_reliable); |
| 98 EXPECT_EQ(translate::kUnknownLanguageCode, language); |
| 101 EXPECT_EQ("en", cld_language); | 99 EXPECT_EQ("en", cld_language); |
| 102 EXPECT_TRUE(is_cld_reliable); | 100 EXPECT_TRUE(is_cld_reliable); |
| 103 } | 101 } |
| 104 | 102 |
| 105 // Tests that the language meta tag providing "en-US" style information is | 103 // Tests that the language meta tag providing "en-US" style information is |
| 106 // agreed by CLD. | 104 // agreed by CLD. |
| 107 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { | 105 TEST_F(LanguageDetectionUtilTest, CLDAgreeWithLanguageCodeHavingCountryCode) { |
| 108 base::string16 contents = ASCIIToUTF16( | 106 base::string16 contents = ASCIIToUTF16( |
| 109 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" | 107 "<html><head><meta http-equiv='Content-Language' content='en-US'></head>" |
| 110 "<body>This is a page apparently written in English. Even though " | 108 "<body>This is a page apparently written in English. Even though " |
| 111 "content-language is provided, the value will be ignored if the value " | 109 "content-language is provided, the value will be ignored if the value " |
| 112 "is suspicious.</body></html>"); | 110 "is suspicious.</body></html>"); |
| 113 std::string cld_language; | 111 std::string cld_language; |
| 114 bool is_cld_reliable; | 112 bool is_cld_reliable; |
| 115 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 113 std::string language = translate::DeterminePageLanguage(std::string("en-US"), |
| 116 std::string("en-US"), std::string(), contents, &cld_language, | 114 std::string(), |
| 117 &is_cld_reliable); | 115 contents, |
| 116 &cld_language, |
| 117 &is_cld_reliable); |
| 118 EXPECT_EQ("en-US", language); | 118 EXPECT_EQ("en-US", language); |
| 119 EXPECT_EQ("en", cld_language); | 119 EXPECT_EQ("en", cld_language); |
| 120 EXPECT_TRUE(is_cld_reliable); | 120 EXPECT_TRUE(is_cld_reliable); |
| 121 } | 121 } |
| 122 | 122 |
| 123 // Tests that the language meta tag providing wrong information is ignored and | 123 // Tests that the language meta tag providing wrong information is ignored and |
| 124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid | 124 // CLD's language will be adopted by LanguageDetectionUtil due to an invalid |
| 125 // meta tag. | 125 // meta tag. |
| 126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { | 126 TEST_F(LanguageDetectionUtilTest, InvalidLanguageMetaTagProviding) { |
| 127 base::string16 contents = ASCIIToUTF16( | 127 base::string16 contents = ASCIIToUTF16( |
| 128 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" | 128 "<html><head><meta http-equiv='Content-Language' content='utf-8'></head>" |
| 129 "<body>This is a page apparently written in English. Even though " | 129 "<body>This is a page apparently written in English. Even though " |
| 130 "content-language is provided, the value will be ignored and CLD's" | 130 "content-language is provided, the value will be ignored and CLD's" |
| 131 " language will be adopted if the value is invalid.</body></html>"); | 131 " language will be adopted if the value is invalid.</body></html>"); |
| 132 std::string cld_language; | 132 std::string cld_language; |
| 133 bool is_cld_reliable; | 133 bool is_cld_reliable; |
| 134 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 134 std::string language = translate::DeterminePageLanguage(std::string("utf-8"), |
| 135 std::string("utf-8"), std::string(), contents, &cld_language, | 135 std::string(), |
| 136 &is_cld_reliable); | 136 contents, |
| 137 &cld_language, |
| 138 &is_cld_reliable); |
| 137 EXPECT_EQ("en", language); | 139 EXPECT_EQ("en", language); |
| 138 EXPECT_EQ("en", cld_language); | 140 EXPECT_EQ("en", cld_language); |
| 139 EXPECT_TRUE(is_cld_reliable); | 141 EXPECT_TRUE(is_cld_reliable); |
| 140 } | 142 } |
| 141 | 143 |
| 142 // Tests that the language meta tag providing wrong information is ignored | 144 // Tests that the language meta tag providing wrong information is ignored |
| 143 // because of valid html lang attribute. | 145 // because of valid html lang attribute. |
| 144 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { | 146 TEST_F(LanguageDetectionUtilTest, AdoptHtmlLang) { |
| 145 base::string16 contents = ASCIIToUTF16( | 147 base::string16 contents = ASCIIToUTF16( |
| 146 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" | 148 "<html lang='en'><head><meta http-equiv='Content-Language' content='ja'>" |
| 147 "</head><body>This is a page apparently written in English. Even though " | 149 "</head><body>This is a page apparently written in English. Even though " |
| 148 "content-language is provided, the value will be ignored if the value " | 150 "content-language is provided, the value will be ignored if the value " |
| 149 "is suspicious.</body></html>"); | 151 "is suspicious.</body></html>"); |
| 150 std::string cld_language; | 152 std::string cld_language; |
| 151 bool is_cld_reliable; | 153 bool is_cld_reliable; |
| 152 std::string language = LanguageDetectionUtil::DeterminePageLanguage( | 154 std::string language = translate::DeterminePageLanguage(std::string("ja"), |
| 153 std::string("ja"), std::string("en"), contents, &cld_language, | 155 std::string("en"), |
| 154 &is_cld_reliable); | 156 contents, |
| 157 &cld_language, |
| 158 &is_cld_reliable); |
| 155 EXPECT_EQ("en", language); | 159 EXPECT_EQ("en", language); |
| 156 EXPECT_EQ("en", cld_language); | 160 EXPECT_EQ("en", cld_language); |
| 157 EXPECT_TRUE(is_cld_reliable); | 161 EXPECT_TRUE(is_cld_reliable); |
| 158 } | 162 } |
| OLD | NEW |