Chromium Code Reviews

| OLD | NEW |
|---|---|
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "platform/text/TextEncodingDetector.h" | 5 #include "platform/text/TextEncodingDetector.h" |
| 6 | 6 |
| 7 #include "testing/gtest/include/gtest/gtest.h" | 7 #include "testing/gtest/include/gtest/gtest.h" |
| 8 #include "wtf/text/TextEncoding.h" | 8 #include "wtf/text/TextEncoding.h" |
| 9 | 9 |
| 10 namespace blink { | 10 namespace blink { |
| 11 | 11 |
| 12 TEST(TextResourceDecoderTest, RespectIso2022Jp) { | 12 TEST(TextResourceDecoderTest, RespectIso2022Jp) { |
| 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. | 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. |
| 14 std::string iso2022jp = | 14 std::string iso2022jp = |
| 15 " \x1B" | 15 " \x1B" |
| 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; | 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; |
| 17 WTF::TextEncoding encoding; | 17 WTF::TextEncoding encoding; |
| 18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), | 18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), |
| 19 nullptr, &encoding); | 19 nullptr, String(), nullptr, &encoding); |
| 20 EXPECT_TRUE(result); | 20 EXPECT_TRUE(result); |
| 21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); | 21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); |
| 22 } | 22 } |
| 23 | 23 |
| 24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) { | 24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) { |
| 25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. | 25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. |
| 26 // They should be detected as plain text (US-ASCII). | 26 // They should be detected as plain text (US-ASCII). |
| 27 std::string hzGb2312 = | 27 std::string hzGb2312 = |
| 28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; | 28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; |
| 29 WTF::TextEncoding encoding; | 29 WTF::TextEncoding encoding; |
| 30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, | 30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, |
| 31 &encoding); | 31 String(), nullptr, &encoding); |
| 32 EXPECT_TRUE(result); | 32 EXPECT_TRUE(result); |
| 33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); | 33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); |
| 34 } | 34 } |
| 35 | 35 |
| 36 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) { | |
|
Jinsuk Kim (2017/01/20 21:50:47): Can you add another EXPECT to show that additional … [comment truncated in the source]
tkent (2017/01/23 00:02:57): Done.
| |
| 37 std::string eucjpBytes = | |
| 38 "<TITLE>" | |
| 39 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" | |
| 40 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" | |
| 41 "TITLE>"; | |
| 42 WTF::TextEncoding encoding; | |
| 43 bool result = | |
| 44 detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, | |
| 45 "http://example.co.jp/", nullptr, &encoding); | |
| 46 EXPECT_TRUE(result); | |
| 47 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding); | |
| 48 } | |
| 49 | |
| 50 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) { | |
| 51 std::string eucjpBytes = | |
| 52 "<TITLE>" | |
| 53 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" | |
| 54 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" | |
| 55 "TITLE>"; | |
| 56 WTF::TextEncoding encoding; | |
| 57 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), | |
| 58 nullptr, String(), "ja", &encoding); | |
| 59 EXPECT_TRUE(result); | |
| 60 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding); | |
| 61 } | |
| 62 | |
| 36 } // namespace blink | 63 } // namespace blink |
| OLD | NEW |