Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1169)

Unified Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2655203002: Merge "Pass more hints to encoding detector." to M57 branch (Closed)
Patch Set: Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp
diff --git a/third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp b/third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp
index f768df9b1d6bc5c85f1e5e1891fac1b7cec7c728..1900b172b876617ce573f891f67f90b49bb727b3 100644
--- a/third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp
+++ b/third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp
@@ -16,7 +16,7 @@ TEST(TextResourceDecoderTest, RespectIso2022Jp) {
"$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\"";
WTF::TextEncoding encoding;
bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(),
- nullptr, &encoding);
+ nullptr, nullptr, nullptr, &encoding);
EXPECT_TRUE(result);
EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding);
}
@@ -28,9 +28,49 @@ TEST(TextResourceDecoderTest, Ignore7BitEncoding) {
" ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc";
WTF::TextEncoding encoding;
bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr,
- &encoding);
+ nullptr, nullptr, &encoding);
EXPECT_TRUE(result);
EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
}
+TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) {
+ std::string eucjpBytes =
+ "<TITLE>"
+ "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
+ "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
+ "TITLE>";
+ WTF::TextEncoding encoding;
+ bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
+ nullptr, nullptr, nullptr, &encoding);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
+ << "Without URL hint, it's detected as GBK";
+
+ result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
+ "http://example.co.jp/", nullptr, &encoding);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
+ << "With URL hint including '.jp', it's detected as EUC-JP";
+}
+
+TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) {
+ std::string eucjpBytes =
+ "<TITLE>"
+ "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
+ "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
+ "TITLE>";
+ WTF::TextEncoding encoding;
+ bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
+ nullptr, nullptr, nullptr, &encoding);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
+ << "Without language hint, it's detected as GBK";
+
+ result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
+ nullptr, "ja", &encoding);
+ EXPECT_TRUE(result);
+ EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
+ << "With language hint 'ja', it's detected as EUC-JP";
+}
+
} // namespace blink
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698