Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1124)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2655203002: Merge "Pass more hints to encoding detector." to M57 branch (Closed)
Patch Set: Created 3 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/text/TextEncodingDetector.h" 5 #include "platform/text/TextEncodingDetector.h"
6 6
7 #include "testing/gtest/include/gtest/gtest.h" 7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "wtf/text/TextEncoding.h" 8 #include "wtf/text/TextEncoding.h"
9 9
10 namespace blink { 10 namespace blink {
11 11
12 TEST(TextResourceDecoderTest, RespectIso2022Jp) { 12 TEST(TextResourceDecoderTest, RespectIso2022Jp) {
13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard.
14 std::string iso2022jp = 14 std::string iso2022jp =
15 " \x1B" 15 " \x1B"
16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\"";
17 WTF::TextEncoding encoding; 17 WTF::TextEncoding encoding;
18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), 18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(),
19 nullptr, &encoding); 19 nullptr, nullptr, nullptr, &encoding);
20 EXPECT_TRUE(result); 20 EXPECT_TRUE(result);
21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); 21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding);
22 } 22 }
23 23
24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) { 24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) {
25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. 25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG.
26 // They should be detected as plain text (US-ASCII). 26 // They should be detected as plain text (US-ASCII).
27 std::string hzGb2312 = 27 std::string hzGb2312 =
28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; 28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc";
29 WTF::TextEncoding encoding; 29 WTF::TextEncoding encoding;
30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, 30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr,
31 &encoding); 31 nullptr, nullptr, &encoding);
32 EXPECT_TRUE(result); 32 EXPECT_TRUE(result);
33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); 33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
34 } 34 }
35 35
36 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) {
37 std::string eucjpBytes =
38 "<TITLE>"
39 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
40 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
41 "TITLE>";
42 WTF::TextEncoding encoding;
43 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
44 nullptr, nullptr, nullptr, &encoding);
45 EXPECT_TRUE(result);
46 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
47 << "Without language hint, it's detected as GBK";
48
49 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
50 "http://example.co.jp/", nullptr, &encoding);
51 EXPECT_TRUE(result);
52 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
53 << "With URL hint including '.jp', it's detected as EUC-JP";
54 }
55
56 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) {
57 std::string eucjpBytes =
58 "<TITLE>"
59 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
60 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
61 "TITLE>";
62 WTF::TextEncoding encoding;
63 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
64 nullptr, nullptr, nullptr, &encoding);
65 EXPECT_TRUE(result);
66 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
67 << "Without language hint, it's detected as GBK";
68
69 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
70 nullptr, "ja", &encoding);
71 EXPECT_TRUE(result);
72 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
73 << "With language hint 'ja', it's detected as EUC-JP";
74 }
75
36 } // namespace blink 76 } // namespace blink
OLD | NEW
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698