Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(1657)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2648703003: Pass more hints to encoding detector. (Closed)
Patch Set: _ Created 3 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/text/TextEncodingDetector.h" 5 #include "platform/text/TextEncodingDetector.h"
6 6
7 #include "testing/gtest/include/gtest/gtest.h" 7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "wtf/text/TextEncoding.h" 8 #include "wtf/text/TextEncoding.h"
9 9
10 namespace blink { 10 namespace blink {
11 11
12 TEST(TextResourceDecoderTest, RespectIso2022Jp) { 12 TEST(TextResourceDecoderTest, RespectIso2022Jp) {
13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard.
14 std::string iso2022jp = 14 std::string iso2022jp =
15 " \x1B" 15 " \x1B"
16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\"";
17 WTF::TextEncoding encoding; 17 WTF::TextEncoding encoding;
18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), 18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(),
19 nullptr, &encoding); 19 nullptr, String(), nullptr, &encoding);
20 EXPECT_TRUE(result); 20 EXPECT_TRUE(result);
21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); 21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding);
22 } 22 }
23 23
24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) { 24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) {
25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. 25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG.
26 // They should be detected as plain text (US-ASCII). 26 // They should be detected as plain text (US-ASCII).
27 std::string hzGb2312 = 27 std::string hzGb2312 =
28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; 28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc";
29 WTF::TextEncoding encoding; 29 WTF::TextEncoding encoding;
30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, 30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr,
31 &encoding); 31 String(), nullptr, &encoding);
32 EXPECT_TRUE(result); 32 EXPECT_TRUE(result);
33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); 33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
34 } 34 }
35 35
36 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) {
Jinsuk Kim 2017/01/20 21:50:47 Can you add another EXPECT to show that additional [...]
tkent 2017/01/23 00:02:57 Done.
37 std::string eucjpBytes =
38 "<TITLE>"
39 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
40 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
41 "TITLE>";
42 WTF::TextEncoding encoding;
43 bool result =
44 detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
45 "http://example.co.jp/", nullptr, &encoding);
46 EXPECT_TRUE(result);
47 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding);
48 }
49
50 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) {
51 std::string eucjpBytes =
52 "<TITLE>"
53 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
54 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
55 "TITLE>";
56 WTF::TextEncoding encoding;
57 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
58 nullptr, String(), "ja", &encoding);
59 EXPECT_TRUE(result);
60 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding);
61 }
62
36 } // namespace blink 63 } // namespace blink
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698