Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(613)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2737033003: Convert non-WHATWG text encoding to ASCII (Closed)
Patch Set: comment Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/text/TextEncodingDetector.h" 5 #include "platform/text/TextEncodingDetector.h"
6 6
7 #include "testing/gtest/include/gtest/gtest.h" 7 #include "testing/gtest/include/gtest/gtest.h"
8 #include "wtf/text/TextEncoding.h" 8 #include "wtf/text/TextEncoding.h"
9 9
10 namespace blink { 10 namespace blink {
11 11
12 TEST(TextResourceDecoderTest, RespectIso2022Jp) { 12 TEST(TextEncodingDetectorTest, RespectIso2022Jp) {
13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard.
14 std::string iso2022jp = 14 std::string iso2022jp =
15 " \x1B" 15 " \x1B"
16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\"";
17 WTF::TextEncoding encoding; 17 WTF::TextEncoding encoding;
18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), 18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(),
19 nullptr, nullptr, nullptr, &encoding); 19 nullptr, nullptr, nullptr, &encoding);
20 EXPECT_TRUE(result); 20 EXPECT_TRUE(result);
21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); 21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding);
22 } 22 }
23 23
24 TEST(TextResourceDecoderTest, Ignore7BitEncoding) { 24 TEST(TextEncodingDetectorTest, Ignore7BitEncoding) {
25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. 25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG.
26 // They should be detected as plain text (US-ASCII). 26 // They should be detected as plain text (US-ASCII).
27 std::string hzGb2312 = 27 std::string hzGb2312 =
28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; 28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc";
29 WTF::TextEncoding encoding; 29 WTF::TextEncoding encoding;
30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, 30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr,
31 nullptr, nullptr, &encoding); 31 nullptr, nullptr, &encoding);
32 EXPECT_TRUE(result); 32 EXPECT_TRUE(result);
33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); 33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
34 } 34 }
35 35
36 TEST(TextEncodingDetectorTest, NonWHATWGEncodingBecomesAscii) {
37 std::string pseudoJpg =
38 "\xff\xd8\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5"
39 "\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9\x87"
40 "\xff\xd7\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5"
41 "\x87\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9";
42 WTF::TextEncoding encoding;
43 bool result = detectTextEncoding(pseudoJpg.c_str(), pseudoJpg.length(),
44 nullptr, nullptr, nullptr, &encoding);
45 EXPECT_TRUE(result);
46 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
47 }
48
36 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) { 49 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) {
37 std::string eucjpBytes = 50 std::string eucjpBytes =
38 "<TITLE>" 51 "<TITLE>"
39 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" 52 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
40 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" 53 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
41 "TITLE>"; 54 "TITLE>";
42 WTF::TextEncoding encoding; 55 WTF::TextEncoding encoding;
43 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), 56 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
44 nullptr, nullptr, nullptr, &encoding); 57 nullptr, nullptr, nullptr, &encoding);
45 EXPECT_TRUE(result); 58 EXPECT_TRUE(result);
(...skipping 31 matching lines...) Expand 10 before | Expand all | Expand 10 after
77 std::string utf8Bytes = 90 std::string utf8Bytes =
78 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" 91 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3"
79 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; 92 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah";
80 WTF::TextEncoding encoding; 93 WTF::TextEncoding encoding;
81 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), 94 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(),
82 nullptr, nullptr, nullptr, &encoding); 95 nullptr, nullptr, nullptr, &encoding);
83 EXPECT_FALSE(result); 96 EXPECT_FALSE(result);
84 } 97 }
85 98
86 } // namespace blink 99 } // namespace blink
OLDNEW
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698