Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(426)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2786913002: Replace the type of hint url for blink::detectTextEncoding (Closed)
Patch Set: const KURL Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/text/TextEncodingDetector.h" 5 #include "platform/text/TextEncodingDetector.h"
6 6
7 #include "platform/weborigin/KURL.h"
7 #include "testing/gtest/include/gtest/gtest.h" 8 #include "testing/gtest/include/gtest/gtest.h"
8 #include "wtf/text/TextEncoding.h" 9 #include "wtf/text/TextEncoding.h"
9 10
10 namespace blink { 11 namespace blink {
11 12
12 TEST(TextEncodingDetectorTest, RespectIso2022Jp) { 13 TEST(TextEncodingDetectorTest, RespectIso2022Jp) {
13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. 14 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard.
14 std::string iso2022jp = 15 std::string iso2022jp =
15 " \x1B" 16 " \x1B"
16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; 17 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\"";
17 WTF::TextEncoding encoding; 18 WTF::TextEncoding encoding;
18 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(), 19 bool result = detectTextEncoding(iso2022jp.c_str(), iso2022jp.length(),
19 nullptr, nullptr, nullptr, &encoding); 20 nullptr, KURL(), nullptr, &encoding);
20 EXPECT_TRUE(result); 21 EXPECT_TRUE(result);
21 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding); 22 EXPECT_EQ(WTF::TextEncoding("ISO-2022-JP"), encoding);
22 } 23 }
23 24
24 TEST(TextEncodingDetectorTest, Ignore7BitEncoding) { 25 TEST(TextEncodingDetectorTest, Ignore7BitEncoding) {
25 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG. 26 // 7-bit encodings except ISO-2022-JP are not supported by WHATWG.
26 // They should be detected as plain text (US-ASCII). 27 // They should be detected as plain text (US-ASCII).
27 std::string hzGb2312 = 28 std::string hzGb2312 =
28 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc"; 29 " ~{\x54\x42\x31\x7D\x37\x22\x55\x39\x35\x3D\x3D\x71~} abc";
29 WTF::TextEncoding encoding; 30 WTF::TextEncoding encoding;
30 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr, 31 bool result = detectTextEncoding(hzGb2312.c_str(), hzGb2312.length(), nullptr,
31 nullptr, nullptr, &encoding); 32 KURL(), nullptr, &encoding);
32 EXPECT_TRUE(result); 33 EXPECT_TRUE(result);
33 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); 34 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
34 } 35 }
35 36
36 TEST(TextEncodingDetectorTest, NonWHATWGEncodingBecomesAscii) { 37 TEST(TextEncodingDetectorTest, NonWHATWGEncodingBecomesAscii) {
37 std::string pseudoJpg = 38 std::string pseudoJpg =
38 "\xff\xd8\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5" 39 "\xff\xd8\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5"
39 "\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9\x87" 40 "\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9\x87"
40 "\xff\xd7\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5" 41 "\xff\xd7\xff\xe0\x00\x10JFIF foo bar baz\xff\xe1\x00\xa5"
41 "\x87\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9"; 42 "\x87\x01\xd7\xff\x01\x57\x33\x44\x55\x66\x77\xed\xcb\xa9";
42 WTF::TextEncoding encoding; 43 WTF::TextEncoding encoding;
43 bool result = detectTextEncoding(pseudoJpg.c_str(), pseudoJpg.length(), 44 bool result = detectTextEncoding(pseudoJpg.c_str(), pseudoJpg.length(),
44 nullptr, nullptr, nullptr, &encoding); 45 nullptr, KURL(), nullptr, &encoding);
45 EXPECT_TRUE(result); 46 EXPECT_TRUE(result);
46 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding); 47 EXPECT_EQ(WTF::TextEncoding("US-ASCII"), encoding);
47 } 48 }
48 49
49 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) { 50 TEST(TextEncodingDetectorTest, UrlHintHelpsEUCJP) {
50 std::string eucjpBytes = 51 std::string eucjpBytes =
51 "<TITLE>" 52 "<TITLE>"
52 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" 53 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
53 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" 54 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
54 "TITLE>"; 55 "TITLE>";
55 WTF::TextEncoding encoding; 56 WTF::TextEncoding encoding;
56 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), 57 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
57 nullptr, nullptr, nullptr, &encoding); 58 nullptr, KURL(), nullptr, &encoding);
58 EXPECT_TRUE(result); 59 EXPECT_TRUE(result);
59 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding) 60 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
60 << "Without language hint, it's detected as GBK"; 61 << "Without language hint, it's detected as GBK";
61 62
63 KURL urlJpDomain(ParsedURLString, "http://example.co.jp/");
62 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, 64 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
63 "http://example.co.jp/", nullptr, &encoding); 65 urlJpDomain, nullptr, &encoding);
64 EXPECT_TRUE(result); 66 EXPECT_TRUE(result);
65 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding) 67 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
66 << "With URL hint including '.jp', it's detected as EUC-JP"; 68 << "With URL hint including '.jp', it's detected as EUC-JP";
67 } 69 }
68 70
69 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) { 71 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) {
70 std::string eucjpBytes = 72 std::string eucjpBytes =
71 "<TITLE>" 73 "<TITLE>"
72 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" 74 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
73 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" 75 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
74 "TITLE>"; 76 "TITLE>";
75 WTF::TextEncoding encoding; 77 WTF::TextEncoding encoding;
76 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), 78 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
77 nullptr, nullptr, nullptr, &encoding); 79 nullptr, KURL(), nullptr, &encoding);
78 EXPECT_TRUE(result); 80 EXPECT_TRUE(result);
79 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding) 81 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
80 << "Without language hint, it's detected as GBK"; 82 << "Without language hint, it's detected as GBK";
81 83
82 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, 84 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
83 nullptr, "ja", &encoding); 85 KURL(), "ja", &encoding);
84 EXPECT_TRUE(result); 86 EXPECT_TRUE(result);
85 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding) 87 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
86 << "With language hint 'ja', it's detected as EUC-JP"; 88 << "With language hint 'ja', it's detected as EUC-JP";
87 } 89 }
88 90
89 TEST(TextEncodingDetectorTest, UTF8DetectionShouldFail) { 91 TEST(TextEncodingDetectorTest, UTF8DetectionShouldFail) {
90 std::string utf8Bytes = 92 std::string utf8Bytes =
91 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" 93 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3"
92 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; 94 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah";
93 WTF::TextEncoding encoding; 95 WTF::TextEncoding encoding;
94 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), 96 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(),
95 nullptr, nullptr, nullptr, &encoding); 97 nullptr, KURL(), nullptr, &encoding);
96 EXPECT_FALSE(result); 98 EXPECT_FALSE(result);
97 } 99 }
98 100
99 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) { 101 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) {
100 std::string utf8Bytes = 102 std::string utf8Bytes =
101 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" 103 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3"
102 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; 104 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah";
103 WTF::TextEncoding encoding; 105 WTF::TextEncoding encoding;
106 KURL fileUrl(ParsedURLString, "file:///text.txt");
104 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), 107 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(),
105 nullptr, "file:///text", nullptr, &encoding); 108 nullptr, fileUrl, nullptr, &encoding);
106 EXPECT_TRUE(result); 109 EXPECT_TRUE(result);
107 } 110 }
108 111
109 } // namespace blink 112 } // namespace blink
OLD | NEW
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698