Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(963)

Side by Side Diff: third_party/WebKit/Source/platform/text/TextEncodingDetectorTest.cpp

Issue 2803563004: Avoid using language hint in encoding detection (Closed)
Patch Set: fix bug Created 3 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright 2016 The Chromium Authors. All rights reserved. 1 // Copyright 2016 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 #include "platform/text/TextEncodingDetector.h" 5 #include "platform/text/TextEncodingDetector.h"
6 6
7 #include "platform/weborigin/KURL.h" 7 #include "platform/weborigin/KURL.h"
8 #include "testing/gtest/include/gtest/gtest.h" 8 #include "testing/gtest/include/gtest/gtest.h"
9 #include "wtf/text/TextEncoding.h" 9 #include "wtf/text/TextEncoding.h"
10 10
(...skipping 63 matching lines...) Expand 10 before | Expand all | Expand 10 after
74 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" 74 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA"
75 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" 75 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</"
76 "TITLE>"; 76 "TITLE>";
77 WTF::TextEncoding encoding; 77 WTF::TextEncoding encoding;
78 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), 78 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(),
79 nullptr, KURL(), nullptr, &encoding); 79 nullptr, KURL(), nullptr, &encoding);
80 EXPECT_TRUE(result); 80 EXPECT_TRUE(result);
81 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding) 81 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
82 << "Without language hint, it's detected as GBK"; 82 << "Without language hint, it's detected as GBK";
83 83
84 KURL url(ParsedURLString, "http://example.com/");
84 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, 85 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
85 KURL(), "ja", &encoding); 86 url, "ja", &encoding);
87 EXPECT_TRUE(result);
88 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding)
89 << "Language hint doesn't help for normal URL. Should be detected as GBK";
90
91 KURL fileUrl(ParsedURLString, "file:///text.txt");
92 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr,
93 fileUrl, "ja", &encoding);
86 EXPECT_TRUE(result); 94 EXPECT_TRUE(result);
87 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding) 95 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding)
88 << "With language hint 'ja', it's detected as EUC-JP"; 96 << "Language hint works for file resource. Should be detected as EUC-JP";
89 } 97 }
90 98
91 TEST(TextEncodingDetectorTest, UTF8DetectionShouldFail) { 99 TEST(TextEncodingDetectorTest, UTF8DetectionShouldFail) {
92 std::string utf8Bytes = 100 std::string utf8Bytes =
93 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" 101 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3"
94 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; 102 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah";
95 WTF::TextEncoding encoding; 103 WTF::TextEncoding encoding;
96 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), 104 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(),
97 nullptr, KURL(), nullptr, &encoding); 105 nullptr, KURL(), nullptr, &encoding);
98 EXPECT_FALSE(result); 106 EXPECT_FALSE(result);
99 } 107 }
100 108
101 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) { 109 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) {
102 std::string utf8Bytes = 110 std::string utf8Bytes =
103 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" 111 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3"
104 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; 112 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah";
105 WTF::TextEncoding encoding; 113 WTF::TextEncoding encoding;
106 KURL fileUrl(ParsedURLString, "file:///text.txt"); 114 KURL fileUrl(ParsedURLString, "file:///text.txt");
107 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), 115 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(),
108 nullptr, fileUrl, nullptr, &encoding); 116 nullptr, fileUrl, nullptr, &encoding);
109 EXPECT_TRUE(result); 117 EXPECT_TRUE(result);
110 } 118 }
111 119
112 } // namespace blink 120 } // namespace blink
OLD | NEW
« no previous file with comments | « third_party/WebKit/Source/platform/text/TextEncodingDetector.cpp ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698