| OLD | NEW |
| 1 // Copyright 2016 The Chromium Authors. All rights reserved. | 1 // Copyright 2016 The Chromium Authors. All rights reserved. |
| 2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
| 3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
| 4 | 4 |
| 5 #include "platform/text/TextEncodingDetector.h" | 5 #include "platform/text/TextEncodingDetector.h" |
| 6 | 6 |
| 7 #include "platform/weborigin/KURL.h" |
| 7 #include "testing/gtest/include/gtest/gtest.h" | 8 #include "testing/gtest/include/gtest/gtest.h" |
| 8 #include "wtf/text/TextEncoding.h" | 9 #include "wtf/text/TextEncoding.h" |
| 9 | 10 |
| 10 namespace blink { | 11 namespace blink { |
| 11 | 12 |
| 12 TEST(TextEncodingDetectorTest, RespectIso2022Jp) { | 13 TEST(TextEncodingDetectorTest, RespectIso2022Jp) { |
| 13 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. | 14 // ISO-2022-JP is the only 7-bit encoding defined in WHATWG standard. |
| 14 std::string iso2022jp = | 15 std::string iso2022jp = |
| 15 " \x1B" | 16 " \x1B" |
| 16 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; | 17 "$BKL3$F;F|K\\%O%`%U%!%$%?!<%:$,%=%U%H%P%s%/$H$N%W%l!<%*%U$r@)$7!\""; |
| (...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 52 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" | 53 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" |
| 53 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" | 54 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" |
| 54 "TITLE>"; | 55 "TITLE>"; |
| 55 WTF::TextEncoding encoding; | 56 WTF::TextEncoding encoding; |
| 56 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), | 57 bool result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), |
| 57 nullptr, nullptr, nullptr, &encoding); | 58 nullptr, nullptr, nullptr, &encoding); |
| 58 EXPECT_TRUE(result); | 59 EXPECT_TRUE(result); |
| 59 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding) | 60 EXPECT_EQ(WTF::TextEncoding("GBK"), encoding) |
| 60 << "Without language hint, it's detected as GBK"; | 61 << "Without language hint, it's detected as GBK"; |
| 61 | 62 |
| 63 KURL urlJpDomain(ParsedURLString, "http://example.co.jp/"); |
| 62 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, | 64 result = detectTextEncoding(eucjpBytes.c_str(), eucjpBytes.length(), nullptr, |
| 63 "http://example.co.jp/", nullptr, &encoding); | 65 &urlJpDomain, nullptr, &encoding); |
| 64 EXPECT_TRUE(result); | 66 EXPECT_TRUE(result); |
| 65 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding) | 67 EXPECT_EQ(WTF::TextEncoding("EUC-JP"), encoding) |
| 66 << "With URL hint including '.jp', it's detected as EUC-JP"; | 68 << "With URL hint including '.jp', it's detected as EUC-JP"; |
| 67 } | 69 } |
| 68 | 70 |
| 69 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) { | 71 TEST(TextEncodingDetectorTest, LanguageHintHelpsEUCJP) { |
| 70 std::string eucjpBytes = | 72 std::string eucjpBytes = |
| 71 "<TITLE>" | 73 "<TITLE>" |
| 72 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" | 74 "\xA5\xD1\xA5\xEF\xA1\xBC\xA5\xC1\xA5\xE3\xA1\xBC\xA5\xC8\xA1\xC3\xC5\xEA" |
| 73 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" | 75 "\xBB\xF1\xBE\xF0\xCA\xF3\xA4\xCE\xA5\xD5\xA5\xA3\xA5\xB9\xA5\xB3</" |
| (...skipping 20 matching lines...) Expand all Loading... |
| 94 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), | 96 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), |
| 95 nullptr, nullptr, nullptr, &encoding); | 97 nullptr, nullptr, nullptr, &encoding); |
| 96 EXPECT_FALSE(result); | 98 EXPECT_FALSE(result); |
| 97 } | 99 } |
| 98 | 100 |
| 99 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) { | 101 TEST(TextEncodingDetectorTest, RespectUTF8DetectionForFileResource) { |
| 100 std::string utf8Bytes = | 102 std::string utf8Bytes = |
| 101 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" | 103 "tnegirjji gosa gii beare s\xC3\xA1htt\xC3\xA1 \xC4\x8D\xC3" |
| 102 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; | 104 "\xA1llit artihkkaliid. Maid don s\xC3\xA1ht\xC3\xA1t dievasmah"; |
| 103 WTF::TextEncoding encoding; | 105 WTF::TextEncoding encoding; |
| 106 KURL fileUrl(ParsedURLString, "file:///text.txt"); |
| 104 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), | 107 bool result = detectTextEncoding(utf8Bytes.c_str(), utf8Bytes.length(), |
| 105 nullptr, "file:///text", nullptr, &encoding); | 108 nullptr, &fileUrl, nullptr, &encoding); |
| 106 EXPECT_TRUE(result); | 109 EXPECT_TRUE(result); |
| 107 } | 110 } |
| 108 | 111 |
| 109 } // namespace blink | 112 } // namespace blink |
| OLD | NEW |