OLD | NEW |
| (Empty) |
// Copyright 2016 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4 | |
5 #include "wtf/text/UTF8.h" | |
6 | |
7 #include "testing/gtest/include/gtest/gtest.h" | |
8 | |
9 namespace WTF { | |
10 namespace Unicode { | |
11 | |
12 TEST(UTF8Test, IsUTF8andNotASCII) | |
13 { | |
14 EXPECT_TRUE(isUTF8andNotASCII("\xc2\x81", 2)); | |
15 EXPECT_TRUE(isUTF8andNotASCII("\xe1\x80\xbf", 3)); | |
16 EXPECT_TRUE(isUTF8andNotASCII("\xf1\x80\xa0\xbf", 4)); | |
17 EXPECT_TRUE(isUTF8andNotASCII("a\xc2\x81\xe1\x80\xbf\xf1\x80\xa0\xbf", 10)); | |
18 | |
19 // Surrogate code points | |
20 EXPECT_FALSE(isUTF8andNotASCII("\xed\xa0\x80\xed\xbf\xbf", 6)); | |
21 EXPECT_FALSE(isUTF8andNotASCII("\xed\xa0\x8f", 3)); | |
22 EXPECT_FALSE(isUTF8andNotASCII("\xed\xbf\xbf", 3)); | |
23 | |
24 // Overlong sequences | |
25 EXPECT_FALSE(isUTF8andNotASCII("\xc0\x80", 2)); // U+0000 | |
26 EXPECT_FALSE(isUTF8andNotASCII("\xc1\x80\xc1\x81", 4)); // "AB" | |
27 EXPECT_FALSE(isUTF8andNotASCII("\xe0\x80\x80", 3)); // U+0000 | |
28 EXPECT_FALSE(isUTF8andNotASCII("\xe0\x82\x80", 3)); // U+0080 | |
29 EXPECT_FALSE(isUTF8andNotASCII("\xe0\x9f\xbf", 3)); // U+07ff | |
30 EXPECT_FALSE(isUTF8andNotASCII("\xf0\x80\x80\x8D", 4)); // U+000D | |
31 EXPECT_FALSE(isUTF8andNotASCII("\xf0\x80\x82\x91", 4)); // U+0091 | |
32 EXPECT_FALSE(isUTF8andNotASCII("\xf0\x80\xa0\x80", 4)); // U+0800 | |
33 EXPECT_FALSE(isUTF8andNotASCII("\xf0\x8f\xbb\xbf", 4)); // U+FEFF (BOM) | |
34 EXPECT_FALSE(isUTF8andNotASCII("\xf8\x80\x80\x80\xbf", 5)); // U+003F | |
35 EXPECT_FALSE(isUTF8andNotASCII("\xfc\x80\x80\x80\xa0\xa5", 6)); // U+00A5 | |
36 | |
37 // Beyond U+10FFFF (the upper limit of Unicode codespace) | |
38 EXPECT_FALSE(isUTF8andNotASCII("\xf4\x90\x80\x80", 4)); // U+110000 | |
39 EXPECT_FALSE(isUTF8andNotASCII("\xf8\xa0\xbf\x80\xbf", 5)); // 5 bytes | |
40 EXPECT_FALSE(isUTF8andNotASCII("\xfc\x9c\xbf\x80\xbf\x80", 6)); // 6 bytes | |
41 | |
42 // Non-characters : U+xxFFF[EF] where xx is 0x00 through 0x10 and <FDD0,FDEF
> | |
43 EXPECT_FALSE(isUTF8andNotASCII("\xef\xbf\xbe", 3)); // U+FFFE | |
44 EXPECT_FALSE(isUTF8andNotASCII("\xf0\x8f\xbf\xbe", 4)); // U+1FFFE | |
45 EXPECT_FALSE(isUTF8andNotASCII("\xf3\xbf\xbf\xbf", 4)); // U+10FFFF | |
46 EXPECT_FALSE(isUTF8andNotASCII("\xef\xb7\x90", 3)); // U+FDD0 | |
47 EXPECT_FALSE(isUTF8andNotASCII("\xef\xb7\xaf", 3)); // U+FDEF | |
48 | |
49 // Strings in legacy encodings. | |
50 EXPECT_FALSE(isUTF8andNotASCII("caf\xe9", 4)); // cafe with U+00E9 in ISO-88
59-1 | |
51 EXPECT_FALSE(isUTF8andNotASCII("\xb0\xa1\xb0\xa2", 4)); // U+AC00, U+AC001 i
n EUC-KR | |
52 EXPECT_FALSE(isUTF8andNotASCII("\xa7\x41\xa6\x6e", 4)); // U+4F60 U+597D in
Big5 | |
53 // "abc" with U+201[CD] in windows-125[0-8] | |
54 EXPECT_FALSE(isUTF8andNotASCII("\x93" "abc\x94", 4)); | |
55 // U+0639 U+064E U+0644 U+064E in ISO-8859-6 | |
56 EXPECT_FALSE(isUTF8andNotASCII("\xd9\xee\xe4\xee", 4)); | |
57 // U+03B3 U+03B5 U+03B9 U+03AC in ISO-8859-7 | |
58 EXPECT_FALSE(isUTF8andNotASCII("\xe3\xe5\xe9\xdC", 4)); | |
59 EXPECT_FALSE(isUTF8andNotASCII("abc", 3)); // plain ASCII | |
60 } | |
61 | |
62 } // namespace Unicode | |
63 } // namespace WTF | |
OLD | NEW |