OLD | NEW |
1 <html> | 1 <html> |
2 <head> | 2 <head> |
3 <script src="../../resources/js-test.js"></script> | 3 <script src="../../resources/js-test.js"></script> |
4 <script src="resources/char-decoding-utils.js"></script> | 4 <script src="resources/char-decoding-utils.js"></script> |
5 </head> | 5 </head> |
6 <body> | 6 <body> |
7 <script> | 7 <script> |
8 | 8 |
9 description("This tests decoding characters in various character sets."); | 9 description("This tests decoding characters in various character sets."); |
10 | 10 |
11 testDecode('UTF-8', '%E2%88%9A', 'U+221A'); | 11 testDecode('UTF-8', '%E2%88%9A', 'U+221A'); |
12 | 12 |
13 // <http://bugs.webkit.org/show_bug.cgi?id=17014> EUC-CN code A3A0 is mapped to U+E5E5 instead of U+3000 | 13 // \xA3\xA0 in GBK should be mapped to U+3000 instead of U+E5E5. |
14 testDecode('gb2312', '%A3%A0', 'U+3000'); | 14 testDecode('gb2312', '%A3%A0', 'U+3000'); |
| 15 testDecode('gb_2312', '%A3%A0', 'U+3000'); |
15 testDecode('gb_2312-80', '%A3%A0', 'U+3000'); | 16 testDecode('gb_2312-80', '%A3%A0', 'U+3000'); |
| 17 testDecode('csgb2312', '%A3%A0', 'U+3000'); |
| 18 testDecode('iso-ir-58', '%A3%A0', 'U+3000'); |
| 19 testDecode('csiso58gb231280', '%A3%A0', 'U+3000'); |
16 testDecode('chinese', '%A3%A0', 'U+3000'); | 20 testDecode('chinese', '%A3%A0', 'U+3000'); |
17 testDecode('gbk', '%A3%A0', 'U+3000'); | 21 testDecode('gbk', '%A3%A0', 'U+3000'); |
| 22 testDecode('x-gbk', '%A3%A0', 'U+3000'); |
18 testDecode('gb18030', '%A3%A0', 'U+3000'); | 23 testDecode('gb18030', '%A3%A0', 'U+3000'); |
19 testDecode('EUC-CN', '%A3%A0', 'U+3000'); | 24 testDecode('EUC-CN', '%A3%A0', 'U+3000'); |
20 | 25 |
| 26 // Align GBK with GB18030 |
| 27 testDecode('gbk', '%A8%BF', 'U+01F9'); |
| 28 testDecode('gbk', '%A1%AD', 'U+2026'); |
| 29 testDecode('gbk', '%A1%AB', 'U+FF5E'); |
| 30 testDecode('gb18030', '%A8%BF', 'U+01F9'); |
| 31 testDecode('gb18030', '%A8%BC', 'U+1E3F'); |
| 32 testDecode('gb18030', '%A1%AD', 'U+2026'); |
| 33 testDecode('gb18030', '%A1%AB', 'U+FF5E'); |
| 34 |
| 35 // Replace U+E7C7 with U+1E3F once |
| 36 // https://www.w3.org/Bugs/Public/show_bug.cgi?id=28740#c3 is resolved. |
| 37 testDecode('gbk', '%A8%BC', 'U+E7C7'); |
| 38 |
21 // Test Shift_JIS aliases. | 39 // Test Shift_JIS aliases. |
22 testDecode('Shift_JIS', '%82%d0', 'U+3072'); | 40 testDecode('Shift_JIS', '%82%d0', 'U+3072'); |
23 testDecode('shift-jis', '%82%d0', 'U+3072'); | 41 testDecode('shift-jis', '%82%d0', 'U+3072'); |
| 42 testDecode('csshiftjis', '%82%d0', 'U+3072'); |
| 43 testDecode('sjis', '%82%d0', 'U+3072'); |
| 44 testDecode('x-sjis', '%82%d0', 'U+3072'); |
| 45 testDecode('ms_kanji', '%82%d0', 'U+3072'); |
| 46 testDecode('windows-31j', '%82%d0', 'U+3072'); |
24 | 47 |
25 // Test that all Korean encodings of EUC-KR family are treated as windows-949. | 48 // Test that all Korean encodings of EUC-KR family are treated as windows-949. |
26 var korean = { | 49 var korean = { |
27 encodings: ['korean', 'EUC-KR', 'windows-949', 'x-windows-949', 'x-uhc', | 50 encodings: ['korean', 'EUC-KR', 'windows-949', 'cseuckr', 'csksc56011987', |
28 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', | 51 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', |
29 'KSC5601', 'KSC_5601'], | 52 'KSC5601', 'KSC_5601'], |
30 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', | 53 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', |
31 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], | 54 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], |
32 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', | 55 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', |
33 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] | 56 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] |
34 }; | 57 }; |
35 | 58 |
36 batchTestDecode(korean); | 59 batchTestDecode(korean); |
37 | 60 |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
107 // Replacement encodings should decode as replacement (U+FFFD) then EOF | 130 // Replacement encodings should decode as replacement (U+FFFD) then EOF |
108 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 131 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
109 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 132 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
110 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 133 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
111 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 134 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
112 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 135 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
113 | 136 |
114 </script> | 137 </script> |
115 </body> | 138 </body> |
116 </html> | 139 </html> |
OLD | NEW |