OLD | NEW |
1 <html> | 1 <html> |
2 <head> | 2 <head> |
3 <script src="../../resources/js-test.js"></script> | 3 <script src="../../resources/js-test.js"></script> |
4 <script src="resources/char-decoding-utils.js"></script> | 4 <script src="resources/char-decoding-utils.js"></script> |
5 </head> | 5 </head> |
6 <body> | 6 <body> |
7 <script> | 7 <script> |
8 | 8 |
9 description("This tests decoding characters in various character sets."); | 9 description("This tests decoding characters in various character sets."); |
10 | 10 |
(...skipping 11 matching lines...) |
22 testDecode('Shift_JIS', '%82%d0', 'U+3072'); | 22 testDecode('Shift_JIS', '%82%d0', 'U+3072'); |
23 testDecode('shift-jis', '%82%d0', 'U+3072'); | 23 testDecode('shift-jis', '%82%d0', 'U+3072'); |
24 | 24 |
25 // Test that all Korean encodings of EUC-KR family are treated as windows-949. | 25 // Test that all Korean encodings of EUC-KR family are treated as windows-949. |
26 var korean = { | 26 var korean = { |
27 encodings: ['korean', 'EUC-KR', 'windows-949', 'x-windows-949', 'x-uhc', | 27 encodings: ['korean', 'EUC-KR', 'windows-949', 'x-windows-949', 'x-uhc', |
28 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', | 28 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', |
29 'KSC5601', 'KSC_5601'], | 29 'KSC5601', 'KSC_5601'], |
30 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', | 30 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', |
31 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], | 31 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], |
32 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', | 32 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', |
33 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] | 33 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] |
34 }; | 34 }; |
35 | 35 |
36 batchTestDecode(korean); | 36 batchTestDecode(korean); |
37 | 37 |
38 // Test that ISO-8859-9 (Turkish) is upgraded to windows-1254 with Euro symbol. | 38 // Test that ISO-8859-9 (Turkish) is upgraded to windows-1254 with Euro symbol. |
39 var turkish = { | 39 var turkish = { |
40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], | 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], |
41 encoded: ['%80', '%9F', '%FD'], | 41 encoded: ['%80', '%9F', '%FD'], |
42 unicode: ['U+20AC', 'U+0178', 'U+0131'] | 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] |
43 }; | 43 }; |
44 | 44 |
45 batchTestDecode(turkish); | 45 batchTestDecode(turkish); |
46 | 46 |
47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x | 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x |
48 // that are NOT subsets of the corresponding Windows codepages. For instance, | 48 // that are NOT subsets of the corresponding Windows codepages. For instance, |
49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. | 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. |
50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that | 50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that |
51 // can be enabled only with modern ICU. | 51 // can be enabled only with modern ICU. |
52 | 52 |
53 // Baltic encodings fine points. | 53 // Baltic encodings fine points. |
54 testDecode('ISO-8859-13', '%A1', 'U+201D'); | 54 testDecode('ISO-8859-13', '%A1', 'U+201D'); |
55 testDecode('ISO-8859-13', '%A5', 'U+201E'); | 55 testDecode('ISO-8859-13', '%A5', 'U+201E'); |
56 testDecode('ISO-8859-13', '%B4', 'U+201C'); | 56 testDecode('ISO-8859-13', '%B4', 'U+201C'); |
57 testDecode('ISO-8859-13', '%FF', 'U+2019'); | 57 testDecode('ISO-8859-13', '%FF', 'U+2019'); |
(...skipping 40 matching lines...) |
98 testDecode('UTF-16', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 98 testDecode('UTF-16', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
99 testDecode('ISO-10646-UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 99 testDecode('ISO-10646-UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
100 testDecode('UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 100 testDecode('UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
101 testDecode('Unicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 101 testDecode('Unicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
102 testDecode('csUnicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 102 testDecode('csUnicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
103 | 103 |
104 // UTF-16BE and variants. | 104 // UTF-16BE and variants. |
105 testDecode('UTF-16BE', '%D8%69%DE%D6', 'U+D869/U+DED6'); | 105 testDecode('UTF-16BE', '%D8%69%DE%D6', 'U+D869/U+DED6'); |
106 testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6'); | 106 testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6'); |
107 | 107 |
| 108 // Replacement encodings should decode as replacement (U+FFFD) then EOF |
| 109 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 110 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 111 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 112 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 113 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 114 |
108 </script> | 115 </script> |
109 </body> | 116 </body> |
110 </html> | 117 </html> |
OLD | NEW |