OLD | NEW |
1 <html> | 1 <html> |
2 <head> | 2 <head> |
3 <script src="../../resources/js-test.js"></script> | 3 <script src="../../resources/js-test.js"></script> |
4 <script src="resources/char-decoding-utils.js"></script> | 4 <script src="resources/char-decoding-utils.js"></script> |
5 </head> | 5 </head> |
6 <body> | 6 <body> |
7 <script> | 7 <script> |
8 | 8 |
9 description("This tests decoding characters in various character sets."); | 9 description("This tests decoding characters in various character sets."); |
10 | 10 |
(...skipping 29 matching lines...) |
40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], | 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], |
41 encoded: ['%80', '%9F', '%FD'], | 41 encoded: ['%80', '%9F', '%FD'], |
42 unicode: ['U+20AC', 'U+0178', 'U+0131'] | 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] |
43 }; | 43 }; |
44 | 44 |
45 batchTestDecode(turkish); | 45 batchTestDecode(turkish); |
46 | 46 |
47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x | 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x |
48 // that are NOT subsets of the corresponding Windows codepages. For instance, | 48 // that are NOT subsets of the corresponding Windows codepages. For instance, |
49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. | 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. |
50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that | |
51 // can be enabled only with modern ICU. | |
52 | 50 |
53 // Baltic encodings fine points. | 51 // Baltic encodings fine points. |
54 testDecode('ISO-8859-13', '%A1', 'U+201D'); | 52 testDecode('ISO-8859-13', '%A1', 'U+201D'); |
55 testDecode('ISO-8859-13', '%A5', 'U+201E'); | 53 testDecode('ISO-8859-13', '%A5', 'U+201E'); |
56 testDecode('ISO-8859-13', '%B4', 'U+201C'); | 54 testDecode('ISO-8859-13', '%B4', 'U+201C'); |
57 testDecode('ISO-8859-13', '%FF', 'U+2019'); | 55 testDecode('ISO-8859-13', '%FF', 'U+2019'); |
58 testDecode('windows-1257', '%80', 'U+20AC'); | 56 testDecode('windows-1257', '%80', 'U+20AC'); |
59 testDecode('windows-1257', '%B4', 'U+00B4'); | 57 testDecode('windows-1257', '%B4', 'U+00B4'); |
60 testDecode('windows-1257', '%FF', 'U+02D9'); | 58 testDecode('windows-1257', '%FF', 'U+02D9'); |
61 | 59 |
62 // Greek encodings fine points. | 60 // Greek encodings fine points. |
63 testDecode('iso-8859-7', '%A1', 'U+2018'); | 61 testDecode('iso-8859-7', '%A1', 'U+2018'); |
64 testDecode('iso-8859-7', '%B5', 'U+0385'); | 62 testDecode('iso-8859-7', '%B5', 'U+0385'); |
65 testDecode('iso-8859-7', '%B6', 'U+0386'); | 63 testDecode('iso-8859-7', '%B6', 'U+0386'); |
| 64 testDecode('iso-8859-7', '%A4', 'U+20AC'); |
| 65 testDecode('iso-8859-7', '%A5', 'U+20AF'); |
| 66 testDecode('iso-8859-7', '%AA', 'U+037A'); |
66 testDecode('windows-1253', '%80', 'U+20AC'); | 67 testDecode('windows-1253', '%80', 'U+20AC'); |
67 testDecode('windows-1253', '%A1', 'U+0385'); | 68 testDecode('windows-1253', '%A1', 'U+0385'); |
68 testDecode('windows-1253', '%B5', 'U+00B5'); | 69 testDecode('windows-1253', '%B5', 'U+00B5'); |
69 testDecode('windows-1253', '%B6', 'U+00B6'); | 70 testDecode('windows-1253', '%B6', 'U+00B6'); |
70 | 71 |
71 // KOI-8 variants | 72 // KOI-8 variants |
72 testDecode('KOI8-R', '%A4', 'U+2553'); | 73 testDecode('KOI8-R', '%A4', 'U+2553'); |
73 testDecode('KOI8-R', '%AD', 'U+255C'); | 74 testDecode('KOI8-R', '%AD', 'U+255C'); |
74 testDecode('KOI8-U', '%A4', 'U+0454'); | 75 testDecode('KOI8-U', '%A4', 'U+0454'); |
75 testDecode('KOI8-U', '%AD', 'U+0491'); | 76 testDecode('KOI8-U', '%AD', 'U+0491'); |
76 | 77 |
77 // Test that TIS-620 and ISO-8859-11 (Thai) are upgraded to windows-874. | 78 // Test that TIS-620 and ISO-8859-11 (Thai) are upgraded to windows-874. |
78 // "0xDB => U+F8C1" is a weird PUA mapping that doesn't seem to be of | |
79 // any use, even on Windows. | |
80 var thai = { | 79 var thai = { |
81 encodings: ['TIS-620', 'ISO-8859-11', 'windows-874', 'dos-874'], | 80 encodings: ['TIS-620', 'ISO-8859-11', 'windows-874', 'dos-874'], |
82 encoded: ['%80', '%96', '%A0', '%A1', '%DB'], | 81 encoded: ['%80', '%96', '%A0', '%A1', '%DA'], |
83 unicode: ['U+20AC', 'U+2013', 'U+00A0', 'U+0E01', 'U+F8C1'] | 82 unicode: ['U+20AC', 'U+2013', 'U+00A0', 'U+0E01', 'U+0E3A'] |
84 }; | 83 }; |
85 | 84 |
86 batchTestDecode(thai); | 85 batchTestDecode(thai); |
87 | 86 |
88 // UTF-7 is expressly forbidden, so decoding it should not work correctly. | 87 // UTF-7 is expressly forbidden, so decoding it should not work correctly. |
89 // This attempts to decode '>' as UTF-7 (+AD4) but it ends up being decoded | 88 // This attempts to decode '>' as UTF-7 (+AD4) but it ends up being decoded |
90 // as the literal characters '+AD4'. | 89 // as the literal characters '+AD4'. |
91 testDecode('UTF-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); | 90 testDecode('UTF-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); |
92 testDecode('utf-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); | 91 testDecode('utf-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); |
93 | 92 |
(...skipping 14 matching lines...) |
108 // Replacement encodings should decode as replacement (U+FFFD) then EOF | 107 // Replacement encodings should decode as replacement (U+FFFD) then EOF |
109 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 108 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
110 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 109 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
111 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 110 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
112 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 111 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
113 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 112 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
114 | 113 |
115 </script> | 114 </script> |
116 </body> | 115 </body> |
117 </html> | 116 </html> |
OLD | NEW |