| OLD | NEW |
| 1 <html> | 1 <html> |
| 2 <head> | 2 <head> |
| 3 <script src="../../resources/js-test.js"></script> | 3 <script src="../../resources/js-test.js"></script> |
| 4 <script src="resources/char-decoding-utils.js"></script> | 4 <script src="resources/char-decoding-utils.js"></script> |
| 5 </head> | 5 </head> |
| 6 <body> | 6 <body> |
| 7 <script> | 7 <script> |
| 8 | 8 |
| 9 description("This tests decoding characters in various character sets."); | 9 description("This tests decoding characters in various character sets."); |
| 10 | 10 |
| (...skipping 29 matching lines...) Expand all Loading... |
| 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], | 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], |
| 41 encoded: ['%80', '%9F', '%FD'], | 41 encoded: ['%80', '%9F', '%FD'], |
| 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] | 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] |
| 43 }; | 43 }; |
| 44 | 44 |
| 45 batchTestDecode(turkish); | 45 batchTestDecode(turkish); |
| 46 | 46 |
| 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x | 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x |
| 48 // that are NOT subsets of the corresponding Windows codepages. For instance, | 48 // that are NOT subsets of the corresponding Windows codepages. For instance, |
| 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. | 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. |
| 50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that | |
| 51 // can be enabled only with modern ICU. | |
| 52 | 50 |
| 53 // Baltic encodings fine points. | 51 // Baltic encodings fine points. |
| 54 testDecode('ISO-8859-13', '%A1', 'U+201D'); | 52 testDecode('ISO-8859-13', '%A1', 'U+201D'); |
| 55 testDecode('ISO-8859-13', '%A5', 'U+201E'); | 53 testDecode('ISO-8859-13', '%A5', 'U+201E'); |
| 56 testDecode('ISO-8859-13', '%B4', 'U+201C'); | 54 testDecode('ISO-8859-13', '%B4', 'U+201C'); |
| 57 testDecode('ISO-8859-13', '%FF', 'U+2019'); | 55 testDecode('ISO-8859-13', '%FF', 'U+2019'); |
| 58 testDecode('windows-1257', '%80', 'U+20AC'); | 56 testDecode('windows-1257', '%80', 'U+20AC'); |
| 59 testDecode('windows-1257', '%B4', 'U+00B4'); | 57 testDecode('windows-1257', '%B4', 'U+00B4'); |
| 60 testDecode('windows-1257', '%FF', 'U+02D9'); | 58 testDecode('windows-1257', '%FF', 'U+02D9'); |
| 61 | 59 |
| 62 // Greek encodings fine points. | 60 // Greek encodings fine points. |
| 63 testDecode('iso-8859-7', '%A1', 'U+2018'); | 61 testDecode('iso-8859-7', '%A1', 'U+2018'); |
| 64 testDecode('iso-8859-7', '%B5', 'U+0385'); | 62 testDecode('iso-8859-7', '%B5', 'U+0385'); |
| 65 testDecode('iso-8859-7', '%B6', 'U+0386'); | 63 testDecode('iso-8859-7', '%B6', 'U+0386'); |
| | 64 testDecode('iso-8859-7', '%A4', 'U+20AC'); |
| | 65 testDecode('iso-8859-7', '%A5', 'U+20AF'); |
| | 66 testDecode('iso-8859-7', '%AA', 'U+037A'); |
| 66 testDecode('windows-1253', '%80', 'U+20AC'); | 67 testDecode('windows-1253', '%80', 'U+20AC'); |
| 67 testDecode('windows-1253', '%A1', 'U+0385'); | 68 testDecode('windows-1253', '%A1', 'U+0385'); |
| 68 testDecode('windows-1253', '%B5', 'U+00B5'); | 69 testDecode('windows-1253', '%B5', 'U+00B5'); |
| 69 testDecode('windows-1253', '%B6', 'U+00B6'); | 70 testDecode('windows-1253', '%B6', 'U+00B6'); |
| 70 | 71 |
| 71 // KOI-8 variants | 72 // KOI-8 variants |
| 72 testDecode('KOI8-R', '%A4', 'U+2553'); | 73 testDecode('KOI8-R', '%A4', 'U+2553'); |
| 73 testDecode('KOI8-R', '%AD', 'U+255C'); | 74 testDecode('KOI8-R', '%AD', 'U+255C'); |
| 74 testDecode('KOI8-U', '%A4', 'U+0454'); | 75 testDecode('KOI8-U', '%A4', 'U+0454'); |
| 75 testDecode('KOI8-U', '%AD', 'U+0491'); | 76 testDecode('KOI8-U', '%AD', 'U+0491'); |
| 76 | 77 |
| 77 // Test that TIS-620 and ISO-8859-11 (Thai) are upgraded to windows-874. | 78 // Test that TIS-620 and ISO-8859-11 (Thai) are upgraded to windows-874. |
| 78 // "0xDB => U+F8C1" is a weird PUA mapping that doesn't seem to be of | |
| 79 // any use, even on Windows. | |
| 80 var thai = { | 79 var thai = { |
| 81 encodings: ['TIS-620', 'ISO-8859-11', 'windows-874', 'dos-874'], | 80 encodings: ['TIS-620', 'ISO-8859-11', 'windows-874', 'dos-874'], |
| 82 encoded: ['%80', '%96', '%A0', '%A1', '%DB'], | 81 encoded: ['%80', '%96', '%A0', '%A1', '%DA'], |
| 83 unicode: ['U+20AC', 'U+2013', 'U+00A0', 'U+0E01', 'U+F8C1'] | 82 unicode: ['U+20AC', 'U+2013', 'U+00A0', 'U+0E01', 'U+0E3A'] |
| 84 }; | 83 }; |
| 85 | 84 |
| 86 batchTestDecode(thai); | 85 batchTestDecode(thai); |
| 87 | 86 |
| 88 // UTF-7 is expressly forbidden, so decoding it should not work correctly. | 87 // UTF-7 is expressly forbidden, so decoding it should not work correctly. |
| 89 // This attempts to decode '>' as UTF-7 (+AD4) but it ends up being decoded | 88 // This attempts to decode '>' as UTF-7 (+AD4) but it ends up being decoded |
| 90 // as the literal characters '+AD4'. | 89 // as the literal characters '+AD4'. |
| 91 testDecode('UTF-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); | 90 testDecode('UTF-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); |
| 92 testDecode('utf-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); | 91 testDecode('utf-7', '+AD4', 'U+002B/U+0041/U+0044/U+0034'); |
| 93 | 92 |
| (...skipping 14 matching lines...) Expand all Loading... |
| 108 // Replacement encodings should decode as replacement (U+FFFD) then EOF | 107 // Replacement encodings should decode as replacement (U+FFFD) then EOF |
| 109 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 108 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 110 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 109 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 111 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 110 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 112 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 111 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 113 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); | 112 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 114 | 113 |
| 115 </script> | 114 </script> |
| 116 </body> | 115 </body> |
| 117 </html> | 116 </html> |
| OLD | NEW |