| OLD | NEW |
| 1 <html> | 1 <html> |
| 2 <head> | 2 <head> |
| 3 <script src="../../resources/js-test.js"></script> | 3 <script src="../../resources/js-test.js"></script> |
| 4 <script src="resources/char-decoding-utils.js"></script> | 4 <script src="resources/char-decoding-utils.js"></script> |
| 5 </head> | 5 </head> |
| 6 <body> | 6 <body> |
| 7 <script> | 7 <script> |
| 8 | 8 |
| 9 description("This tests decoding characters in various character sets."); | 9 description("This tests decoding characters in various character sets."); |
| 10 | 10 |
| (...skipping 11 matching lines...) Expand all Loading... |
| 22 testDecode('Shift_JIS', '%82%d0', 'U+3072'); | 22 testDecode('Shift_JIS', '%82%d0', 'U+3072'); |
| 23 testDecode('shift-jis', '%82%d0', 'U+3072'); | 23 testDecode('shift-jis', '%82%d0', 'U+3072'); |
| 24 | 24 |
| 25 // Test that all Korean encodings of EUC-KR family are treated as windows-949. | 25 // Test that all Korean encodings of EUC-KR family are treated as windows-949. |
| 26 var korean = { | 26 var korean = { |
| 27 encodings: ['korean', 'EUC-KR', 'windows-949', 'x-windows-949', 'x-uhc', | 27 encodings: ['korean', 'EUC-KR', 'windows-949', 'x-windows-949', 'x-uhc', |
| 28 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', | 28 'iso-ir-149', 'KS_C_5601-1987', 'KS_C_5601-1989', |
| 29 'KSC5601', 'KSC_5601'], | 29 'KSC5601', 'KSC_5601'], |
| 30 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', | 30 encoded: ['%A2%E6', '%A1%A4', '%A1%A9', '%A1%AA', '%A1%AD', '%A2%A6', |
| 31 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], | 31 '%A2%C1', '%1A', '%1C', '%8F%A1', '%B4%D3', '%A2%41'], |
| 32 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', | 32 unicode: ['U+20AC', 'U+00B7', 'U+00AD', 'U+2015', 'U+223C', 'U+FF5E', |
| 33 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] | 33 'U+2299', 'U+001A', 'U+001C', 'U+B8EA', 'U+B2D2', 'U+C910'] |
| 34 }; | 34 }; |
| 35 | 35 |
| 36 batchTestDecode(korean); | 36 batchTestDecode(korean); |
| 37 | 37 |
| 38 // Test that ISO-8859-9 (Turkish) is upgraded to windows-1254 with Euro symbol. | 38 // Test that ISO-8859-9 (Turkish) is upgraded to windows-1254 with Euro symbol. |
| 39 var turkish = { | 39 var turkish = { |
| 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], | 40 encodings: ['iso-8859-9', 'latin5', 'windows-1254'], |
| 41 encoded: ['%80', '%9F', '%FD'], | 41 encoded: ['%80', '%9F', '%FD'], |
| 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] | 42 unicode: ['U+20AC', 'U+0178', 'U+0131'] |
| 43 }; | 43 }; |
| 44 | 44 |
| 45 batchTestDecode(turkish); | 45 batchTestDecode(turkish); |
| 46 | 46 |
| 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x | 47 // FIXME: Have to add tests for Euro and a few new characters added to ISO-8859-x |
| 48 // that are NOT subsets of the corresponding Windows codepages. For instance, | 48 // that are NOT subsets of the corresponding Windows codepages. For instance, |
| 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. | 49 // ISO-8859-7:2003 has Euro at 0xA4 and a couple of other new characters. |
| 50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that | 50 // ICU 3.8.x or later has them. Perhaps, we need to have a separate test that |
| 51 // can be enabled only with modern ICU. | 51 // can be enabled only with modern ICU. |
| 52 | 52 |
| 53 // Baltic encodings fine points. | 53 // Baltic encodings fine points. |
| 54 testDecode('ISO-8859-13', '%A1', 'U+201D'); | 54 testDecode('ISO-8859-13', '%A1', 'U+201D'); |
| 55 testDecode('ISO-8859-13', '%A5', 'U+201E'); | 55 testDecode('ISO-8859-13', '%A5', 'U+201E'); |
| 56 testDecode('ISO-8859-13', '%B4', 'U+201C'); | 56 testDecode('ISO-8859-13', '%B4', 'U+201C'); |
| 57 testDecode('ISO-8859-13', '%FF', 'U+2019'); | 57 testDecode('ISO-8859-13', '%FF', 'U+2019'); |
| (...skipping 40 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 98 testDecode('UTF-16', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 98 testDecode('UTF-16', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
| 99 testDecode('ISO-10646-UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 99 testDecode('ISO-10646-UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
| 100 testDecode('UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 100 testDecode('UCS-2', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
| 101 testDecode('Unicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 101 testDecode('Unicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
| 102 testDecode('csUnicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); | 102 testDecode('csUnicode', '%69%D8%D6%DE', 'U+D869/U+DED6'); |
| 103 | 103 |
| 104 // UTF-16BE and variants. | 104 // UTF-16BE and variants. |
| 105 testDecode('UTF-16BE', '%D8%69%DE%D6', 'U+D869/U+DED6'); | 105 testDecode('UTF-16BE', '%D8%69%DE%D6', 'U+D869/U+DED6'); |
| 106 testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6'); | 106 testDecode('unicodeFFFE', '%D8%69%DE%D6', 'U+D869/U+DED6'); |
| 107 | 107 |
| 108 // Replacement encodings should decode as replacement (U+FFFD) then EOF |
| 109 testDecode("csiso2022kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 110 testDecode("hz-gb-2312", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 111 testDecode("iso-2022-cn", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 112 testDecode("iso-2022-cn-ext", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 113 testDecode("iso-2022-kr", "%41%42%43%61%62%63%31%32%33%A0", "U+FFFD"); |
| 114 |
| 108 </script> | 115 </script> |
| 109 </body> | 116 </body> |
| 110 </html> | 117 </html> |
| OLD | NEW |