OLD | NEW |
1 <!DOCTYPE html> | 1 <!DOCTYPE html> |
2 <script src="../../../resources/js-test.js"></script> | 2 <title>Encoding API: UTF encoding round trips</title> |
| 3 <script src="../../../resources/testharness.js"></script> |
| 4 <script src="../../../resources/testharnessreport.js"></script> |
3 <script src="resources/shared.js"></script> | 5 <script src="resources/shared.js"></script> |
4 <script> | 6 <script> |
5 | 7 |
6 description("Sanity check the Encoding API's handling of UTF encodings."); | 8 var BATCH_SIZE = 0x1000; // Convert in batches spanning this many code points. |
7 | 9 var SKIP_SIZE = 0x77; // For efficiency, don't test every code point. |
8 BATCH_SIZE = 0x1000; // Convert in batches spanning this made code points. | |
9 SKIP_SIZE = 0x77; // For efficiency, don't test every code point. | |
10 quiet = true; // Don't log every matching range. | |
11 | 10 |
12 function fromCodePoint(cp) { | 11 function fromCodePoint(cp) { |
13 if (0xd800 <= cp && cp <= 0xdfff) throw new Error('Invalid code point'); | 12 if (0xD800 <= cp && cp <= 0xDFFF) throw new Error('Invalid code point'); |
14 | 13 |
15 if (cp > 0xffff) { | 14 if (cp <= 0xFFFF) |
16 // outside BMP - encode as surrogate pair | 15 return String.fromCharCode(cp); |
17 return String.fromCharCode(0xd800 + ((cp >> 10) & 0x3ff), 0xdc00 + (cp & 0x3ff)); | 16 |
18 } | 17 // outside BMP - encode as surrogate pair |
19 return String.fromCharCode(i); | 18 return String.fromCharCode(0xD800 + ((cp >> 10) & 0x3FF), 0xDC00 + (cp & 0x3FF)); |
20 } | 19 } |
21 | 20 |
22 function makeBatch(cp) { | 21 function makeBatch(cp) { |
23 var string = ''; | 22 var string = ''; |
24 for (var i = cp; i < cp + BATCH_SIZE && cp < 0x10FFFF; i += SKIP_SIZE) { | 23 for (var i = cp; i < cp + BATCH_SIZE && cp < 0x10FFFF; i += SKIP_SIZE) { |
25 if (0xd800 <= i && i <= 0xdfff) { | 24 if (0xD800 <= i && i <= 0xDFFF) { |
26 // surrogate half | 25 // surrogate half |
27 continue; | 26 continue; |
28 } | 27 } |
29 string += fromCodePoint(i); | 28 string += fromCodePoint(i); |
30 } | 29 } |
31 return string; | 30 return string; |
32 } | 31 } |
33 | 32 |
34 function testEncodeDecode(encoding, min, max) { | |
35 debug(encoding + " - Encode/Decode Range " + cpname(min) + " - " + cpname(max)); | |
36 | |
37 function cpname(n) { | |
38 return 'U+' + ((n <= 0xFFFF) ? | |
39 ('0000' + n.toString(16).toUpperCase()).slice(-4) : | |
40 n.toString(16).toUpperCase()); | |
41 } | |
42 | |
43 for (i = min; i < max; i += BATCH_SIZE) { | |
44 string = makeBatch(i); | |
45 encoded = new TextEncoder(encoding).encode(string); | |
46 decoded = new TextDecoder(encoding).decode(encoded); | |
47 shouldBe("string", "decoded", quiet); | |
48 } | |
49 | |
50 debug("no output means all ranges matched"); | |
51 debug(""); | |
52 } | |
53 | |
54 utf_encodings.forEach(function(encoding) { | 33 utf_encodings.forEach(function(encoding) { |
55 testEncodeDecode(encoding, 0, 0x10FFFF); | 34 test(function() { |
| 35 for (var i = 0; i < 0x10FFFF; i += BATCH_SIZE) { |
| 36 var string = makeBatch(i); |
| 37 var encoded = new TextEncoder(encoding).encode(string); |
| 38 var decoded = new TextDecoder(encoding).decode(encoded); |
| 39 assert_equals(decoded, string); |
| 40 } |
| 41 }, encoding + ' - encode/decode round trip'); |
56 }); | 42 }); |
57 | 43 |
58 | 44 |
59 // Inspired by: | 45 // Inspired by: |
60 // http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html | 46 // http://ecmanaut.blogspot.com/2006/07/encoding-decoding-utf8-in-javascript.html |
61 function encode_utf8(string) { | 47 function encode_utf8(string) { |
62 var utf8 = unescape(encodeURIComponent(string)); | 48 var utf8 = unescape(encodeURIComponent(string)); |
63 var octets = [], i; | 49 var octets = []; |
64 for (i = 0; i < utf8.length; i += 1) { | 50 for (var i = 0; i < utf8.length; i += 1) |
65 octets.push(utf8.charCodeAt(i)); | 51 octets.push(utf8.charCodeAt(i)); |
66 } | |
67 return octets; | 52 return octets; |
68 } | 53 } |
69 | 54 |
70 function decode_utf8(octets) { | 55 function decode_utf8(octets) { |
71 var utf8 = String.fromCharCode.apply(null, octets); | 56 var utf8 = String.fromCharCode.apply(null, octets); |
72 return decodeURIComponent(escape(utf8)); | 57 return decodeURIComponent(escape(utf8)); |
73 } | 58 } |
74 | 59 |
75 debug("UTF-8 encoding (compare against unescape/encodeURIComponent)"); | 60 test(function() { |
76 for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) { | 61 for (var i = 0; i < 0x10FFFF; i += BATCH_SIZE) { |
77 str = makeBatch(i); | 62 var string = makeBatch(i); |
78 expected = encode_utf8(str); | 63 var expected = encode_utf8(string); |
79 actual = new TextEncoder('UTF-8').encode(str); | 64 var actual = new TextEncoder('UTF-8').encode(string); |
80 shouldBe("actual", "expected", quiet); | 65 assert_array_equals(actual, expected); |
81 } | 66 } |
82 debug("no output means all ranges matched"); | 67 }, 'UTF-8 encoding (compare against unescape/encodeURIComponent)'); |
83 debug(""); | |
84 | 68 |
85 debug("UTF-8 decoding (compare against decodeURIComponent/escape)"); | 69 test(function() { |
86 for (i = 0; i < 0x10FFFF; i += BATCH_SIZE) { | 70 for (var i = 0; i < 0x10FFFF; i += BATCH_SIZE) { |
87 str = makeBatch(i); | 71 var string = makeBatch(i); |
88 encoded = encode_utf8(str); | 72 var encoded = encode_utf8(string); |
89 expected = decode_utf8(encoded); | 73 var expected = decode_utf8(encoded); |
90 actual = new TextDecoder('UTF-8').decode(new Uint8Array(encoded)); | 74 var actual = new TextDecoder('UTF-8').decode(new Uint8Array(encoded)); |
91 shouldBe("actual", "expected", quiet); | 75 assert_equals(actual, expected); |
92 } | 76 } |
93 debug("no output means all ranges matched"); | 77 }, 'UTF-8 decoding (compare against decodeURIComponent/escape)'); |
94 debug(""); | |
95 | 78 |
96 </script> | 79 </script> |
OLD | NEW |