OLD | NEW |
(Empty) | |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. |
| 4 |
| 5 library utf8_test; |
| 6 import "package:expect/expect.dart"; |
| 7 import 'dart:convert'; |
| 8 |
/// Encodes [str] by handing it, as a single chunk, to the string sink of a
/// chunked [Utf8Encoder] conversion.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  new Utf8Encoder().startChunkedConversion(output)
    ..add(str)
    ..close();
  return encoded;
}
| 18 |
/// Encodes [str] through the [ClosableStringSink] view of a chunked
/// [Utf8Encoder] conversion, writing the whole string with one `write` call.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode2(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  ClosableStringSink textSink =
      new Utf8Encoder().startChunkedConversion(output).asStringSink();
  textSink
    ..write(str)
    ..close();
  return encoded;
}
| 29 |
/// Encodes [str] through the [ClosableStringSink] view of a chunked
/// [Utf8Encoder] conversion, writing one UTF-16 code unit at a time.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode3(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  ClosableStringSink textSink =
      new Utf8Encoder().startChunkedConversion(output).asStringSink();
  for (var codeUnit in str.codeUnits) {
    textSink.writeCharCode(codeUnit);
  }
  textSink.close();
  return encoded;
}
| 40 |
/// Encodes [str] through the [ClosableStringSink] view of a chunked
/// [Utf8Encoder] conversion, writing one rune (code point) at a time.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode4(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  ClosableStringSink textSink =
      new Utf8Encoder().startChunkedConversion(output).asStringSink();
  for (var rune in str.runes) {
    textSink.writeCharCode(rune);
  }
  textSink.close();
  return encoded;
}
| 51 |
/// Encodes [str] by first converting it with `UTF8.encode` and then pushing
/// those bytes, in one `add` call, into the UTF-8 input view
/// (`asUtf8Sink(false)`) of a chunked [Utf8Encoder] conversion.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode5(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  ByteConversionSink utf8Input =
      new Utf8Encoder().startChunkedConversion(output).asUtf8Sink(false);
  utf8Input
    ..add(UTF8.encode(str))
    ..close();
  return encoded;
}
| 63 |
/// Encodes [str] by first converting it with `UTF8.encode` and then pushing
/// those bytes one at a time, via `addSlice`, into the UTF-8 input view
/// (`asUtf8Sink(false)`) of a chunked [Utf8Encoder] conversion.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode6(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  ByteConversionSink utf8Input =
      new Utf8Encoder().startChunkedConversion(output).asUtf8Sink(false);
  for (var byte in UTF8.encode(str)) {
    // Surround the byte with junk values so that only the [1, 2) slice is
    // the real payload.
    utf8Input.addSlice([0, byte, 1], 1, 2, false);
  }
  utf8Input.close();
  return encoded;
}
| 75 |
/// Encodes [str] by passing it to `addSlice` on the string sink of a chunked
/// [Utf8Encoder] conversion, as the middle slice of a longer string.
///
/// Returns the list that the callback-based byte sink reports.
List<int> encode7(String str) {
  List<int> encoded;
  var output = new ByteConversionSink.withCallback((chunk) {
    encoded = chunk;
  });
  // One padding character on each side; the slice [1, str.length + 1) is
  // exactly `str`.
  var padded = "1" + str + "2";
  new Utf8Encoder().startChunkedConversion(output)
    ..addSlice(padded, 1, str.length + 1, false)
    ..close();
  return encoded;
}
| 85 |
| 86 |
/// Rounds [v] up to a power of two; a value that already is a power of two
/// is returned unchanged.
///
/// [v] must be positive (checked by an assert). The shifts used (1, 2, 4, 8
/// and 16) only cover values that fit in 32 bits.
int _nextPowerOf2(v) {
  assert(v > 0);
  // Copy the highest set bit of `v - 1` into every lower bit position, which
  // yields a value of the form 2^k - 1; adding one gives the power of two.
  var smeared = v - 1;
  for (var shift = 1; shift <= 16; shift *= 2) {
    smeared |= smeared >> shift;
  }
  return smeared + 1;
}
| 98 |
/// Checks one test vector against every encoder variant.
///
/// `test[0]` is the expected byte list and `test[1]` is the input string;
/// each of `encode` .. `encode7` must produce exactly those bytes.
runTest(test) {
  List<int> expected = test[0];
  String input = test[1];
  var encoders = [
    encode,
    encode2,
    encode3,
    encode4,
    encode5,
    encode6,
    encode7
  ];
  for (var encoder in encoders) {
    Expect.listEquals(expected, encoder(input));
  }
}
| 110 |
/// Feeds strings containing surrogates to every encoder variant.
///
/// Each string starts with `i` copies of 'a' (for `i` from 0 to 2048, limited
/// to small values and values near powers of two), followed by a lone or
/// paired surrogate sequence, optionally followed by one more 'a'.
main() {
  const LEADING_SURROGATE = 0xd801;
  const TRAILING_SURROGATE = 0xdc12;
  // Expected bytes for the well-formed pair, and for each surrogate when it
  // is not part of a well-formed pair.
  const UTF8_ENCODING = const [0xf0, 0x90, 0x90, 0x92];
  const UTF8_LEADING = const [0xed, 0xa0, 0x81];
  const UTF8_TRAILING = const [0xed, 0xb0, 0x92];
  const CHAR_A = 0x61;

  // Returns a fresh list holding the elements of [first] followed by those
  // of [second].
  List<int> concat(List<int> first, List<int> second) =>
      new List<int>.from(first)..addAll(second);

  // Each entry is [code units, expected UTF-8 bytes] for the part of the
  // string that follows the run of 'a' characters.
  var singles = [
    [[LEADING_SURROGATE], UTF8_LEADING],
    [[TRAILING_SURROGATE], UTF8_TRAILING],
  ];
  var pairs = [
    [[LEADING_SURROGATE, TRAILING_SURROGATE], UTF8_ENCODING],
    [
      [TRAILING_SURROGATE, TRAILING_SURROGATE],
      concat(UTF8_TRAILING, UTF8_TRAILING)
    ],
    [
      [LEADING_SURROGATE, LEADING_SURROGATE],
      concat(UTF8_LEADING, UTF8_LEADING)
    ],
    [
      [TRAILING_SURROGATE, LEADING_SURROGATE],
      concat(UTF8_TRAILING, UTF8_LEADING)
    ],
  ];

  // Run of 'a' characters shared by the input code units and the expected
  // bytes ('a' encodes to the single byte 0x61).
  List<int> prefix = <int>[];

  // Runs one test vector: `prefix + units (+ 'a')` must encode to
  // `prefix + encoded (+ 'a')`.
  void check(List<int> units, List<int> encoded, bool appendA) {
    var codeUnits = new List<int>.from(prefix)..addAll(units);
    var bytes = new List<int>.from(prefix)..addAll(encoded);
    if (appendA) {
      codeUnits.add(CHAR_A);
      bytes.add(CHAR_A);
    }
    runTest([bytes, new String.fromCharCodes(codeUnits)]);
  }

  // Test surrogates at all kinds of locations.
  for (int i = 0; i < 2049; i++) {
    // Invariant: prefix holds exactly i copies of CHAR_A (character 'a').
    // It must grow even on iterations that are skipped below.
    if (i > 0) prefix.add(CHAR_A);

    // Only test for problem zones, close to powers of two.
    if (i > 20 && _nextPowerOf2(i - 2) - i > 10) continue;

    // Lone surrogates at the very end of the string.
    for (var testCase in singles) {
      check(testCase[0], testCase[1], false);
    }
    // Two surrogates at the very end of the string.
    for (var testCase in pairs) {
      check(testCase[0], testCase[1], false);
    }
    // The same two-surrogate sequences followed by a trailing 'a'.
    for (var testCase in pairs) {
      check(testCase[0], testCase[1], true);
    }
  }
}
OLD | NEW |