| OLD | NEW |
| (Empty) |
| 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file | |
| 2 // for details. All rights reserved. Use of this source code is governed by a | |
| 3 // BSD-style license that can be found in the LICENSE file. | |
| 4 | |
| 5 part of utf; | |
| 6 | |
// TODO(jmesserly): would be nice to have this on String (dartbug.com/6501).
/**
 * Returns the Unicode codepoints that make up [str].
 *
 * [String.codeUnits] exposes 16-bit UTF-16 code units, so the units are passed
 * through the UTF-16 decoder to turn them into codepoints.
 */
List<int> stringToCodepoints(String str) {
  List<int> utf16Units = str.codeUnits;
  return _utf16CodeUnitsToCodepoints(utf16Units);
}
| 16 | |
/**
 * Builds a string out of the given Unicode [codepoints].
 *
 * *Deprecated* Use [String.fromCharCodes] instead.
 */
String codepointsToString(List<int> codepoints) =>
    new String.fromCharCodes(codepoints);
| 25 | |
/**
 * An Iterator<int> of codepoints built on an Iterator of UTF-16 code units.
 * The parameters can override the default Unicode replacement character. Set
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value.
 *
 * Each ill-formed code unit (a negative value, an unpaired surrogate, or a
 * value above the 16-bit range) yields exactly one replacement codepoint; the
 * code unit that follows an unpaired lead surrogate is never discarded.
 */
class Utf16CodeUnitDecoder implements Iterator<int> {
  /** Source of the UTF-16 code units being decoded. */
  final _ListRangeIterator utf16CodeUnitIterator;

  /** Codepoint emitted for ill-formed input, or null to throw instead. */
  final int replacementCodepoint;

  int _current = null;

  /**
   * Decodes the [length] code units of [utf16CodeUnits] starting at [offset].
   */
  Utf16CodeUnitDecoder(List<int> utf16CodeUnits, [int offset = 0, int length,
      int this.replacementCodepoint =
      UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      utf16CodeUnitIterator =
          (new _ListRange(utf16CodeUnits, offset, length)).iterator;

  /**
   * Decodes the code units produced by an existing iterator.
   */
  Utf16CodeUnitDecoder.fromListRangeIterator(
      _ListRangeIterator this.utf16CodeUnitIterator,
      int this.replacementCodepoint);

  Iterator<int> get iterator => this;

  /** The most recently decoded codepoint, or null when there is none. */
  int get current => _current;

  /**
   * Decodes the next codepoint into [current]. Returns false once the
   * underlying code units are exhausted. Throws an ArgumentError on
   * ill-formed input when [replacementCodepoint] is null.
   */
  bool moveNext() {
    _current = null;
    if (!utf16CodeUnitIterator.moveNext()) return false;

    int value = utf16CodeUnitIterator.current;
    if (value < 0) {
      _current = _replacementOrThrow();
    } else if (value < UNICODE_UTF16_RESERVED_LO ||
        (value > UNICODE_UTF16_RESERVED_HI && value <= UNICODE_PLANE_ONE_MAX)) {
      // transfer directly
      _current = value;
    } else if (value < UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
        utf16CodeUnitIterator.moveNext()) {
      // Lead surrogate with at least one more unit available.
      int nextValue = utf16CodeUnitIterator.current;
      if (nextValue >= UNICODE_UTF16_SURROGATE_UNIT_1_BASE &&
          nextValue <= UNICODE_UTF16_RESERVED_HI) {
        // merge surrogate pair
        value = (value - UNICODE_UTF16_SURROGATE_UNIT_0_BASE) << 10;
        value += UNICODE_UTF16_OFFSET +
            (nextValue - UNICODE_UTF16_SURROGATE_UNIT_1_BASE);
        _current = value;
      } else {
        // The lead surrogate is unpaired. Un-read the unit that followed it so
        // the next call decodes it on its own merits; previously only another
        // lead surrogate was put back and any other unit was silently lost.
        utf16CodeUnitIterator.backup();
        _current = _replacementOrThrow();
      }
    } else {
      // Lone trail surrogate, lead surrogate at end of input, or a value
      // outside the 16-bit range.
      _current = _replacementOrThrow();
    }
    return true;
  }

  /**
   * Returns [replacementCodepoint], or throws an ArgumentError that reports
   * the iterator position when no replacement is configured.
   */
  int _replacementOrThrow() {
    if (replacementCodepoint != null) return replacementCodepoint;
    throw new ArgumentError(
        "Invalid UTF16 at ${utf16CodeUnitIterator.position}");
  }
}
| 98 | |
/**
 * Encodes the selected range of [codepoints] as UTF-16 code units.
 *
 * Codepoints up to UNICODE_PLANE_ONE_MAX that lie outside the reserved
 * surrogate range are copied through unchanged; codepoints above it, up to
 * UNICODE_VALID_RANGE_MAX, become a surrogate pair. Anything else is
 * substituted with [replacementCodepoint], or causes an ArgumentError when
 * [replacementCodepoint] is null.
 */
List<int> _codepointsToUtf16CodeUnits(
    List<int> codepoints,
    [int offset = 0,
    int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {

  _ListRange range = new _ListRange(codepoints, offset, length);

  // First pass: size the output. Only supplementary codepoints need two
  // units; every other input (valid or not) occupies a single slot.
  int unitCount = 0;
  for (int codepoint in range) {
    bool needsPair = codepoint > UNICODE_PLANE_ONE_MAX &&
        codepoint <= UNICODE_VALID_RANGE_MAX;
    unitCount += needsPair ? 2 : 1;
  }

  // Second pass: emit the code units.
  List<int> units = new List<int>(unitCount);
  int out = 0;
  for (int codepoint in range) {
    bool isSingleUnit =
        (codepoint >= 0 && codepoint < UNICODE_UTF16_RESERVED_LO) ||
        (codepoint > UNICODE_UTF16_RESERVED_HI &&
            codepoint <= UNICODE_PLANE_ONE_MAX);
    if (isSingleUnit) {
      units[out++] = codepoint;
      continue;
    }
    bool isSupplementary = codepoint > UNICODE_PLANE_ONE_MAX &&
        codepoint <= UNICODE_VALID_RANGE_MAX;
    if (isSupplementary) {
      int shifted = codepoint - UNICODE_UTF16_OFFSET;
      units[out++] = UNICODE_UTF16_SURROGATE_UNIT_0_BASE +
          ((shifted & UNICODE_UTF16_HI_MASK) >> 10);
      units[out++] = UNICODE_UTF16_SURROGATE_UNIT_1_BASE +
          (shifted & UNICODE_UTF16_LO_MASK);
      continue;
    }
    if (replacementCodepoint == null) {
      throw new ArgumentError("Invalid encoding");
    }
    units[out++] = replacementCodepoint;
  }
  return units;
}
| 143 | |
/**
 * Decodes the selected range of [utf16CodeUnits] into a list of codepoints.
 *
 * Ill-formed input is handled by [Utf16CodeUnitDecoder] according to
 * [replacementCodepoint].
 */
List<int> _utf16CodeUnitsToCodepoints(
    List<int> utf16CodeUnits, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  _ListRangeIterator units =
      (new _ListRange(utf16CodeUnits, offset, length)).iterator;
  Utf16CodeUnitDecoder decoder =
      new Utf16CodeUnitDecoder.fromListRangeIterator(
          units, replacementCodepoint);

  // Allocate one slot per remaining code unit, then trim below if the decoder
  // produced fewer codepoints (surrogate pairs collapse two units into one).
  List<int> buffer = new List<int>(units.remaining);
  int count = 0;
  while (decoder.moveNext()) {
    buffer[count] = decoder.current;
    count++;
  }

  if (count != buffer.length) {
    List<int> exact = new List<int>(count);
    exact.setRange(0, count, buffer);
    return exact;
  }
  return buffer;
}
| 167 | |
/**
 * Decodes the UTF-16 bytes lazily, as an iterable of codepoints, so the
 * consumer converts only as much of the input as it actually reads. The byte
 * order is taken from the BOM, with big-endian as the default, and a leading
 * BOM is always stripped. Set the [replacementCodepoint] to null to throw an
 * ArgumentError rather than replace the bad value. The default value for
 * [replacementCodepoint] is U+FFFD.
 */
IterableUtf16Decoder decodeUtf16AsIterable(List<int> bytes, [int offset = 0,
    int length, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh byte decoder is created every time an iterator is requested.
  _ListRangeIterator codeUnits() => new Utf16BytesToCodeUnitsDecoder(
      bytes, offset, length, replacementCodepoint);

  return new IterableUtf16Decoder._(codeUnits, replacementCodepoint);
}
| 183 | |
/**
 * Decodes the UTF-16BE bytes lazily, as an iterable of codepoints, so the
 * consumer converts only as much of the input as it actually reads. A leading
 * BOM is stripped by default; pass false for [stripBom] to keep it. Set the
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value. The default value for the [replacementCodepoint] is
 * U+FFFD.
 */
IterableUtf16Decoder decodeUtf16beAsIterable(List<int> bytes, [int offset = 0,
    int length, bool stripBom = true, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh byte decoder is created every time an iterator is requested.
  _ListRangeIterator codeUnits() => new Utf16beBytesToCodeUnitsDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);

  return new IterableUtf16Decoder._(codeUnits, replacementCodepoint);
}
| 199 | |
/**
 * Decodes the UTF-16LE bytes lazily, as an iterable of codepoints, so the
 * consumer converts only as much of the input as it actually reads. A leading
 * BOM is stripped by default; pass false for [stripBom] to keep it. Set the
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value. The default value for the [replacementCodepoint] is
 * U+FFFD.
 */
IterableUtf16Decoder decodeUtf16leAsIterable(List<int> bytes, [int offset = 0,
    int length, bool stripBom = true, int replacementCodepoint =
    UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // A fresh byte decoder is created every time an iterator is requested.
  _ListRangeIterator codeUnits() => new Utf16leBytesToCodeUnitsDecoder(
      bytes, offset, length, stripBom, replacementCodepoint);

  return new IterableUtf16Decoder._(codeUnits, replacementCodepoint);
}
| 215 | |
/**
 * Builds a String from UTF-16 encoded [bytes]. The byte order is chosen by
 * [Utf16BytesToCodeUnitsDecoder], and a leading BOM is always stripped. Set
 * the [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value. The default value for the [replacementCodepoint] is
 * U+FFFD.
 */
String decodeUtf16(List<int> bytes, [int offset = 0, int length,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Stage 1: bytes -> 16-bit code units.
  List<int> units = new Utf16BytesToCodeUnitsDecoder(
      bytes, offset, length, replacementCodepoint).decodeRest();
  // Stage 2: code units -> codepoints.
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(units, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 230 | |
/**
 * Builds a String from UTF-16BE encoded [bytes]. A leading BOM is stripped by
 * default; pass false for [stripBom] to keep it. Set the
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value. The default value for the [replacementCodepoint] is
 * U+FFFD.
 */
String decodeUtf16be(List<int> bytes, [int offset = 0, int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Stage 1: bytes -> 16-bit code units.
  Utf16beBytesToCodeUnitsDecoder byteDecoder =
      new Utf16beBytesToCodeUnitsDecoder(
          bytes, offset, length, stripBom, replacementCodepoint);
  List<int> units = byteDecoder.decodeRest();
  // Stage 2: code units -> codepoints.
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(units, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 246 | |
/**
 * Builds a String from UTF-16LE encoded [bytes]. A leading BOM is stripped by
 * default; pass false for [stripBom] to keep it. Set the
 * [replacementCodepoint] to null to throw an ArgumentError rather than
 * replace the bad value. The default value for the [replacementCodepoint] is
 * U+FFFD.
 */
String decodeUtf16le(List<int> bytes, [int offset = 0, int length,
    bool stripBom = true,
    int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
  // Stage 1: bytes -> 16-bit code units.
  Utf16leBytesToCodeUnitsDecoder byteDecoder =
      new Utf16leBytesToCodeUnitsDecoder(
          bytes, offset, length, stripBom, replacementCodepoint);
  List<int> units = byteDecoder.decodeRest();
  // Stage 2: code units -> codepoints.
  List<int> codepoints =
      _utf16CodeUnitsToCodepoints(units, 0, null, replacementCodepoint);
  return new String.fromCharCodes(codepoints);
}
| 262 | |
/**
 * Encodes [str] as UTF-16 bytes. The output is big-endian and starts with a
 * big-endian byte-order marker.
 */
List<int> encodeUtf16(String str) {
  return encodeUtf16be(str, true);
}
| 269 | |
/**
 * Encodes [str] as UTF-16BE bytes, high byte of each code unit first. No BOM
 * is written unless [writeBOM] is true.
 */
List<int> encodeUtf16be(String str, [bool writeBOM = false]) {
  List<int> units = _stringToUtf16CodeUnits(str);
  int bomLength = writeBOM ? 2 : 0;
  List<int> bytes = new List<int>(bomLength + 2 * units.length);

  if (writeBOM) {
    bytes[0] = UNICODE_UTF_BOM_HI;
    bytes[1] = UNICODE_UTF_BOM_LO;
  }

  int out = bomLength;
  for (int k = 0; k < units.length; k++) {
    int unit = units[k];
    bytes[out] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
    bytes[out + 1] = unit & UNICODE_BYTE_ZERO_MASK;
    out += 2;
  }
  return bytes;
}
| 289 | |
/**
 * Encodes [str] as UTF-16LE bytes, low byte of each code unit first. No BOM
 * is written unless [writeBOM] is true.
 */
List<int> encodeUtf16le(String str, [bool writeBOM = false]) {
  List<int> units = _stringToUtf16CodeUnits(str);
  int bomLength = writeBOM ? 2 : 0;
  List<int> bytes = new List<int>(bomLength + 2 * units.length);

  if (writeBOM) {
    // The BOM bytes are swapped relative to the big-endian encoder.
    bytes[0] = UNICODE_UTF_BOM_LO;
    bytes[1] = UNICODE_UTF_BOM_HI;
  }

  int out = bomLength;
  for (int k = 0; k < units.length; k++) {
    int unit = units[k];
    bytes[out] = unit & UNICODE_BYTE_ZERO_MASK;
    bytes[out + 1] = (unit & UNICODE_BYTE_ONE_MASK) >> 8;
    out += 2;
  }
  return bytes;
}
| 309 | |
/**
 * Reports whether the bytes starting at [offset] begin with a UTF-16
 * byte-order marker (BOM) of either endianness.
 *
 * Despite its name, [utf32EncodedBytes] is handed straight to the UTF-16 BOM
 * checks and is treated as UTF-16 encoded bytes.
 */
bool hasUtf16Bom(List<int> utf32EncodedBytes, [int offset = 0, int length]) {
  if (hasUtf16beBom(utf32EncodedBytes, offset, length)) return true;
  return hasUtf16leBom(utf32EncodedBytes, offset, length);
}
| 318 | |
/**
 * Reports whether the bytes starting at [offset] begin with a big-endian
 * byte-order marker (BOM). When [length] is null the range extends to the end
 * of the list.
 */
bool hasUtf16beBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
  int end = (length == null) ? utf16EncodedBytes.length : offset + length;
  // A BOM needs two bytes inside the range.
  if (offset + 2 > end) return false;
  return utf16EncodedBytes[offset] == UNICODE_UTF_BOM_HI &&
      utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_LO;
}
| 329 | |
/**
 * Reports whether the bytes starting at [offset] begin with a little-endian
 * byte-order marker (BOM). When [length] is null the range extends to the end
 * of the list.
 */
bool hasUtf16leBom(List<int> utf16EncodedBytes, [int offset = 0, int length]) {
  int end = (length == null) ? utf16EncodedBytes.length : offset + length;
  // A BOM needs two bytes inside the range.
  if (offset + 2 > end) return false;
  return utf16EncodedBytes[offset] == UNICODE_UTF_BOM_LO &&
      utf16EncodedBytes[offset + 1] == UNICODE_UTF_BOM_HI;
}
| 340 | |
/**
 * Returns the well-formed UTF-16 code units that encode [str].
 *
 * [String.codeUnits] yields UTF-16 code units, whereas
 * [_codepointsToUtf16CodeUnits] expects codepoints. Feeding it the raw code
 * units made it treat both halves of every surrogate pair as invalid
 * codepoints and replace them, corrupting all text outside the Basic
 * Multilingual Plane. The string is therefore decoded to codepoints first and
 * then re-encoded, which keeps valid surrogate pairs and still replaces
 * unpaired surrogates.
 */
List<int> _stringToUtf16CodeUnits(String str) {
  return _codepointsToUtf16CodeUnits(stringToCodepoints(str));
}
| 344 | |
/** Creates a fresh iterator over UTF-16 code units each time it is called. */
typedef _ListRangeIterator _CodeUnitsProvider();

/**
 * Return type of [decodeUtf16AsIterable] and variants. Every request for an
 * iterator obtains a new code-unit source from the provider, and that
 * iterator translates code units only as its user advances it. (Note:
 * results are not cached.)
 */
// TODO(floitsch): Consider removing the extend and switch to implements since
// that's cheaper to allocate.
class IterableUtf16Decoder extends IterableBase<int> {
  /** Supplies the code-unit iterator wrapped by each new decoder. */
  final _CodeUnitsProvider codeunitsProvider;

  /** Passed through to every [Utf16CodeUnitDecoder] this iterable creates. */
  final int replacementCodepoint;

  IterableUtf16Decoder._(this.codeunitsProvider, this.replacementCodepoint);

  Utf16CodeUnitDecoder get iterator {
    _ListRangeIterator codeUnits = codeunitsProvider();
    return new Utf16CodeUnitDecoder.fromListRangeIterator(
        codeUnits, replacementCodepoint);
  }
}
| 364 | |
/**
 * Converts UTF-16 encoded bytes to UTF-16 code units by combining the bytes
 * two at a time; a single trailing byte is reported as ill-formed. The
 * default factory picks the byte order from a leading BOM and falls back to
 * big-endian when there is none.
 */
abstract class Utf16BytesToCodeUnitsDecoder implements _ListRangeIterator {
  /** Source of the raw encoded bytes. */
  final _ListRangeIterator utf16EncodedBytesIterator;

  /** Code unit emitted for a dangling byte, or null to throw instead. */
  final int replacementCodepoint;

  int _current = null;

  Utf16BytesToCodeUnitsDecoder._fromListRangeIterator(
      this.utf16EncodedBytesIterator, this.replacementCodepoint);

  /**
   * Creates a big- or little-endian decoder depending on the BOM found at
   * [offset]. A detected BOM is excluded from the range handed to the
   * concrete decoder.
   */
  factory Utf16BytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) {
    if (length == null) {
      length = utf16EncodedBytes.length - offset;
    }
    if (hasUtf16beBom(utf16EncodedBytes, offset, length)) {
      return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
          length - 2, false, replacementCodepoint);
    }
    if (hasUtf16leBom(utf16EncodedBytes, offset, length)) {
      return new Utf16leBytesToCodeUnitsDecoder(utf16EncodedBytes, offset + 2,
          length - 2, false, replacementCodepoint);
    }
    // No BOM: default to big-endian.
    return new Utf16beBytesToCodeUnitsDecoder(utf16EncodedBytes, offset,
        length, false, replacementCodepoint);
  }

  /**
   * Decodes everything that is left of the source bytes in one call. The
   * result buffer is sized from [remaining] up front and copied into a
   * shorter list if fewer code units were produced.
   */
  List<int> decodeRest() {
    List<int> buffer = new List<int>(remaining);
    int count = 0;
    while (moveNext()) {
      buffer[count] = current;
      count++;
    }
    if (count != buffer.length) {
      List<int> exact = new List<int>(count);
      exact.setRange(0, count, buffer);
      return exact;
    }
    return buffer;
  }

  /** The most recently decoded code unit, or null when there is none. */
  int get current => _current;

  /**
   * Decodes the next code unit into [current]. Returns false when no bytes
   * are left. A single leftover byte is consumed and reported through
   * [replacementCodepoint], or as an ArgumentError when that is null.
   */
  bool moveNext() {
    _current = null;
    int bytesLeft = utf16EncodedBytesIterator.remaining;
    if (bytesLeft == 0) return false;

    if (bytesLeft != 1) {
      _current = decode();
      return true;
    }

    // Exactly one byte left: it cannot form a code unit.
    utf16EncodedBytesIterator.moveNext();
    if (replacementCodepoint == null) {
      throw new ArgumentError(
          "Invalid UTF16 at ${utf16EncodedBytesIterator.position}");
    }
    _current = replacementCodepoint;
    return true;
  }

  /** Position in code units: half the byte iterator's position. */
  int get position => utf16EncodedBytesIterator.position ~/ 2;

  /** Moves back [by] code units, i.e. twice as many bytes. */
  void backup([int by = 1]) {
    utf16EncodedBytesIterator.backup(2 * by);
  }

  /** Code units left, counting a dangling final byte as one unit. */
  int get remaining => (utf16EncodedBytesIterator.remaining + 1) ~/ 2;

  /** Skips [count] code units, i.e. twice as many bytes. */
  void skip([int count = 1]) {
    utf16EncodedBytesIterator.skip(2 * count);
  }

  /** Reads two bytes from the source and combines them into one code unit. */
  int decode();
}
| 453 | |
/**
 * Converts UTF-16BE encoded bytes to UTF-16 code units: each pair of bytes
 * becomes one code unit, most significant byte first.
 */
class Utf16beBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
  /**
   * Decodes the selected range of [utf16EncodedBytes]. When [stripBom] is
   * true and the range starts with a big-endian BOM, the BOM is skipped.
   */
  Utf16beBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    bool dropBom =
        stripBom && hasUtf16beBom(utf16EncodedBytes, offset, length);
    if (dropBom) {
      skip();
    }
  }

  int decode() {
    _ListRangeIterator bytes = utf16EncodedBytesIterator;
    bytes.moveNext();
    int highByte = bytes.current;
    bytes.moveNext();
    int lowByte = bytes.current;
    return (highByte << 8) + lowByte;
  }
}
| 478 | |
/**
 * Converts UTF-16LE encoded bytes to UTF-16 code units: each pair of bytes
 * becomes one code unit, least significant byte first.
 */
class Utf16leBytesToCodeUnitsDecoder extends Utf16BytesToCodeUnitsDecoder {
  /**
   * Decodes the selected range of [utf16EncodedBytes]. When [stripBom] is
   * true and the range starts with a little-endian BOM, the BOM is skipped.
   */
  Utf16leBytesToCodeUnitsDecoder(List<int> utf16EncodedBytes, [
      int offset = 0, int length, bool stripBom = true,
      int replacementCodepoint = UNICODE_REPLACEMENT_CHARACTER_CODEPOINT]) :
      super._fromListRangeIterator(
          (new _ListRange(utf16EncodedBytes, offset, length)).iterator,
          replacementCodepoint) {
    bool dropBom =
        stripBom && hasUtf16leBom(utf16EncodedBytes, offset, length);
    if (dropBom) {
      skip();
    }
  }

  int decode() {
    _ListRangeIterator bytes = utf16EncodedBytesIterator;
    bytes.moveNext();
    int lowByte = bytes.current;
    bytes.moveNext();
    int highByte = bytes.current;
    return (highByte << 8) + lowByte;
  }
}
| OLD | NEW |