| OLD | NEW |
| 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
| 2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
| 3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
| 4 | 4 |
| 5 part of dart.convert; | 5 part of dart.convert; |
| 6 | 6 |
| 7 /** | 7 /** |
| 8 * An instance of the default implementation of the [Utf8Codec]. | 8 * An instance of the default implementation of the [Utf8Codec]. |
| 9 * | 9 * |
| 10 * This instance provides a convenient access to the most common UTF-8 | 10 * This instance provides a convenient access to the most common UTF-8 |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
| 56 } | 56 } |
| 57 | 57 |
| 58 Converter<String, List<int>> get encoder => new Utf8Encoder(); | 58 Converter<String, List<int>> get encoder => new Utf8Encoder(); |
| 59 Converter<List<int>, String> get decoder { | 59 Converter<List<int>, String> get decoder { |
| 60 return new Utf8Decoder(allowMalformed: _allowMalformed); | 60 return new Utf8Decoder(allowMalformed: _allowMalformed); |
| 61 } | 61 } |
| 62 } | 62 } |
| 63 | 63 |
| 64 /** | 64 /** |
| 65 * A [Utf8Encoder] converts strings to their UTF-8 code units (a list of | 65 * This class converts strings to their UTF-8 code units (a list of |
| 66 * unsigned 8-bit integers). | 66 * unsigned 8-bit integers). |
| 67 */ | 67 */ |
| 68 class Utf8Encoder extends Converter<String, List<int>> { | 68 class Utf8Encoder extends Converter<String, List<int>> { |
| 69 /** | 69 /** |
| 70 * Converts [string] to its UTF-8 code units (a list of | 70 * Converts [string] to its UTF-8 code units (a list of |
| 71 * unsigned 8-bit integers). | 71 * unsigned 8-bit integers). |
| 72 */ | 72 */ |
| 73 List<int> convert(String string) { | 73 List<int> convert(String string) { |
| 74 // Create a new encoder with a length that is guaranteed to be big enough. | 74 // Create a new encoder with a length that is guaranteed to be big enough. |
| 75 // A single code unit uses at most 3 bytes. Two code units at most 4. | 75 // A single code unit uses at most 3 bytes. Two code units at most 4. |
| 76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); | 76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); |
| 77 int endPosition = encoder._fillBuffer(string, 0, string.length); | 77 int endPosition = encoder._fillBuffer(string, 0, string.length); |
| 78 assert(endPosition >= string.length - 1); | 78 assert(endPosition >= string.length - 1); |
| 79 if (endPosition != string.length) { | 79 if (endPosition != string.length) { |
| 80 int lastCodeUnit = string.codeUnitAt(string.length - 1); | 80 int lastCodeUnit = string.codeUnitAt(string.length - 1); |
| 81 assert(_isLeadSurrogate(lastCodeUnit)); | 81 assert(_isLeadSurrogate(lastCodeUnit)); |
| 82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just | 82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just |
| 83 // writes the lead-surrogate. | 83 // writes the lead-surrogate. |
| 84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); | 84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); |
| 85 assert(!wasCombined); | 85 assert(!wasCombined); |
| 86 } | 86 } |
| 87 return encoder._buffer.sublist(0, encoder._bufferIndex); | 87 return encoder._buffer.sublist(0, encoder._bufferIndex); |
| 88 } | 88 } |
| 89 |
| 90 StringConversionSink startChunkedConversion(ChunkedConversionSink sink) { |
| 91 ByteConversionSink byteSink = sink.adaptTo(outputInterface); |
| 92 return new _Utf8EncoderSink(byteSink); // Hand over the adapted byte sink, not the raw sink. |
| 93 } |
| 94 |
| 95 ChunkedConversionInterface get inputInterface => |
| 96 StringConversionSink.INTERFACE; |
| 97 ChunkedConversionInterface get outputInterface => |
| 98 ByteConversionSink.INTERFACE; |
| 89 } | 99 } |
| 90 | 100 |
| 91 /** | 101 /** |
| 92 * This class encodes Strings to UTF-8 code units (unsigned 8 bit integers). | 102 * This class encodes Strings to UTF-8 code units (unsigned 8 bit integers). |
| 93 */ | 103 */ |
| 94 // TODO(floitsch): make this class public. | 104 // TODO(floitsch): make this class public. |
| 95 class _Utf8Encoder { | 105 class _Utf8Encoder { |
| 96 int _carry = 0; | 106 int _carry = 0; |
| 97 int _bufferIndex = 0; | 107 int _bufferIndex = 0; |
| 98 final List<int> _buffer; | 108 final List<int> _buffer; |
| 99 | 109 |
| 100 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; | 110 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; |
| 101 | 111 |
| 102 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); | 112 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); |
| 103 | 113 |
| 104 _Utf8Encoder.withBufferSize(int bufferSize) | 114 _Utf8Encoder.withBufferSize(int bufferSize) |
| 105 // TODO(11971, floitsch): use Uint8List instead of normal lists. | 115 // TODO(11971, floitsch): use Uint8List instead of normal lists. |
| 106 : _buffer = new List<int>(bufferSize); | 116 : _buffer = new List<int>(bufferSize); |
| 107 | 117 |
| 108 /** | 118 /** |
| 109 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and | 119 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and |
| 110 * writes it to [_buffer]. | 120 * writes it to [_buffer]. |
| 111 * | 121 * |
| 112 * Returns true if the [nextCodeUnit] was combined with the | 122 * Returns true if the [nextCodeUnit] was combined with the |
| 113 * [leadingSurrogate]. If it wasn't then nextCodeUnit has not been written | 123 * [leadingSurrogate]. If it wasn't then nextCodeUnit was not a trailing |
| 114 * yet. | 124 * surrogate and has not been written yet. |
| 125 * |
| 126 * It is safe to pass 0 for [nextCodeUnit] in which case only the leading |
| 127 * surrogate is written. |
| 115 */ | 128 */ |
| 116 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { | 129 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { |
| 117 if (_isTailSurrogate(nextCodeUnit)) { | 130 if (_isTailSurrogate(nextCodeUnit)) { |
| 118 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); | 131 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); |
| 119 // If the rune is encoded with 2 code-units then it must be encoded | 132 // If the rune is encoded with 2 code-units then it must be encoded |
| 120 // with 4 bytes in UTF-8. | 133 // with 4 bytes in UTF-8. |
| 121 assert(rune > _THREE_BYTE_LIMIT); | 134 assert(rune > _THREE_BYTE_LIMIT); |
| 122 assert(rune <= _FOUR_BYTE_LIMIT); | 135 assert(rune <= _FOUR_BYTE_LIMIT); |
| 123 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); | 136 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); |
| 124 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); | 137 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 180 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); | 193 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); |
| 181 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); | 194 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); |
| 182 } | 195 } |
| 183 } | 196 } |
| 184 } | 197 } |
| 185 return stringIndex; | 198 return stringIndex; |
| 186 } | 199 } |
| 187 } | 200 } |
| 188 | 201 |
| 189 /** | 202 /** |
| 203 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit |
| 204 * integers). |
| 205 */ |
| 206 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin { |
| 207 |
| 208 final ByteConversionSink _sink; |
| 209 |
| 210 _Utf8EncoderSink(this._sink); |
| 211 |
| 212 void close() { |
| 213 if (_carry != 0) { |
| 214 // addSlice will call close again, but then the carry must be equal to 0. |
| 215 addSlice("", 0, 0, true); |
| 216 return; |
| 217 } |
| 218 _sink.close(); |
| 219 } |
| 220 |
| 221 void addSlice(String str, int start, int end, bool isLast) { |
| 222 _bufferIndex = 0; |
| 223 |
| 224 if (start == end && !isLast) { |
| 225 return; |
| 226 } |
| 227 |
| 228 if (_carry != 0) { |
| 229 int nextCodeUnit = 0; |
| 230 if (start != end) { |
| 231 nextCodeUnit = str.codeUnitAt(start); |
| 232 } else { |
| 233 assert(isLast); |
| 234 } |
| 235 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit); |
| 236 // Either we got a non-empty string, or we must not have been combined. |
| 237 assert(!wasCombined || start != end ); |
| 238 if (wasCombined) start++; |
| 239 _carry = 0; |
| 240 } |
| 241 do { |
| 242 start = _fillBuffer(str, start, end); |
| 243 bool isLastSlice = isLast && (start == end); |
| 244 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) { |
| 245 if (isLast && _bufferIndex < _buffer.length - 3) { |
| 246 // There is still space for the last incomplete surrogate. |
| 247 // We use a non-surrogate as second argument. This way the |
| 248 // function will just add the surrogate-half to the buffer. |
| 249 bool hasBeenCombined = _writeSurrogate(str.codeUnitAt(start), 0); |
| 250 assert(!hasBeenCombined); |
| 251 } else { |
| 252 // Otherwise store it in the carry. If isLast is true, then |
| 253 // close will flush the last carry. |
| 254 _carry = str.codeUnitAt(start); |
| 255 } |
| 256 start++; |
| 257 } |
| 258 _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice); |
| 259 _bufferIndex = 0; |
| 260 } while (start < end); |
| 261 if (isLast) close(); |
| 262 } |
| 263 |
| 264 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it |
| 265 // needs to deal with malformed input. |
| 266 } |
| 267 |
| 268 /** |
| 190 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) | 269 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) |
| 191 * to a string. | 270 * to a string. |
| 192 */ | 271 */ |
| 193 class Utf8Decoder extends Converter<List<int>, String> { | 272 class Utf8Decoder extends Converter<List<int>, String> { |
| 194 final bool _allowMalformed; | 273 final bool _allowMalformed; |
| 195 | 274 |
| 196 /** | 275 /** |
| 197 * Instantiates a new [Utf8Decoder]. | 276 * Instantiates a new [Utf8Decoder]. |
| 198 * | 277 * |
| 199 * The optional [allowMalformed] argument defines how [convert] deals | 278 * The optional [allowMalformed] argument defines how [convert] deals |
| 200 * with invalid or unterminated character sequences. | 279 * with invalid or unterminated character sequences. |
| 201 * | 280 * |
| 202 * If it is `true` [convert] replaces invalid (or unterminated) character | 281 * If it is `true` [convert] replaces invalid (or unterminated) character |
| 203 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 282 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
| 204 * it throws a [FormatException]. | 283 * it throws a [FormatException]. |
| 205 */ | 284 */ |
| 206 Utf8Decoder({ bool allowMalformed: false }) | 285 Utf8Decoder({ bool allowMalformed: false }) |
| 207 : this._allowMalformed = allowMalformed; | 286 : this._allowMalformed = allowMalformed; |
| 208 | 287 |
| 209 /** | 288 /** |
| 210 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 289 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
| 211 * corresponding string. | 290 * corresponding string. |
| 212 */ | 291 */ |
| 213 String convert(List<int> codeUnits) { | 292 String convert(List<int> codeUnits) { |
| 214 StringBuffer buffer = new StringBuffer(); | 293 StringBuffer buffer = new StringBuffer(); |
| 215 _Utf8Decoder decoder = new _Utf8Decoder(_allowMalformed); | 294 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
| 216 decoder.convert(codeUnits, 0, codeUnits.length, buffer); | 295 decoder.convert(codeUnits, 0, codeUnits.length); |
| 217 decoder.close(buffer); | 296 decoder.close(); |
| 218 return buffer.toString(); | 297 return buffer.toString(); |
| 219 } | 298 } |
| 299 |
| 300 ByteConversionSink startChunkedConversion(ChunkedConversionSink sink) { |
| 301 StringConversionSink stringSink = sink.adaptTo(outputInterface); |
| 302 return stringSink.asUtf8Sink(_allowMalformed); |
| 303 } |
| 304 |
| 305 ChunkedConversionInterface get inputInterface => |
| 306 ByteConversionSink.INTERFACE; |
| 307 ChunkedConversionInterface get outputInterface => |
| 308 StringConversionSink.INTERFACE; |
| 220 } | 309 } |
| 221 | 310 |
| 222 // UTF-8 constants. | 311 // UTF-8 constants. |
| 223 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bytes | 312 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits |
| 224 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bytes | 313 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits |
| 225 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bytes | 314 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits |
| 226 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bytes, truncated to Unicode max. | 315 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. |
| 227 | 316 |
| 228 // UTF-16 constants. | 317 // UTF-16 constants. |
| 229 const int _SURROGATE_MASK = 0xF800; | 318 const int _SURROGATE_MASK = 0xF800; |
| 230 const int _SURROGATE_TAG_MASK = 0xFC00; | 319 const int _SURROGATE_TAG_MASK = 0xFC00; |
| 231 const int _SURROGATE_VALUE_MASK = 0x3FF; | 320 const int _SURROGATE_VALUE_MASK = 0x3FF; |
| 232 const int _LEAD_SURROGATE_MIN = 0xD800; | 321 const int _LEAD_SURROGATE_MIN = 0xD800; |
| 233 const int _TAIL_SURROGATE_MIN = 0xDC00; | 322 const int _TAIL_SURROGATE_MIN = 0xDC00; |
| 234 | 323 |
| 235 const int _REPLACEMENT_CHARACTER = 0xFFFD; | 324 const int _REPLACEMENT_CHARACTER = 0xFFFD; |
| 236 const int _BOM_CHARACTER = 0xFEFF; | 325 const int _BOM_CHARACTER = 0xFEFF; |
| (...skipping 10 matching lines...) Expand all Loading... |
| 247 | 336 |
| 248 | 337 |
| 249 /** | 338 /** |
| 250 * Decodes UTF-8. | 339 * Decodes UTF-8. |
| 251 * | 340 * |
| 252 * The decoder handles chunked input. | 341 * The decoder handles chunked input. |
| 253 */ | 342 */ |
| 254 // TODO(floitsch): make this class public. | 343 // TODO(floitsch): make this class public. |
| 255 class _Utf8Decoder { | 344 class _Utf8Decoder { |
| 256 final bool _allowMalformed; | 345 final bool _allowMalformed; |
| 346 final StringSink _stringSink; |
| 257 bool _isFirstCharacter = true; | 347 bool _isFirstCharacter = true; |
| 258 int _value = 0; | 348 int _value = 0; |
| 259 int _expectedUnits = 0; | 349 int _expectedUnits = 0; |
| 260 int _extraUnits = 0; | 350 int _extraUnits = 0; |
| 261 | 351 |
| 262 _Utf8Decoder(this._allowMalformed); | 352 _Utf8Decoder(this._stringSink, this._allowMalformed); |
| 263 | 353 |
| 264 bool get hasPartialInput => _expectedUnits > 0; | 354 bool get hasPartialInput => _expectedUnits > 0; |
| 265 | 355 |
| 266 // Limits of one through four byte encodings. | 356 // Limits of one through four byte encodings. |
| 267 static const List<int> _LIMITS = const <int>[ | 357 static const List<int> _LIMITS = const <int>[ |
| 268 _ONE_BYTE_LIMIT, | 358 _ONE_BYTE_LIMIT, |
| 269 _TWO_BYTE_LIMIT, | 359 _TWO_BYTE_LIMIT, |
| 270 _THREE_BYTE_LIMIT, | 360 _THREE_BYTE_LIMIT, |
| 271 _FOUR_BYTE_LIMIT ]; | 361 _FOUR_BYTE_LIMIT ]; |
| 272 | 362 |
| 273 void close(StringSink sink) { | 363 void close() { |
| 364 flush(); |
| 365 } |
| 366 |
| 367 /** |
| 368 * Flushes this decoder as if closed. |
| 369 * |
| 370 * This method throws if the input was partial and the decoder was |
| 371 * constructed with `allowMalformed` set to `false`. |
| 372 */ |
| 373 void flush() { |
| 274 if (hasPartialInput) { | 374 if (hasPartialInput) { |
| 275 if (!_allowMalformed) { | 375 if (!_allowMalformed) { |
| 276 throw new FormatException("Unfinished UTF-8 octet sequence"); | 376 throw new FormatException("Unfinished UTF-8 octet sequence"); |
| 277 } | 377 } |
| 278 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 378 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
| 379 _value = 0; |
| 380 _expectedUnits = 0; |
| 381 _extraUnits = 0; |
| 279 } | 382 } |
| 280 } | 383 } |
| 281 | 384 |
| 282 void convert(List<int> codeUnits, int startIndex, int endIndex, | 385 void convert(List<int> codeUnits, int startIndex, int endIndex) { |
| 283 StringSink sink) { | |
| 284 int value = _value; | 386 int value = _value; |
| 285 int expectedUnits = _expectedUnits; | 387 int expectedUnits = _expectedUnits; |
| 286 int extraUnits = _extraUnits; | 388 int extraUnits = _extraUnits; |
| 287 _value = 0; | 389 _value = 0; |
| 288 _expectedUnits = 0; | 390 _expectedUnits = 0; |
| 289 _extraUnits = 0; | 391 _extraUnits = 0; |
| 290 | 392 |
| 291 int i = startIndex; | 393 int i = startIndex; |
| 292 loop: while (true) { | 394 loop: while (true) { |
| 293 multibyte: if (expectedUnits > 0) { | 395 multibyte: if (expectedUnits > 0) { |
| 294 do { | 396 do { |
| 295 if (i == endIndex) { | 397 if (i == endIndex) { |
| 296 break loop; | 398 break loop; |
| 297 } | 399 } |
| 298 int unit = codeUnits[i]; | 400 int unit = codeUnits[i]; |
| 299 if ((unit & 0xC0) != 0x80) { | 401 if ((unit & 0xC0) != 0x80) { |
| 300 expectedUnits = 0; | 402 expectedUnits = 0; |
| 301 if (!_allowMalformed) { | 403 if (!_allowMalformed) { |
| 302 throw new FormatException( | 404 throw new FormatException( |
| 303 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 405 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
| 304 } | 406 } |
| 305 _isFirstCharacter = false; | 407 _isFirstCharacter = false; |
| 306 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 408 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
| 307 break multibyte; | 409 break multibyte; |
| 308 } else { | 410 } else { |
| 309 value = (value << 6) | (unit & 0x3f); | 411 value = (value << 6) | (unit & 0x3f); |
| 310 expectedUnits--; | 412 expectedUnits--; |
| 311 i++; | 413 i++; |
| 312 } | 414 } |
| 313 } while (expectedUnits > 0); | 415 } while (expectedUnits > 0); |
| 314 if (value <= _LIMITS[extraUnits - 1]) { | 416 if (value <= _LIMITS[extraUnits - 1]) { |
| 315 // Overly long encoding. The value could be encoded with a shorter | 417 // Overly long encoding. The value could be encoded with a shorter |
| 316 // encoding. | 418 // encoding. |
| 317 if (!_allowMalformed) { | 419 if (!_allowMalformed) { |
| 318 throw new FormatException( | 420 throw new FormatException( |
| 319 "Overlong encoding of 0x${value.toRadixString(16)}"); | 421 "Overlong encoding of 0x${value.toRadixString(16)}"); |
| 320 } | 422 } |
| 321 expectedUnits = extraUnits = 0; | 423 expectedUnits = extraUnits = 0; |
| 322 value = _REPLACEMENT_CHARACTER; | 424 value = _REPLACEMENT_CHARACTER; |
| 323 } | 425 } |
| 324 if (value > _FOUR_BYTE_LIMIT) { | 426 if (value > _FOUR_BYTE_LIMIT) { |
| 325 if (!_allowMalformed) { | 427 if (!_allowMalformed) { |
| 326 throw new FormatException("Character outside valid Unicode range: " | 428 throw new FormatException("Character outside valid Unicode range: " |
| 327 "0x${value.toRadixString(16)}"); | 429 "0x${value.toRadixString(16)}"); |
| 328 } | 430 } |
| 329 value = _REPLACEMENT_CHARACTER; | 431 value = _REPLACEMENT_CHARACTER; |
| 330 } | 432 } |
| 331 if (!_isFirstCharacter || value != _BOM_CHARACTER) { | 433 if (!_isFirstCharacter || value != _BOM_CHARACTER) { |
| 332 sink.writeCharCode(value); | 434 _stringSink.writeCharCode(value); |
| 333 } | 435 } |
| 334 _isFirstCharacter = false; | 436 _isFirstCharacter = false; |
| 335 } | 437 } |
| 336 | 438 |
| 337 while (i < endIndex) { | 439 while (i < endIndex) { |
| 338 int unit = codeUnits[i++]; | 440 int unit = codeUnits[i++]; |
| 339 if (unit <= _ONE_BYTE_LIMIT) { | 441 if (unit <= _ONE_BYTE_LIMIT) { |
| 340 _isFirstCharacter = false; | 442 _isFirstCharacter = false; |
| 341 sink.writeCharCode(unit); | 443 _stringSink.writeCharCode(unit); |
| 342 } else { | 444 } else { |
| 343 if ((unit & 0xE0) == 0xC0) { | 445 if ((unit & 0xE0) == 0xC0) { |
| 344 value = unit & 0x1F; | 446 value = unit & 0x1F; |
| 345 expectedUnits = extraUnits = 1; | 447 expectedUnits = extraUnits = 1; |
| 346 continue loop; | 448 continue loop; |
| 347 } | 449 } |
| 348 if ((unit & 0xF0) == 0xE0) { | 450 if ((unit & 0xF0) == 0xE0) { |
| 349 value = unit & 0x0F; | 451 value = unit & 0x0F; |
| 350 expectedUnits = extraUnits = 2; | 452 expectedUnits = extraUnits = 2; |
| 351 continue loop; | 453 continue loop; |
| 352 } | 454 } |
| 353 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. | 455 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. |
| 354 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { | 456 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { |
| 355 value = unit & 0x07; | 457 value = unit & 0x07; |
| 356 expectedUnits = extraUnits = 3; | 458 expectedUnits = extraUnits = 3; |
| 357 continue loop; | 459 continue loop; |
| 358 } | 460 } |
| 359 if (!_allowMalformed) { | 461 if (!_allowMalformed) { |
| 360 throw new FormatException( | 462 throw new FormatException( |
| 361 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 463 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
| 362 } | 464 } |
| 363 value = _REPLACEMENT_CHARACTER; | 465 value = _REPLACEMENT_CHARACTER; |
| 364 expectedUnits = extraUnits = 0; | 466 expectedUnits = extraUnits = 0; |
| 365 _isFirstCharacter = false; | 467 _isFirstCharacter = false; |
| 366 sink.writeCharCode(value); | 468 _stringSink.writeCharCode(value); |
| 367 } | 469 } |
| 368 } | 470 } |
| 369 break loop; | 471 break loop; |
| 370 } | 472 } |
| 371 if (expectedUnits > 0) { | 473 if (expectedUnits > 0) { |
| 372 _value = value; | 474 _value = value; |
| 373 _expectedUnits = expectedUnits; | 475 _expectedUnits = expectedUnits; |
| 374 _extraUnits = extraUnits; | 476 _extraUnits = extraUnits; |
| 375 } | 477 } |
| 376 } | 478 } |
| 377 } | 479 } |
| OLD | NEW |