OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 /** | 7 /** |
8 * An instance of the default implementation of the [Utf8Codec]. | 8 * An instance of the default implementation of the [Utf8Codec]. |
9 * | 9 * |
10 * This instance provides convenient access to the most common UTF-8 | 10 * This instance provides convenient access to the most common UTF-8 |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
56 } | 56 } |
57 | 57 |
58 Converter<String, List<int>> get encoder => new Utf8Encoder(); | 58 Converter<String, List<int>> get encoder => new Utf8Encoder(); |
59 Converter<List<int>, String> get decoder { | 59 Converter<List<int>, String> get decoder { |
60 return new Utf8Decoder(allowMalformed: _allowMalformed); | 60 return new Utf8Decoder(allowMalformed: _allowMalformed); |
61 } | 61 } |
62 } | 62 } |
63 | 63 |
64 /** | 64 /** |
65 * A [Utf8Encoder] converts strings to their UTF-8 code units (a list of | 65 * This class converts strings to their UTF-8 code units (a list of |
66 * unsigned 8-bit integers). | 66 * unsigned 8-bit integers). |
67 */ | 67 */ |
68 class Utf8Encoder extends Converter<String, List<int>> { | 68 class Utf8Encoder extends Converter<String, List<int>> { |
69 /** | 69 /** |
70 * Converts [string] to its UTF-8 code units (a list of | 70 * Converts [string] to its UTF-8 code units (a list of |
71 * unsigned 8-bit integers). | 71 * unsigned 8-bit integers). |
72 */ | 72 */ |
73 List<int> convert(String string) { | 73 List<int> convert(String string) { |
74 // Create a new encoder with a length that is guaranteed to be big enough. | 74 // Create a new encoder with a length that is guaranteed to be big enough. |
75 // A single code unit uses at most 3 bytes. Two code units at most 4. | 75 // A single code unit uses at most 3 bytes. Two code units at most 4. |
76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); | 76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); |
77 int endPosition = encoder._fillBuffer(string, 0, string.length); | 77 int endPosition = encoder._fillBuffer(string, 0, string.length); |
78 assert(endPosition >= string.length - 1); | 78 assert(endPosition >= string.length - 1); |
79 if (endPosition != string.length) { | 79 if (endPosition != string.length) { |
80 int lastCodeUnit = string.codeUnitAt(string.length - 1); | 80 int lastCodeUnit = string.codeUnitAt(string.length - 1); |
81 assert(_isLeadSurrogate(lastCodeUnit)); | 81 assert(_isLeadSurrogate(lastCodeUnit)); |
82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just | 82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just |
83 // writes the lead-surrogate. | 83 // writes the lead-surrogate. |
84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); | 84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); |
85 assert(!wasCombined); | 85 assert(!wasCombined); |
86 } | 86 } |
87 return encoder._buffer.sublist(0, encoder._bufferIndex); | 87 return encoder._buffer.sublist(0, encoder._bufferIndex); |
88 } | 88 } |
| 89 |
| 90 /** |
| 91 * Starts a chunked conversion. |
| 92 * |
| 93 * The converter works more efficiently if the given [sink] is a |
| 94 * [ByteConversionSink]. |
| 95 */ |
| 96 StringConversionSink startChunkedConversion( |
| 97 ChunkedConversionSink<List<int>> sink) { |
| 98 if (sink is! ByteConversionSink) { |
| 99 sink = new ByteConversionSink.from(sink); |
| 100 } |
| 101 return new _Utf8EncoderSink(sink); |
| 102 } |
89 } | 103 } |
90 | 104 |
91 /** | 105 /** |
92 * This class encodes Strings to UTF-8 code units (unsigned 8-bit integers). | 106 * This class encodes Strings to UTF-8 code units (unsigned 8-bit integers). |
93 */ | 107 */ |
94 // TODO(floitsch): make this class public. | 108 // TODO(floitsch): make this class public. |
95 class _Utf8Encoder { | 109 class _Utf8Encoder { |
96 int _carry = 0; | 110 int _carry = 0; |
97 int _bufferIndex = 0; | 111 int _bufferIndex = 0; |
98 final List<int> _buffer; | 112 final List<int> _buffer; |
99 | 113 |
100 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; | 114 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; |
101 | 115 |
102 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); | 116 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); |
103 | 117 |
104 _Utf8Encoder.withBufferSize(int bufferSize) | 118 _Utf8Encoder.withBufferSize(int bufferSize) |
105 // TODO(11971, floitsch): use Uint8List instead of normal lists. | 119 // TODO(11971, floitsch): use Uint8List instead of normal lists. |
106 : _buffer = new List<int>(bufferSize); | 120 : _buffer = new List<int>(bufferSize); |
107 | 121 |
108 /** | 122 /** |
109 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and | 123 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and |
110 * writes it to [_buffer]. | 124 * writes it to [_buffer]. |
111 * | 125 * |
112 * Returns true if the [nextCodeUnit] was combined with the | 126 * Returns true if the [nextCodeUnit] was combined with the |
113 * [leadingSurrogate]. If it wasn't then nextCodeUnit has not been written | 127 * [leadingSurrogate]. If it wasn't then nextCodeUnit was not a trailing |
114 * yet. | 128 * surrogate and has not been written yet. |
| 129 * |
| 130 * It is safe to pass 0 for [nextCodeUnit] in which case only the leading |
| 131 * surrogate is written. |
115 */ | 132 */ |
116 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { | 133 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { |
117 if (_isTailSurrogate(nextCodeUnit)) { | 134 if (_isTailSurrogate(nextCodeUnit)) { |
118 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); | 135 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); |
119 // If the rune is encoded with 2 code-units then it must be encoded | 136 // If the rune is encoded with 2 code-units then it must be encoded |
120 // with 4 bytes in UTF-8. | 137 // with 4 bytes in UTF-8. |
121 assert(rune > _THREE_BYTE_LIMIT); | 138 assert(rune > _THREE_BYTE_LIMIT); |
122 assert(rune <= _FOUR_BYTE_LIMIT); | 139 assert(rune <= _FOUR_BYTE_LIMIT); |
123 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); | 140 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); |
124 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); | 141 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); | 197 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); |
181 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); | 198 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); |
182 } | 199 } |
183 } | 200 } |
184 } | 201 } |
185 return stringIndex; | 202 return stringIndex; |
186 } | 203 } |
187 } | 204 } |
188 | 205 |
189 /** | 206 /** |
| 207 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit |
| 208 * integers). |
| 209 */ |
| 210 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin { |
| 211 |
| 212 final ByteConversionSink _sink; |
| 213 |
| 214 _Utf8EncoderSink(this._sink); |
| 215 |
| 216 void close() { |
| 217 if (_carry != 0) { |
| 218 // addSlice will call close again, but then the carry must be equal to 0. |
| 219 addSlice("", 0, 0, true); |
| 220 return; |
| 221 } |
| 222 _sink.close(); |
| 223 } |
| 224 |
| 225 void addSlice(String str, int start, int end, bool isLast) { |
| 226 _bufferIndex = 0; |
| 227 |
| 228 if (start == end && !isLast) { |
| 229 return; |
| 230 } |
| 231 |
| 232 if (_carry != 0) { |
| 233 int nextCodeUnit = 0; |
| 234 if (start != end) { |
| 235 nextCodeUnit = str.codeUnitAt(start); |
| 236 } else { |
| 237 assert(isLast); |
| 238 } |
| 239 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit); |
| 240 // Either we got a non-empty string, or we must not have been combined. |
| 241 assert(!wasCombined || start != end ); |
| 242 if (wasCombined) start++; |
| 243 _carry = 0; |
| 244 } |
| 245 do { |
| 246 start = _fillBuffer(str, start, end); |
| 247 bool isLastSlice = isLast && (start == end); |
| 248 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) { |
| 249 if (isLast && _bufferIndex < _buffer.length - 3) { |
| 250 // There is still space for the last incomplete surrogate. |
| 251 // We use a non-surrogate as second argument. This way the |
| 252 // function will just add the surrogate-half to the buffer. |
| 253 bool hasBeenCombined = _writeSurrogate(str.codeUnitAt(start), 0); |
| 254 assert(!hasBeenCombined); |
| 255 } else { |
| 256 // Otherwise store it in the carry. If isLast is true, then |
| 257 // close will flush the last carry. |
| 258 _carry = str.codeUnitAt(start); |
| 259 } |
| 260 start++; |
| 261 } |
| 262 _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice); |
| 263 _bufferIndex = 0; |
| 264 } while (start < end); |
| 265 if (isLast) close(); |
| 266 } |
| 267 |
| 268 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it |
| 269 // needs to deal with malformed input. |
| 270 } |
| 271 |
| 272 /** |
190 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) | 273 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) |
191 * to a string. | 274 * to a string. |
192 */ | 275 */ |
193 class Utf8Decoder extends Converter<List<int>, String> { | 276 class Utf8Decoder extends Converter<List<int>, String> { |
194 final bool _allowMalformed; | 277 final bool _allowMalformed; |
195 | 278 |
196 /** | 279 /** |
197 * Instantiates a new [Utf8Decoder]. | 280 * Instantiates a new [Utf8Decoder]. |
198 * | 281 * |
199 * The optional [allowMalformed] argument defines how [convert] deals | 282 * The optional [allowMalformed] argument defines how [convert] deals |
200 * with invalid or unterminated character sequences. | 283 * with invalid or unterminated character sequences. |
201 * | 284 * |
202 * If it is `true` [convert] replaces invalid (or unterminated) character | 285 * If it is `true` [convert] replaces invalid (or unterminated) character |
203 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 286 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
204 * it throws a [FormatException]. | 287 * it throws a [FormatException]. |
205 */ | 288 */ |
206 Utf8Decoder({ bool allowMalformed: false }) | 289 Utf8Decoder({ bool allowMalformed: false }) |
207 : this._allowMalformed = allowMalformed; | 290 : this._allowMalformed = allowMalformed; |
208 | 291 |
209 /** | 292 /** |
210 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 293 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
211 * corresponding string. | 294 * corresponding string. |
212 */ | 295 */ |
213 String convert(List<int> codeUnits) { | 296 String convert(List<int> codeUnits) { |
214 StringBuffer buffer = new StringBuffer(); | 297 StringBuffer buffer = new StringBuffer(); |
215 _Utf8Decoder decoder = new _Utf8Decoder(_allowMalformed); | 298 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
216 decoder.convert(codeUnits, 0, codeUnits.length, buffer); | 299 decoder.convert(codeUnits, 0, codeUnits.length); |
217 decoder.close(buffer); | 300 decoder.close(); |
218 return buffer.toString(); | 301 return buffer.toString(); |
219 } | 302 } |
| 303 |
| 304 /** |
| 305 * Starts a chunked conversion. |
| 306 * |
| 307 * The converter works more efficiently if the given [sink] is a |
| 308 * [StringConversionSink]. |
| 309 */ |
| 310 ByteConversionSink startChunkedConversion( |
| 311 ChunkedConversionSink<String> sink) { |
| 312 StringConversionSink stringSink; |
| 313 if (sink is StringConversionSink) { |
| 314 stringSink = sink; |
| 315 } else { |
| 316 stringSink = new StringConversionSink.from(sink); |
| 317 } |
| 318 return stringSink.asUtf8Sink(_allowMalformed); |
| 319 } |
220 } | 320 } |
221 | 321 |
222 // UTF-8 constants. | 322 // UTF-8 constants. |
223 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bytes | 323 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits |
224 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bytes | 324 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits |
225 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bytes | 325 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits |
226 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bytes, truncated to Unicode max. | 326 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. |
227 | 327 |
228 // UTF-16 constants. | 328 // UTF-16 constants. |
229 const int _SURROGATE_MASK = 0xF800; | 329 const int _SURROGATE_MASK = 0xF800; |
230 const int _SURROGATE_TAG_MASK = 0xFC00; | 330 const int _SURROGATE_TAG_MASK = 0xFC00; |
231 const int _SURROGATE_VALUE_MASK = 0x3FF; | 331 const int _SURROGATE_VALUE_MASK = 0x3FF; |
232 const int _LEAD_SURROGATE_MIN = 0xD800; | 332 const int _LEAD_SURROGATE_MIN = 0xD800; |
233 const int _TAIL_SURROGATE_MIN = 0xDC00; | 333 const int _TAIL_SURROGATE_MIN = 0xDC00; |
234 | 334 |
235 const int _REPLACEMENT_CHARACTER = 0xFFFD; | 335 const int _REPLACEMENT_CHARACTER = 0xFFFD; |
236 const int _BOM_CHARACTER = 0xFEFF; | 336 const int _BOM_CHARACTER = 0xFEFF; |
(...skipping 10 matching lines...) Expand all Loading... |
247 | 347 |
248 | 348 |
249 /** | 349 /** |
250 * Decodes UTF-8. | 350 * Decodes UTF-8. |
251 * | 351 * |
252 * The decoder handles chunked input. | 352 * The decoder handles chunked input. |
253 */ | 353 */ |
254 // TODO(floitsch): make this class public. | 354 // TODO(floitsch): make this class public. |
255 class _Utf8Decoder { | 355 class _Utf8Decoder { |
256 final bool _allowMalformed; | 356 final bool _allowMalformed; |
| 357 final StringSink _stringSink; |
257 bool _isFirstCharacter = true; | 358 bool _isFirstCharacter = true; |
258 int _value = 0; | 359 int _value = 0; |
259 int _expectedUnits = 0; | 360 int _expectedUnits = 0; |
260 int _extraUnits = 0; | 361 int _extraUnits = 0; |
261 | 362 |
262 _Utf8Decoder(this._allowMalformed); | 363 _Utf8Decoder(this._stringSink, this._allowMalformed); |
263 | 364 |
264 bool get hasPartialInput => _expectedUnits > 0; | 365 bool get hasPartialInput => _expectedUnits > 0; |
265 | 366 |
266 // Limits of one through four byte encodings. | 367 // Limits of one through four byte encodings. |
267 static const List<int> _LIMITS = const <int>[ | 368 static const List<int> _LIMITS = const <int>[ |
268 _ONE_BYTE_LIMIT, | 369 _ONE_BYTE_LIMIT, |
269 _TWO_BYTE_LIMIT, | 370 _TWO_BYTE_LIMIT, |
270 _THREE_BYTE_LIMIT, | 371 _THREE_BYTE_LIMIT, |
271 _FOUR_BYTE_LIMIT ]; | 372 _FOUR_BYTE_LIMIT ]; |
272 | 373 |
273 void close(StringSink sink) { | 374 void close() { |
| 375 flush(); |
| 376 } |
| 377 |
| 378 /** |
| 379 * Flushes this decoder as if closed. |
| 380 * |
| 381 * This method throws if the input was partial and the decoder was |
| 382 * constructed with `allowMalformed` set to `false`. |
| 383 */ |
| 384 void flush() { |
274 if (hasPartialInput) { | 385 if (hasPartialInput) { |
275 if (!_allowMalformed) { | 386 if (!_allowMalformed) { |
276 throw new FormatException("Unfinished UTF-8 octet sequence"); | 387 throw new FormatException("Unfinished UTF-8 octet sequence"); |
277 } | 388 } |
278 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 389 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
| 390 _value = 0; |
| 391 _expectedUnits = 0; |
| 392 _extraUnits = 0; |
279 } | 393 } |
280 } | 394 } |
281 | 395 |
282 void convert(List<int> codeUnits, int startIndex, int endIndex, | 396 void convert(List<int> codeUnits, int startIndex, int endIndex) { |
283 StringSink sink) { | |
284 int value = _value; | 397 int value = _value; |
285 int expectedUnits = _expectedUnits; | 398 int expectedUnits = _expectedUnits; |
286 int extraUnits = _extraUnits; | 399 int extraUnits = _extraUnits; |
287 _value = 0; | 400 _value = 0; |
288 _expectedUnits = 0; | 401 _expectedUnits = 0; |
289 _extraUnits = 0; | 402 _extraUnits = 0; |
290 | 403 |
291 int i = startIndex; | 404 int i = startIndex; |
292 loop: while (true) { | 405 loop: while (true) { |
293 multibyte: if (expectedUnits > 0) { | 406 multibyte: if (expectedUnits > 0) { |
294 do { | 407 do { |
295 if (i == endIndex) { | 408 if (i == endIndex) { |
296 break loop; | 409 break loop; |
297 } | 410 } |
298 int unit = codeUnits[i]; | 411 int unit = codeUnits[i]; |
299 if ((unit & 0xC0) != 0x80) { | 412 if ((unit & 0xC0) != 0x80) { |
300 expectedUnits = 0; | 413 expectedUnits = 0; |
301 if (!_allowMalformed) { | 414 if (!_allowMalformed) { |
302 throw new FormatException( | 415 throw new FormatException( |
303 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 416 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
304 } | 417 } |
305 _isFirstCharacter = false; | 418 _isFirstCharacter = false; |
306 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 419 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
307 break multibyte; | 420 break multibyte; |
308 } else { | 421 } else { |
309 value = (value << 6) | (unit & 0x3f); | 422 value = (value << 6) | (unit & 0x3f); |
310 expectedUnits--; | 423 expectedUnits--; |
311 i++; | 424 i++; |
312 } | 425 } |
313 } while (expectedUnits > 0); | 426 } while (expectedUnits > 0); |
314 if (value <= _LIMITS[extraUnits - 1]) { | 427 if (value <= _LIMITS[extraUnits - 1]) { |
315 // Overly long encoding. The value could be encoded with a shorter | 428 // Overly long encoding. The value could be encoded with a shorter |
316 // encoding. | 429 // encoding. |
317 if (!_allowMalformed) { | 430 if (!_allowMalformed) { |
318 throw new FormatException( | 431 throw new FormatException( |
319 "Overlong encoding of 0x${value.toRadixString(16)}"); | 432 "Overlong encoding of 0x${value.toRadixString(16)}"); |
320 } | 433 } |
321 expectedUnits = extraUnits = 0; | 434 expectedUnits = extraUnits = 0; |
322 value = _REPLACEMENT_CHARACTER; | 435 value = _REPLACEMENT_CHARACTER; |
323 } | 436 } |
324 if (value > _FOUR_BYTE_LIMIT) { | 437 if (value > _FOUR_BYTE_LIMIT) { |
325 if (!_allowMalformed) { | 438 if (!_allowMalformed) { |
326 throw new FormatException("Character outside valid Unicode range: " | 439 throw new FormatException("Character outside valid Unicode range: " |
327 "0x${value.toRadixString(16)}"); | 440 "0x${value.toRadixString(16)}"); |
328 } | 441 } |
329 value = _REPLACEMENT_CHARACTER; | 442 value = _REPLACEMENT_CHARACTER; |
330 } | 443 } |
331 if (!_isFirstCharacter || value != _BOM_CHARACTER) { | 444 if (!_isFirstCharacter || value != _BOM_CHARACTER) { |
332 sink.writeCharCode(value); | 445 _stringSink.writeCharCode(value); |
333 } | 446 } |
334 _isFirstCharacter = false; | 447 _isFirstCharacter = false; |
335 } | 448 } |
336 | 449 |
337 while (i < endIndex) { | 450 while (i < endIndex) { |
338 int unit = codeUnits[i++]; | 451 int unit = codeUnits[i++]; |
339 if (unit <= _ONE_BYTE_LIMIT) { | 452 if (unit <= _ONE_BYTE_LIMIT) { |
340 _isFirstCharacter = false; | 453 _isFirstCharacter = false; |
341 sink.writeCharCode(unit); | 454 _stringSink.writeCharCode(unit); |
342 } else { | 455 } else { |
343 if ((unit & 0xE0) == 0xC0) { | 456 if ((unit & 0xE0) == 0xC0) { |
344 value = unit & 0x1F; | 457 value = unit & 0x1F; |
345 expectedUnits = extraUnits = 1; | 458 expectedUnits = extraUnits = 1; |
346 continue loop; | 459 continue loop; |
347 } | 460 } |
348 if ((unit & 0xF0) == 0xE0) { | 461 if ((unit & 0xF0) == 0xE0) { |
349 value = unit & 0x0F; | 462 value = unit & 0x0F; |
350 expectedUnits = extraUnits = 2; | 463 expectedUnits = extraUnits = 2; |
351 continue loop; | 464 continue loop; |
352 } | 465 } |
353 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. | 466 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. |
354 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { | 467 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { |
355 value = unit & 0x07; | 468 value = unit & 0x07; |
356 expectedUnits = extraUnits = 3; | 469 expectedUnits = extraUnits = 3; |
357 continue loop; | 470 continue loop; |
358 } | 471 } |
359 if (!_allowMalformed) { | 472 if (!_allowMalformed) { |
360 throw new FormatException( | 473 throw new FormatException( |
361 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 474 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
362 } | 475 } |
363 value = _REPLACEMENT_CHARACTER; | 476 value = _REPLACEMENT_CHARACTER; |
364 expectedUnits = extraUnits = 0; | 477 expectedUnits = extraUnits = 0; |
365 _isFirstCharacter = false; | 478 _isFirstCharacter = false; |
366 sink.writeCharCode(value); | 479 _stringSink.writeCharCode(value); |
367 } | 480 } |
368 } | 481 } |
369 break loop; | 482 break loop; |
370 } | 483 } |
371 if (expectedUnits > 0) { | 484 if (expectedUnits > 0) { |
372 _value = value; | 485 _value = value; |
373 _expectedUnits = expectedUnits; | 486 _expectedUnits = expectedUnits; |
374 _extraUnits = extraUnits; | 487 _extraUnits = extraUnits; |
375 } | 488 } |
376 } | 489 } |
377 } | 490 } |
OLD | NEW |