OLD | NEW |
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 /** | 7 /** |
8 * An instance of the default implementation of the [Utf8Codec]. | 8 * An instance of the default implementation of the [Utf8Codec]. |
9 * | 9 * |
10 * This instance provides a convenient access to the most common UTF-8 | 10 * This instance provides a convenient access to the most common UTF-8 |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); | 55 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); |
56 } | 56 } |
57 | 57 |
58 Converter<String, List<int>> get encoder => new Utf8Encoder(); | 58 Converter<String, List<int>> get encoder => new Utf8Encoder(); |
59 Converter<List<int>, String> get decoder { | 59 Converter<List<int>, String> get decoder { |
60 return new Utf8Decoder(allowMalformed: _allowMalformed); | 60 return new Utf8Decoder(allowMalformed: _allowMalformed); |
61 } | 61 } |
62 } | 62 } |
63 | 63 |
64 /** | 64 /** |
65 * A [Utf8Encoder] converts strings to their UTF-8 code units (a list of | 65 * This class converts strings to their UTF-8 code units (a list of |
66 * unsigned 8-bit integers). | 66 * unsigned 8-bit integers). |
67 */ | 67 */ |
68 class Utf8Encoder extends Converter<String, List<int>> { | 68 class Utf8Encoder extends Converter<String, List<int>> { |
69 /** | 69 /** |
70 * Converts [string] to its UTF-8 code units (a list of | 70 * Converts [string] to its UTF-8 code units (a list of |
71 * unsigned 8-bit integers). | 71 * unsigned 8-bit integers). |
72 */ | 72 */ |
73 List<int> convert(String string) { | 73 List<int> convert(String string) { |
74 // Create a new encoder with a length that is guaranteed to be big enough. | 74 // Create a new encoder with a length that is guaranteed to be big enough. |
75 // A single code unit uses at most 3 bytes. Two code units at most 4. | 75 // A single code unit uses at most 3 bytes. Two code units at most 4. |
76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); | 76 _Utf8Encoder encoder = new _Utf8Encoder.withBufferSize(string.length * 3); |
77 int endPosition = encoder._fillBuffer(string, 0, string.length); | 77 int endPosition = encoder._fillBuffer(string, 0, string.length); |
78 assert(endPosition >= string.length - 1); | 78 assert(endPosition >= string.length - 1); |
79 if (endPosition != string.length) { | 79 if (endPosition != string.length) { |
80 int lastCodeUnit = string.codeUnitAt(string.length - 1); | 80 int lastCodeUnit = string.codeUnitAt(string.length - 1); |
81 assert(_isLeadSurrogate(lastCodeUnit)); | 81 assert(_isLeadSurrogate(lastCodeUnit)); |
82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just | 82 // We use a non-surrogate as `nextUnit` so that _writeSurrogate just |
83 // writes the lead-surrogate. | 83 // writes the lead-surrogate. |
84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); | 84 bool wasCombined = encoder._writeSurrogate(lastCodeUnit, 0); |
85 assert(!wasCombined); | 85 assert(!wasCombined); |
86 } | 86 } |
87 return encoder._buffer.sublist(0, encoder._bufferIndex); | 87 return encoder._buffer.sublist(0, encoder._bufferIndex); |
88 } | 88 } |
| 89 |
| 90 StringConversionSink startChunkedConversion(ChunkedConversionSink sink) {
| 91 ByteConversionSink byteSink = sink.adaptTo(outputInterface);
| 92 return new _Utf8EncoderSink(byteSink);
| 93 }
| 94 |
| 95 ChunkedConversionInterface get inputInterface => |
| 96 StringConversionSink.INTERFACE; |
| 97 ChunkedConversionInterface get outputInterface => |
| 98 ByteConversionSink.INTERFACE; |
89 } | 99 } |
90 | 100 |
91 /** | 101 /** |
92 * This class encodes Strings to UTF-8 code units (unsigned 8 bit integers). | 102 * This class encodes Strings to UTF-8 code units (unsigned 8 bit integers). |
93 */ | 103 */ |
94 // TODO(floitsch): make this class public. | 104 // TODO(floitsch): make this class public. |
95 class _Utf8Encoder { | 105 class _Utf8Encoder { |
96 int _carry = 0; | 106 int _carry = 0; |
97 int _bufferIndex = 0; | 107 int _bufferIndex = 0; |
98 final List<int> _buffer; | 108 final List<int> _buffer; |
99 | 109 |
100 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; | 110 static const _DEFAULT_BYTE_BUFFER_SIZE = 1024; |
101 | 111 |
102 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); | 112 _Utf8Encoder() : this.withBufferSize(_DEFAULT_BYTE_BUFFER_SIZE); |
103 | 113 |
104 _Utf8Encoder.withBufferSize(int bufferSize) | 114 _Utf8Encoder.withBufferSize(int bufferSize) |
105 // TODO(11971, floitsch): use Uint8List instead of normal lists. | 115 // TODO(11971, floitsch): use Uint8List instead of normal lists. |
106 : _buffer = new List<int>(bufferSize); | 116 : _buffer = new List<int>(bufferSize); |
107 | 117 |
108 /** | 118 /** |
109 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and | 119 * Tries to combine the given [leadingSurrogate] with the [nextCodeUnit] and |
110 * writes it to [_buffer]. | 120 * writes it to [_buffer]. |
111 * | 121 * |
112 * Returns true if the [nextCodeUnit] was combined with the | 122 * Returns true if the [nextCodeUnit] was combined with the |
113 * [leadingSurrogate]. If it wasn't then nextCodeUnit has not been written | 123 * [leadingSurrogate]. If it wasn't then nextCodeUnit was not a trailing |
114 * yet. | 124 * surrogate and has not been written yet. |
| 125 * |
| 126 * It is safe to pass 0 for [nextCodeUnit] in which case only the leading |
| 127 * surrogate is written. |
115 */ | 128 */ |
116 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { | 129 bool _writeSurrogate(int leadingSurrogate, int nextCodeUnit) { |
117 if (_isTailSurrogate(nextCodeUnit)) { | 130 if (_isTailSurrogate(nextCodeUnit)) { |
118 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); | 131 int rune = _combineSurrogatePair(leadingSurrogate, nextCodeUnit); |
119 // If the rune is encoded with 2 code-units then it must be encoded | 132 // If the rune is encoded with 2 code-units then it must be encoded |
120 // with 4 bytes in UTF-8. | 133 // with 4 bytes in UTF-8. |
121 assert(rune > _THREE_BYTE_LIMIT); | 134 assert(rune > _THREE_BYTE_LIMIT); |
122 assert(rune <= _FOUR_BYTE_LIMIT); | 135 assert(rune <= _FOUR_BYTE_LIMIT); |
123 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); | 136 _buffer[_bufferIndex++] = 0xF0 | (rune >> 18); |
124 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); | 137 _buffer[_bufferIndex++] = 0x80 | ((rune >> 12) & 0x3f); |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
180 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); | 193 _buffer[_bufferIndex++] = 0x80 | ((rune >> 6) & 0x3f); |
181 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); | 194 _buffer[_bufferIndex++] = 0x80 | (rune & 0x3f); |
182 } | 195 } |
183 } | 196 } |
184 } | 197 } |
185 return stringIndex; | 198 return stringIndex; |
186 } | 199 } |
187 } | 200 } |
188 | 201 |
189 /** | 202 /** |
| 203 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit |
| 204 * integers). |
| 205 */ |
| 206 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin { |
| 207 |
| 208 final ByteConversionSink _sink; |
| 209 |
| 210 _Utf8EncoderSink(this._sink); |
| 211 |
| 212 void close() { |
| 213 if (_carry != 0) { |
| 214 // addSlice will call close again, but then the carry must be equal to 0. |
| 215 addSlice("", 0, 0, true); |
| 216 return; |
| 217 } |
| 218 _sink.close(); |
| 219 } |
| 220 |
| 221 void addSlice(String str, int start, int end, bool isLast) { |
| 222 _bufferIndex = 0; |
| 223 |
| 224 if (start == end && !isLast) { |
| 225 return; |
| 226 } |
| 227 |
| 228 if (_carry != 0) { |
| 229 int nextCodeUnit = 0; |
| 230 if (start != end) { |
| 231 nextCodeUnit = str.codeUnitAt(start); |
| 232 } else { |
| 233 assert(isLast); |
| 234 } |
| 235 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit); |
| 236 // Either we got a non-empty string, or we must not have been combined. |
| 237 assert(!wasCombined || start != end ); |
| 238 if (wasCombined) start++; |
| 239 _carry = 0; |
| 240 } |
| 241 do { |
| 242 start = _fillBuffer(str, start, end); |
| 243 bool isLastSlice = isLast && (start == end); |
| 244 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) { |
| 245 if (isLast && _bufferIndex < _buffer.length - 3) { |
| 246 // There is still space for the last incomplete surrogate. |
| 247 // We use a non-surrogate as second argument. This way the |
| 248 // function will just add the surrogate-half to the buffer. |
| 249 bool hasBeenCombined = _writeSurrogate(str.codeUnitAt(start), 0); |
| 250 assert(!hasBeenCombined); |
| 251 } else { |
| 252 // Otherwise store it in the carry. If isLast is true, then |
| 253 // close will flush the last carry. |
| 254 _carry = str.codeUnitAt(start); |
| 255 } |
| 256 start++; |
| 257 } |
| 258 _sink.addSlice(_buffer, 0, _bufferIndex, isLastSlice); |
| 259 _bufferIndex = 0; |
| 260 } while (start < end); |
| 261 if (isLast) close(); |
| 262 } |
| 263 |
| 264 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it
| 265 // needs to deal with malformed input. |
| 266 } |
| 267 |
| 268 /** |
190 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) | 269 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) |
191 * to a string. | 270 * to a string. |
192 */ | 271 */ |
193 class Utf8Decoder extends Converter<List<int>, String> { | 272 class Utf8Decoder extends Converter<List<int>, String> { |
194 final bool _allowMalformed; | 273 final bool _allowMalformed; |
195 | 274 |
196 /** | 275 /** |
197 * Instantiates a new [Utf8Decoder]. | 276 * Instantiates a new [Utf8Decoder]. |
198 * | 277 * |
199 * The optional [allowMalformed] argument defines how [convert] deals | 278 * The optional [allowMalformed] argument defines how [convert] deals |
200 * with invalid or unterminated character sequences. | 279 * with invalid or unterminated character sequences. |
201 * | 280 * |
202 * If it is `true` [convert] replaces invalid (or unterminated) character | 281 * If it is `true` [convert] replaces invalid (or unterminated) character |
203 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise | 282 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise |
204 * it throws a [FormatException]. | 283 * it throws a [FormatException]. |
205 */ | 284 */ |
206 Utf8Decoder({ bool allowMalformed: false }) | 285 Utf8Decoder({ bool allowMalformed: false }) |
207 : this._allowMalformed = allowMalformed; | 286 : this._allowMalformed = allowMalformed; |
208 | 287 |
209 /** | 288 /** |
210 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the | 289 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the |
211 * corresponding string. | 290 * corresponding string. |
212 */ | 291 */ |
213 String convert(List<int> codeUnits) { | 292 String convert(List<int> codeUnits) { |
214 StringBuffer buffer = new StringBuffer(); | 293 StringBuffer buffer = new StringBuffer(); |
215 _Utf8Decoder decoder = new _Utf8Decoder(_allowMalformed); | 294 _Utf8Decoder decoder = new _Utf8Decoder(buffer, _allowMalformed); |
216 decoder.convert(codeUnits, 0, codeUnits.length, buffer); | 295 decoder.convert(codeUnits, 0, codeUnits.length); |
217 decoder.close(buffer); | 296 decoder.close(); |
218 return buffer.toString(); | 297 return buffer.toString(); |
219 } | 298 } |
| 299 |
| 300 ByteConversionSink startChunkedConversion(ChunkedConversionSink sink) { |
| 301 StringConversionSink stringSink = sink.adaptTo(outputInterface); |
| 302 return stringSink.asUtf8Sink(_allowMalformed); |
| 303 } |
| 304 |
| 305 ChunkedConversionInterface get inputInterface => |
| 306 ByteConversionSink.INTERFACE; |
| 307 ChunkedConversionInterface get outputInterface => |
| 308 StringConversionSink.INTERFACE; |
220 } | 309 } |
221 | 310 |
222 // UTF-8 constants. | 311 // UTF-8 constants. |
223 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bytes | 312 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits |
224 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bytes | 313 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits |
225 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bytes | 314 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits |
226 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bytes, truncated to Unicode max. | 315 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. |
227 | 316 |
228 // UTF-16 constants. | 317 // UTF-16 constants. |
229 const int _SURROGATE_MASK = 0xF800; | 318 const int _SURROGATE_MASK = 0xF800; |
230 const int _SURROGATE_TAG_MASK = 0xFC00; | 319 const int _SURROGATE_TAG_MASK = 0xFC00; |
231 const int _SURROGATE_VALUE_MASK = 0x3FF; | 320 const int _SURROGATE_VALUE_MASK = 0x3FF; |
232 const int _LEAD_SURROGATE_MIN = 0xD800; | 321 const int _LEAD_SURROGATE_MIN = 0xD800; |
233 const int _TAIL_SURROGATE_MIN = 0xDC00; | 322 const int _TAIL_SURROGATE_MIN = 0xDC00; |
234 | 323 |
235 const int _REPLACEMENT_CHARACTER = 0xFFFD; | 324 const int _REPLACEMENT_CHARACTER = 0xFFFD; |
236 const int _BOM_CHARACTER = 0xFEFF; | 325 const int _BOM_CHARACTER = 0xFEFF; |
(...skipping 10 matching lines...) Expand all Loading... |
247 | 336 |
248 | 337 |
249 /** | 338 /** |
250 * Decodes UTF-8. | 339 * Decodes UTF-8. |
251 * | 340 * |
252 * The decoder handles chunked input. | 341 * The decoder handles chunked input. |
253 */ | 342 */ |
254 // TODO(floitsch): make this class public. | 343 // TODO(floitsch): make this class public. |
255 class _Utf8Decoder { | 344 class _Utf8Decoder { |
256 final bool _allowMalformed; | 345 final bool _allowMalformed; |
| 346 final StringSink _stringSink; |
257 bool _isFirstCharacter = true; | 347 bool _isFirstCharacter = true; |
258 int _value = 0; | 348 int _value = 0; |
259 int _expectedUnits = 0; | 349 int _expectedUnits = 0; |
260 int _extraUnits = 0; | 350 int _extraUnits = 0; |
261 | 351 |
262 _Utf8Decoder(this._allowMalformed); | 352 _Utf8Decoder(this._stringSink, this._allowMalformed); |
263 | 353 |
264 bool get hasPartialInput => _expectedUnits > 0; | 354 bool get hasPartialInput => _expectedUnits > 0; |
265 | 355 |
266 // Limits of one through four byte encodings. | 356 // Limits of one through four byte encodings. |
267 static const List<int> _LIMITS = const <int>[ | 357 static const List<int> _LIMITS = const <int>[ |
268 _ONE_BYTE_LIMIT, | 358 _ONE_BYTE_LIMIT, |
269 _TWO_BYTE_LIMIT, | 359 _TWO_BYTE_LIMIT, |
270 _THREE_BYTE_LIMIT, | 360 _THREE_BYTE_LIMIT, |
271 _FOUR_BYTE_LIMIT ]; | 361 _FOUR_BYTE_LIMIT ]; |
272 | 362 |
273 void close(StringSink sink) { | 363 void close() { |
| 364 flush(); |
| 365 } |
| 366 |
| 367 /** |
| 368 * Flushes this decoder as if closed. |
| 369 * |
| 370 * This method throws if the input was partial and the decoder was |
| 371 * constructed with `allowMalformed` set to `false`. |
| 372 */ |
| 373 void flush() { |
274 if (hasPartialInput) { | 374 if (hasPartialInput) { |
275 if (!_allowMalformed) { | 375 if (!_allowMalformed) { |
276 throw new FormatException("Unfinished UTF-8 octet sequence"); | 376 throw new FormatException("Unfinished UTF-8 octet sequence"); |
277 } | 377 } |
278 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 378 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
| 379 _value = 0; |
| 380 _expectedUnits = 0; |
| 381 _extraUnits = 0; |
279 } | 382 } |
280 } | 383 } |
281 | 384 |
282 void convert(List<int> codeUnits, int startIndex, int endIndex, | 385 void convert(List<int> codeUnits, int startIndex, int endIndex) { |
283 StringSink sink) { | |
284 int value = _value; | 386 int value = _value; |
285 int expectedUnits = _expectedUnits; | 387 int expectedUnits = _expectedUnits; |
286 int extraUnits = _extraUnits; | 388 int extraUnits = _extraUnits; |
287 _value = 0; | 389 _value = 0; |
288 _expectedUnits = 0; | 390 _expectedUnits = 0; |
289 _extraUnits = 0; | 391 _extraUnits = 0; |
290 | 392 |
291 int i = startIndex; | 393 int i = startIndex; |
292 loop: while (true) { | 394 loop: while (true) { |
293 multibyte: if (expectedUnits > 0) { | 395 multibyte: if (expectedUnits > 0) { |
294 do { | 396 do { |
295 if (i == endIndex) { | 397 if (i == endIndex) { |
296 break loop; | 398 break loop; |
297 } | 399 } |
298 int unit = codeUnits[i]; | 400 int unit = codeUnits[i]; |
299 if ((unit & 0xC0) != 0x80) { | 401 if ((unit & 0xC0) != 0x80) { |
300 expectedUnits = 0; | 402 expectedUnits = 0; |
301 if (!_allowMalformed) { | 403 if (!_allowMalformed) { |
302 throw new FormatException( | 404 throw new FormatException( |
303 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 405 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
304 } | 406 } |
305 _isFirstCharacter = false; | 407 _isFirstCharacter = false; |
306 sink.writeCharCode(_REPLACEMENT_CHARACTER); | 408 _stringSink.writeCharCode(_REPLACEMENT_CHARACTER); |
307 break multibyte; | 409 break multibyte; |
308 } else { | 410 } else { |
309 value = (value << 6) | (unit & 0x3f); | 411 value = (value << 6) | (unit & 0x3f); |
310 expectedUnits--; | 412 expectedUnits--; |
311 i++; | 413 i++; |
312 } | 414 } |
313 } while (expectedUnits > 0); | 415 } while (expectedUnits > 0); |
314 if (value <= _LIMITS[extraUnits - 1]) { | 416 if (value <= _LIMITS[extraUnits - 1]) { |
315 // Overly long encoding. The value could be encoded with a shorter | 417 // Overly long encoding. The value could be encoded with a shorter |
316 // encoding. | 418 // encoding. |
317 if (!_allowMalformed) { | 419 if (!_allowMalformed) { |
318 throw new FormatException( | 420 throw new FormatException( |
319 "Overlong encoding of 0x${value.toRadixString(16)}"); | 421 "Overlong encoding of 0x${value.toRadixString(16)}"); |
320 } | 422 } |
321 expectedUnits = extraUnits = 0; | 423 expectedUnits = extraUnits = 0; |
322 value = _REPLACEMENT_CHARACTER; | 424 value = _REPLACEMENT_CHARACTER; |
323 } | 425 } |
324 if (value > _FOUR_BYTE_LIMIT) { | 426 if (value > _FOUR_BYTE_LIMIT) { |
325 if (!_allowMalformed) { | 427 if (!_allowMalformed) { |
326 throw new FormatException("Character outside valid Unicode range: " | 428 throw new FormatException("Character outside valid Unicode range: " |
327 "0x${value.toRadixString(16)}"); | 429 "0x${value.toRadixString(16)}"); |
328 } | 430 } |
329 value = _REPLACEMENT_CHARACTER; | 431 value = _REPLACEMENT_CHARACTER; |
330 } | 432 } |
331 if (!_isFirstCharacter || value != _BOM_CHARACTER) { | 433 if (!_isFirstCharacter || value != _BOM_CHARACTER) { |
332 sink.writeCharCode(value); | 434 _stringSink.writeCharCode(value); |
333 } | 435 } |
334 _isFirstCharacter = false; | 436 _isFirstCharacter = false; |
335 } | 437 } |
336 | 438 |
337 while (i < endIndex) { | 439 while (i < endIndex) { |
338 int unit = codeUnits[i++]; | 440 int unit = codeUnits[i++]; |
339 if (unit <= _ONE_BYTE_LIMIT) { | 441 if (unit <= _ONE_BYTE_LIMIT) { |
340 _isFirstCharacter = false; | 442 _isFirstCharacter = false; |
341 sink.writeCharCode(unit); | 443 _stringSink.writeCharCode(unit); |
342 } else { | 444 } else { |
343 if ((unit & 0xE0) == 0xC0) { | 445 if ((unit & 0xE0) == 0xC0) { |
344 value = unit & 0x1F; | 446 value = unit & 0x1F; |
345 expectedUnits = extraUnits = 1; | 447 expectedUnits = extraUnits = 1; |
346 continue loop; | 448 continue loop; |
347 } | 449 } |
348 if ((unit & 0xF0) == 0xE0) { | 450 if ((unit & 0xF0) == 0xE0) { |
349 value = unit & 0x0F; | 451 value = unit & 0x0F; |
350 expectedUnits = extraUnits = 2; | 452 expectedUnits = extraUnits = 2; |
351 continue loop; | 453 continue loop; |
352 } | 454 } |
353 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. | 455 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. |
354 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { | 456 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { |
355 value = unit & 0x07; | 457 value = unit & 0x07; |
356 expectedUnits = extraUnits = 3; | 458 expectedUnits = extraUnits = 3; |
357 continue loop; | 459 continue loop; |
358 } | 460 } |
359 if (!_allowMalformed) { | 461 if (!_allowMalformed) { |
360 throw new FormatException( | 462 throw new FormatException( |
361 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 463 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
362 } | 464 } |
363 value = _REPLACEMENT_CHARACTER; | 465 value = _REPLACEMENT_CHARACTER; |
364 expectedUnits = extraUnits = 0; | 466 expectedUnits = extraUnits = 0; |
365 _isFirstCharacter = false; | 467 _isFirstCharacter = false; |
366 sink.writeCharCode(value); | 468 _stringSink.writeCharCode(value); |
367 } | 469 } |
368 } | 470 } |
369 break loop; | 471 break loop; |
370 } | 472 } |
371 if (expectedUnits > 0) { | 473 if (expectedUnits > 0) { |
372 _value = value; | 474 _value = value; |
373 _expectedUnits = expectedUnits; | 475 _expectedUnits = expectedUnits; |
374 _extraUnits = extraUnits; | 476 _extraUnits = extraUnits; |
375 } | 477 } |
376 } | 478 } |
377 } | 479 } |
OLD | NEW |