Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(47)

Side by Side Diff: sdk/lib/convert/utf.dart

Issue 2754013002: Format all dart: library files (Closed)
Patch Set: Format all dart: library files Created 3 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « sdk/lib/convert/string_conversion.dart ('k') | sdk/lib/core/bool.dart » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.convert; 5 part of dart.convert;
6 6
7 /** The Unicode Replacement character `U+FFFD` (�). */ 7 /** The Unicode Replacement character `U+FFFD` (�). */
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;
9 9
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */
(...skipping 24 matching lines...) Expand all
35 * Instantiates a new [Utf8Codec]. 35 * Instantiates a new [Utf8Codec].
36 * 36 *
37 * The optional [allowMalformed] argument defines how [decoder] (and [decode]) 37 * The optional [allowMalformed] argument defines how [decoder] (and [decode])
38 * deal with invalid or unterminated character sequences. 38 * deal with invalid or unterminated character sequences.
39 * 39 *
40 * If it is `true` (and not overridden at the method invocation) [decode] and 40 * If it is `true` (and not overridden at the method invocation) [decode] and
41 * the [decoder] replace invalid (or unterminated) octet 41 * the [decoder] replace invalid (or unterminated) octet
42 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise 42 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise
43 * they throw a [FormatException]. 43 * they throw a [FormatException].
44 */ 44 */
45 const Utf8Codec({ bool allowMalformed: false }) 45 const Utf8Codec({bool allowMalformed: false})
46 : _allowMalformed = allowMalformed; 46 : _allowMalformed = allowMalformed;
47 47
48 String get name => "utf-8"; 48 String get name => "utf-8";
49 49
50 /** 50 /**
51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the 51 * Decodes the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
52 * corresponding string. 52 * corresponding string.
53 * 53 *
54 * If the [codeUnits] start with the encoding of a 54 * If the [codeUnits] start with the encoding of a
55 * [UNICODE_BOM_CHARACTER_RUNE], that character is discarded. 55 * [UNICODE_BOM_CHARACTER_RUNE], that character is discarded.
56 * 56 *
57 * If [allowMalformed] is `true` the decoder replaces invalid (or 57 * If [allowMalformed] is `true` the decoder replaces invalid (or
58 * unterminated) character sequences with the Unicode Replacement character 58 * unterminated) character sequences with the Unicode Replacement character
59 * `U+FFFD` (�). Otherwise it throws a [FormatException]. 59 * `U+FFFD` (�). Otherwise it throws a [FormatException].
60 * 60 *
61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that 61 * If [allowMalformed] is not given, it defaults to the `allowMalformed` that
62 * was used to instantiate `this`. 62 * was used to instantiate `this`.
63 */ 63 */
64 String decode(List<int> codeUnits, { bool allowMalformed }) { 64 String decode(List<int> codeUnits, {bool allowMalformed}) {
65 if (allowMalformed == null) allowMalformed = _allowMalformed; 65 if (allowMalformed == null) allowMalformed = _allowMalformed;
66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits); 66 return new Utf8Decoder(allowMalformed: allowMalformed).convert(codeUnits);
67 } 67 }
68 68
69 Utf8Encoder get encoder => const Utf8Encoder(); 69 Utf8Encoder get encoder => const Utf8Encoder();
70 Utf8Decoder get decoder { 70 Utf8Decoder get decoder {
71 return new Utf8Decoder(allowMalformed: _allowMalformed); 71 return new Utf8Decoder(allowMalformed: _allowMalformed);
72 } 72 }
73 } 73 }
74 74
75 /** 75 /**
76 * This class converts strings to their UTF-8 code units (a list of 76 * This class converts strings to their UTF-8 code units (a list of
77 * unsigned 8-bit integers). 77 * unsigned 8-bit integers).
78 */ 78 */
79 class Utf8Encoder extends Converter<String, List<int>> 79 class Utf8Encoder extends Converter<String, List<int>>
80 implements ChunkedConverter<String, List<int>, String, List<int>> { 80 implements ChunkedConverter<String, List<int>, String, List<int>> {
81
82 const Utf8Encoder(); 81 const Utf8Encoder();
83 82
84 /** 83 /**
85 * Converts [string] to its UTF-8 code units (a list of 84 * Converts [string] to its UTF-8 code units (a list of
86 * unsigned 8-bit integers). 85 * unsigned 8-bit integers).
87 * 86 *
88 * If [start] and [end] are provided, only the substring 87 * If [start] and [end] are provided, only the substring
89 * `string.substring(start, end)` is converted. 88 * `string.substring(start, end)` is converted.
90 */ 89 */
91 List<int> convert(String string, [int start = 0, int end]) { 90 List<int> convert(String string, [int start = 0, int end]) {
(...skipping 141 matching lines...) Expand 10 before | Expand all | Expand 10 after
233 } 232 }
234 return stringIndex; 233 return stringIndex;
235 } 234 }
236 } 235 }
237 236
238 /** 237 /**
239 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit 238 * This class encodes chunked strings to UTF-8 code units (unsigned 8-bit
240 * integers). 239 * integers).
241 */ 240 */
242 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin { 241 class _Utf8EncoderSink extends _Utf8Encoder with StringConversionSinkMixin {
243
244 final ByteConversionSink _sink; 242 final ByteConversionSink _sink;
245 243
246 _Utf8EncoderSink(this._sink); 244 _Utf8EncoderSink(this._sink);
247 245
248 void close() { 246 void close() {
249 if (_carry != 0) { 247 if (_carry != 0) {
250 // addSlice will call close again, but then the carry must be equal to 0. 248 // addSlice will call close again, but then the carry must be equal to 0.
251 addSlice("", 0, 0, true); 249 addSlice("", 0, 0, true);
252 return; 250 return;
253 } 251 }
254 _sink.close(); 252 _sink.close();
255 } 253 }
256 254
257 void addSlice(String str, int start, int end, bool isLast) { 255 void addSlice(String str, int start, int end, bool isLast) {
258 _bufferIndex = 0; 256 _bufferIndex = 0;
259 257
260 if (start == end && !isLast) { 258 if (start == end && !isLast) {
261 return; 259 return;
262 } 260 }
263 261
264 if (_carry != 0) { 262 if (_carry != 0) {
265 int nextCodeUnit = 0; 263 int nextCodeUnit = 0;
266 if (start != end) { 264 if (start != end) {
267 nextCodeUnit = str.codeUnitAt(start); 265 nextCodeUnit = str.codeUnitAt(start);
268 } else { 266 } else {
269 assert(isLast); 267 assert(isLast);
270 } 268 }
271 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit); 269 bool wasCombined = _writeSurrogate(_carry, nextCodeUnit);
272 // Either we got a non-empty string, or we must not have been combined. 270 // Either we got a non-empty string, or we must not have been combined.
273 assert(!wasCombined || start != end ); 271 assert(!wasCombined || start != end);
274 if (wasCombined) start++; 272 if (wasCombined) start++;
275 _carry = 0; 273 _carry = 0;
276 } 274 }
277 do { 275 do {
278 start = _fillBuffer(str, start, end); 276 start = _fillBuffer(str, start, end);
279 bool isLastSlice = isLast && (start == end); 277 bool isLastSlice = isLast && (start == end);
280 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) { 278 if (start == end - 1 && _isLeadSurrogate(str.codeUnitAt(start))) {
281 if (isLast && _bufferIndex < _buffer.length - 3) { 279 if (isLast && _bufferIndex < _buffer.length - 3) {
282 // There is still space for the last incomplete surrogate. 280 // There is still space for the last incomplete surrogate.
283 // We use a non-surrogate as second argument. This way the 281 // We use a non-surrogate as second argument. This way the
(...skipping 16 matching lines...) Expand all
300 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it 298 // TODO(floitsch): implement asUtf8Sink. Slightly complicated because it
301 // needs to deal with malformed input. 299 // needs to deal with malformed input.
302 } 300 }
303 301
304 /** 302 /**
305 * This class converts UTF-8 code units (lists of unsigned 8-bit integers) 303 * This class converts UTF-8 code units (lists of unsigned 8-bit integers)
306 * to a string. 304 * to a string.
307 */ 305 */
308 class Utf8Decoder extends Converter<List<int>, String> 306 class Utf8Decoder extends Converter<List<int>, String>
309 implements ChunkedConverter<List<int>, String, List<int>, String> { 307 implements ChunkedConverter<List<int>, String, List<int>, String> {
310
311 final bool _allowMalformed; 308 final bool _allowMalformed;
312 309
313 /** 310 /**
314 * Instantiates a new [Utf8Decoder]. 311 * Instantiates a new [Utf8Decoder].
315 * 312 *
316 * The optional [allowMalformed] argument defines how [convert] deals 313 * The optional [allowMalformed] argument defines how [convert] deals
317 * with invalid or unterminated character sequences. 314 * with invalid or unterminated character sequences.
318 * 315 *
319 * If it is `true` [convert] replaces invalid (or unterminated) character 316 * If it is `true` [convert] replaces invalid (or unterminated) character
320 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise 317 * sequences with the Unicode Replacement character `U+FFFD` (�). Otherwise
321 * it throws a [FormatException]. 318 * it throws a [FormatException].
322 */ 319 */
323 const Utf8Decoder({ bool allowMalformed: false }) 320 const Utf8Decoder({bool allowMalformed: false})
324 : this._allowMalformed = allowMalformed; 321 : this._allowMalformed = allowMalformed;
325 322
326 /** 323 /**
327 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the 324 * Converts the UTF-8 [codeUnits] (a list of unsigned 8-bit integers) to the
328 * corresponding string. 325 * corresponding string.
329 * 326 *
330 * Uses the code units from [start] to, but not including, [end]. 327 * Uses the code units from [start] to, but not including, [end].
331 * If [end] is omitted, it defaults to `codeUnits.length`. 328 * If [end] is omitted, it defaults to `codeUnits.length`.
332 * 329 *
333 * If the [codeUnits] start with the encoding of a 330 * If the [codeUnits] start with the encoding of a
(...skipping 36 matching lines...) Expand 10 before | Expand all | Expand 10 after
370 // Override the base class's bind, to provide a better type. 367 // Override the base class's bind, to provide a better type.
371 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream); 368 Stream<String> bind(Stream<List<int>> stream) => super.bind(stream);
372 369
373 external Converter<List<int>, T> fuse<T>(Converter<String, T> next); 370 external Converter<List<int>, T> fuse<T>(Converter<String, T> next);
374 371
375 external static String _convertIntercepted( 372 external static String _convertIntercepted(
376 bool allowMalformed, List<int> codeUnits, int start, int end); 373 bool allowMalformed, List<int> codeUnits, int start, int end);
377 } 374 }
378 375
379 // UTF-8 constants. 376 // UTF-8 constants.
380 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits 377 const int _ONE_BYTE_LIMIT = 0x7f; // 7 bits
381 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits 378 const int _TWO_BYTE_LIMIT = 0x7ff; // 11 bits
382 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits 379 const int _THREE_BYTE_LIMIT = 0xffff; // 16 bits
383 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max. 380 const int _FOUR_BYTE_LIMIT = 0x10ffff; // 21 bits, truncated to Unicode max.
384 381
385 // UTF-16 constants. 382 // UTF-16 constants.
386 const int _SURROGATE_MASK = 0xF800; 383 const int _SURROGATE_MASK = 0xF800;
387 const int _SURROGATE_TAG_MASK = 0xFC00; 384 const int _SURROGATE_TAG_MASK = 0xFC00;
388 const int _SURROGATE_VALUE_MASK = 0x3FF; 385 const int _SURROGATE_VALUE_MASK = 0x3FF;
389 const int _LEAD_SURROGATE_MIN = 0xD800; 386 const int _LEAD_SURROGATE_MIN = 0xD800;
390 const int _TAIL_SURROGATE_MIN = 0xDC00; 387 const int _TAIL_SURROGATE_MIN = 0xDC00;
391 388
392 bool _isLeadSurrogate(int codeUnit) => 389 bool _isLeadSurrogate(int codeUnit) =>
393 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN; 390 (codeUnit & _SURROGATE_TAG_MASK) == _LEAD_SURROGATE_MIN;
394 bool _isTailSurrogate(int codeUnit) => 391 bool _isTailSurrogate(int codeUnit) =>
395 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN; 392 (codeUnit & _SURROGATE_TAG_MASK) == _TAIL_SURROGATE_MIN;
396 int _combineSurrogatePair(int lead, int tail) => 393 int _combineSurrogatePair(int lead, int tail) =>
397 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) 394 0x10000 + ((lead & _SURROGATE_VALUE_MASK) << 10) |
398 | (tail & _SURROGATE_VALUE_MASK); 395 (tail & _SURROGATE_VALUE_MASK);
399 396
400 /** 397 /**
401 * Decodes UTF-8. 398 * Decodes UTF-8.
402 * 399 *
403 * The decoder handles chunked input. 400 * The decoder handles chunked input.
404 */ 401 */
405 // TODO(floitsch): make this class public. 402 // TODO(floitsch): make this class public.
406 class _Utf8Decoder { 403 class _Utf8Decoder {
407 final bool _allowMalformed; 404 final bool _allowMalformed;
408 final StringSink _stringSink; 405 final StringSink _stringSink;
409 bool _isFirstCharacter = true; 406 bool _isFirstCharacter = true;
410 int _value = 0; 407 int _value = 0;
411 int _expectedUnits = 0; 408 int _expectedUnits = 0;
412 int _extraUnits = 0; 409 int _extraUnits = 0;
413 410
414 _Utf8Decoder(this._stringSink, this._allowMalformed); 411 _Utf8Decoder(this._stringSink, this._allowMalformed);
415 412
416 bool get hasPartialInput => _expectedUnits > 0; 413 bool get hasPartialInput => _expectedUnits > 0;
417 414
418 // Limits of one through four byte encodings. 415 // Limits of one through four byte encodings.
419 static const List<int> _LIMITS = const <int>[ 416 static const List<int> _LIMITS = const <int>[
420 _ONE_BYTE_LIMIT, 417 _ONE_BYTE_LIMIT,
421 _TWO_BYTE_LIMIT, 418 _TWO_BYTE_LIMIT,
422 _THREE_BYTE_LIMIT, 419 _THREE_BYTE_LIMIT,
423 _FOUR_BYTE_LIMIT ]; 420 _FOUR_BYTE_LIMIT
421 ];
424 422
425 void close() { 423 void close() {
426 flush(); 424 flush();
427 } 425 }
428 426
429 /** 427 /**
430 * Flushes this decoder as if closed. 428 * Flushes this decoder as if closed.
431 * 429 *
432 * This method throws if the input was partial and the decoder was 430 * This method throws if the input was partial and the decoder was
433 * constructed with `allowMalformed` set to `false`. 431 * constructed with `allowMalformed` set to `false`.
434 * 432 *
435 * The [source] and [offset] of the current position may be provided, 433 * The [source] and [offset] of the current position may be provided,
436 * and are included in the exception if one is thrown. 434 * and are included in the exception if one is thrown.
437 */ 435 */
438 void flush([List<int> source, int offset]) { 436 void flush([List<int> source, int offset]) {
439 if (hasPartialInput) { 437 if (hasPartialInput) {
440 if (!_allowMalformed) { 438 if (!_allowMalformed) {
441 throw new FormatException("Unfinished UTF-8 octet sequence", 439 throw new FormatException(
442 source, offset); 440 "Unfinished UTF-8 octet sequence", source, offset);
443 } 441 }
444 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); 442 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
445 _value = 0; 443 _value = 0;
446 _expectedUnits = 0; 444 _expectedUnits = 0;
447 _extraUnits = 0; 445 _extraUnits = 0;
448 } 446 }
449 } 447 }
450 448
451 void convert(List<int> codeUnits, int startIndex, int endIndex) { 449 void convert(List<int> codeUnits, int startIndex, int endIndex) {
452 int value = _value; 450 int value = _value;
(...skipping 13 matching lines...) Expand all
466 return to - from; 464 return to - from;
467 } 465 }
468 466
469 void addSingleBytes(int from, int to) { 467 void addSingleBytes(int from, int to) {
470 assert(from >= startIndex && from <= endIndex); 468 assert(from >= startIndex && from <= endIndex);
471 assert(to >= startIndex && to <= endIndex); 469 assert(to >= startIndex && to <= endIndex);
472 _stringSink.write(new String.fromCharCodes(codeUnits, from, to)); 470 _stringSink.write(new String.fromCharCodes(codeUnits, from, to));
473 } 471 }
474 472
475 int i = startIndex; 473 int i = startIndex;
476 loop: while (true) { 474 loop:
477 multibyte: if (expectedUnits > 0) { 475 while (true) {
476 multibyte:
477 if (expectedUnits > 0) {
478 do { 478 do {
479 if (i == endIndex) { 479 if (i == endIndex) {
480 break loop; 480 break loop;
481 } 481 }
482 int unit = codeUnits[i]; 482 int unit = codeUnits[i];
483 if ((unit & 0xC0) != 0x80) { 483 if ((unit & 0xC0) != 0x80) {
484 expectedUnits = 0; 484 expectedUnits = 0;
485 if (!_allowMalformed) { 485 if (!_allowMalformed) {
486 throw new FormatException( 486 throw new FormatException(
487 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}", 487 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",
488 codeUnits, i); 488 codeUnits,
489 i);
489 } 490 }
490 _isFirstCharacter = false; 491 _isFirstCharacter = false;
491 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); 492 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
492 break multibyte; 493 break multibyte;
493 } else { 494 } else {
494 value = (value << 6) | (unit & 0x3f); 495 value = (value << 6) | (unit & 0x3f);
495 expectedUnits--; 496 expectedUnits--;
496 i++; 497 i++;
497 } 498 }
498 } while (expectedUnits > 0); 499 } while (expectedUnits > 0);
499 if (value <= _LIMITS[extraUnits - 1]) { 500 if (value <= _LIMITS[extraUnits - 1]) {
500 // Overly long encoding. The value could be encoded with a shorter 501 // Overly long encoding. The value could be encoded with a shorter
501 // encoding. 502 // encoding.
502 if (!_allowMalformed) { 503 if (!_allowMalformed) {
503 throw new FormatException( 504 throw new FormatException(
504 "Overlong encoding of 0x${value.toRadixString(16)}", 505 "Overlong encoding of 0x${value.toRadixString(16)}",
505 codeUnits, i - extraUnits - 1); 506 codeUnits,
507 i - extraUnits - 1);
506 } 508 }
507 expectedUnits = extraUnits = 0; 509 expectedUnits = extraUnits = 0;
508 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 510 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
509 } 511 }
510 if (value > _FOUR_BYTE_LIMIT) { 512 if (value > _FOUR_BYTE_LIMIT) {
511 if (!_allowMalformed) { 513 if (!_allowMalformed) {
512 throw new FormatException("Character outside valid Unicode range: " 514 throw new FormatException(
513 "0x${value.toRadixString(16)}", 515 "Character outside valid Unicode range: "
514 codeUnits, i - extraUnits - 1); 516 "0x${value.toRadixString(16)}",
517 codeUnits,
518 i - extraUnits - 1);
515 } 519 }
516 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 520 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
517 } 521 }
518 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) { 522 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {
519 _stringSink.writeCharCode(value); 523 _stringSink.writeCharCode(value);
520 } 524 }
521 _isFirstCharacter = false; 525 _isFirstCharacter = false;
522 } 526 }
523 527
524 while (i < endIndex) { 528 while (i < endIndex) {
525 int oneBytes = scanOneByteCharacters(codeUnits, i); 529 int oneBytes = scanOneByteCharacters(codeUnits, i);
526 if (oneBytes > 0) { 530 if (oneBytes > 0) {
527 _isFirstCharacter = false; 531 _isFirstCharacter = false;
528 addSingleBytes(i, i + oneBytes); 532 addSingleBytes(i, i + oneBytes);
529 i += oneBytes; 533 i += oneBytes;
530 if (i == endIndex) break; 534 if (i == endIndex) break;
531 } 535 }
532 int unit = codeUnits[i++]; 536 int unit = codeUnits[i++];
533 // TODO(floitsch): the way we test we could potentially allow 537 // TODO(floitsch): the way we test we could potentially allow
534 // units that are too large, if they happen to have the 538 // units that are too large, if they happen to have the
535 // right bit-pattern. (Same is true for the multibyte loop above). 539 // right bit-pattern. (Same is true for the multibyte loop above).
536 // TODO(floitsch): optimize this loop. See: 540 // TODO(floitsch): optimize this loop. See:
537 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80 541 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80
538 if (unit < 0) { 542 if (unit < 0) {
539 // TODO(floitsch): should this be unit <= 0 ? 543 // TODO(floitsch): should this be unit <= 0 ?
540 if (!_allowMalformed) { 544 if (!_allowMalformed) {
541 throw new FormatException( 545 throw new FormatException(
542 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}", 546 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}",
543 codeUnits, i - 1); 547 codeUnits,
548 i - 1);
544 } 549 }
545 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); 550 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
546 } else { 551 } else {
547 assert(unit > _ONE_BYTE_LIMIT); 552 assert(unit > _ONE_BYTE_LIMIT);
548 if ((unit & 0xE0) == 0xC0) { 553 if ((unit & 0xE0) == 0xC0) {
549 value = unit & 0x1F; 554 value = unit & 0x1F;
550 expectedUnits = extraUnits = 1; 555 expectedUnits = extraUnits = 1;
551 continue loop; 556 continue loop;
552 } 557 }
553 if ((unit & 0xF0) == 0xE0) { 558 if ((unit & 0xF0) == 0xE0) {
554 value = unit & 0x0F; 559 value = unit & 0x0F;
555 expectedUnits = extraUnits = 2; 560 expectedUnits = extraUnits = 2;
556 continue loop; 561 continue loop;
557 } 562 }
558 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. 563 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.
559 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { 564 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
560 value = unit & 0x07; 565 value = unit & 0x07;
561 expectedUnits = extraUnits = 3; 566 expectedUnits = extraUnits = 3;
562 continue loop; 567 continue loop;
563 } 568 }
564 if (!_allowMalformed) { 569 if (!_allowMalformed) {
565 throw new FormatException( 570 throw new FormatException(
566 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}", 571 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}",
567 codeUnits, i - 1); 572 codeUnits,
573 i - 1);
568 } 574 }
569 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 575 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
570 expectedUnits = extraUnits = 0; 576 expectedUnits = extraUnits = 0;
571 _isFirstCharacter = false; 577 _isFirstCharacter = false;
572 _stringSink.writeCharCode(value); 578 _stringSink.writeCharCode(value);
573 } 579 }
574 } 580 }
575 break loop; 581 break loop;
576 } 582 }
577 if (expectedUnits > 0) { 583 if (expectedUnits > 0) {
578 _value = value; 584 _value = value;
579 _expectedUnits = expectedUnits; 585 _expectedUnits = expectedUnits;
580 _extraUnits = extraUnits; 586 _extraUnits = extraUnits;
581 } 587 }
582 } 588 }
583 } 589 }
OLDNEW
« no previous file with comments | « sdk/lib/convert/string_conversion.dart ('k') | sdk/lib/core/bool.dart » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698