OLD | NEW |
---|---|
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file | 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file |
2 // for details. All rights reserved. Use of this source code is governed by a | 2 // for details. All rights reserved. Use of this source code is governed by a |
3 // BSD-style license that can be found in the LICENSE file. | 3 // BSD-style license that can be found in the LICENSE file. |
4 | 4 |
5 part of dart.convert; | 5 part of dart.convert; |
6 | 6 |
7 /** The Unicode Replacement character `U+FFFD` (�). */ | 7 /** The Unicode Replacement character `U+FFFD` (�). */ |
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; | 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; |
9 | 9 |
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ | 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ |
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
413 _value = 0; | 413 _value = 0; |
414 _expectedUnits = 0; | 414 _expectedUnits = 0; |
415 _extraUnits = 0; | 415 _extraUnits = 0; |
416 } | 416 } |
417 } | 417 } |
418 | 418 |
419 void convert(List<int> codeUnits, int startIndex, int endIndex) { | 419 void convert(List<int> codeUnits, int startIndex, int endIndex) { |
420 int value = _value; | 420 int value = _value; |
421 int expectedUnits = _expectedUnits; | 421 int expectedUnits = _expectedUnits; |
422 int extraUnits = _extraUnits; | 422 int extraUnits = _extraUnits; |
423 int singleBytesCount = 0; | |
423 _value = 0; | 424 _value = 0; |
424 _expectedUnits = 0; | 425 _expectedUnits = 0; |
425 _extraUnits = 0; | 426 _extraUnits = 0; |
426 | 427 |
428 void addSingleBytes(int from, int to) { | |
429 assert(singleBytesCount > 0); | |
430 assert(from >= startIndex && from <= endIndex); | |
431 assert(to >= startIndex && to <= endIndex); | |
432 if (from == 0 && to == codeUnits.length) { | |
433 _stringSink.write(new String.fromCharCodes(codeUnits)); | |
434 } else { | |
435 _stringSink.write( | |
436 new String.fromCharCodes(codeUnits.sublist(from, to))); | |
437 } | |
438 singleBytesCount = 0; | |
439 } | |
440 | |
427 int i = startIndex; | 441 int i = startIndex; |
428 loop: while (true) { | 442 loop: while (true) { |
429 multibyte: if (expectedUnits > 0) { | 443 multibyte: if (expectedUnits > 0) { |
430 do { | 444 do { |
431 if (i == endIndex) { | 445 if (i == endIndex) { |
432 break loop; | 446 break loop; |
433 } | 447 } |
434 int unit = codeUnits[i]; | 448 int unit = codeUnits[i]; |
435 if ((unit & 0xC0) != 0x80) { | 449 if ((unit & 0xC0) != 0x80) { |
436 expectedUnits = 0; | 450 expectedUnits = 0; |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... | |
472 | 486 |
473 while (i < endIndex) { | 487 while (i < endIndex) { |
474 int unit = codeUnits[i++]; | 488 int unit = codeUnits[i++]; |
475 // TODO(floitsch): the way we test we could potentially allow | 489 // TODO(floitsch): the way we test we could potentially allow |
476 // units that are too large, if they happen to have the | 490 // units that are too large, if they happen to have the |
477 // right bit-pattern. (Same is true for the multibyte loop above). | 491 // right bit-pattern. (Same is true for the multibyte loop above). |
478 // TODO(floitsch): optimize this loop. See: | 492 // TODO(floitsch): optimize this loop. See: |
479 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80 | 493 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80 |
480 if (unit < 0) { | 494 if (unit < 0) { |
481 // TODO(floitsch): should this be unit <= 0 ? | 495 // TODO(floitsch): should this be unit <= 0 ? |
496 if (singleBytesCount > 0) { | |
497 addSingleBytes(i - singleBytesCount - 1, i - 1); | |
498 } | |
482 if (!_allowMalformed) { | 499 if (!_allowMalformed) { |
483 throw new FormatException( | 500 throw new FormatException( |
484 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}"); | 501 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}"); |
485 } | 502 } |
486 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); | 503 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); |
487 } else if (unit <= _ONE_BYTE_LIMIT) { | 504 } else if (unit <= _ONE_BYTE_LIMIT) { |
488 _isFirstCharacter = false; | 505 _isFirstCharacter = false; |
489 _stringSink.writeCharCode(unit); | 506 singleBytesCount++; |
490 } else { | 507 } else { |
508 if (singleBytesCount > 0) { | |
509 addSingleBytes(i - singleBytesCount - 1, i - 1); | |
Lasse Reichstein Nielsen
2014/03/24 09:21:01
i - 1 - singleBytesCount
Allows reuse of the (i -
Anders Johnsen
2014/03/24 12:01:01
Done.
| |
510 } | |
491 if ((unit & 0xE0) == 0xC0) { | 511 if ((unit & 0xE0) == 0xC0) { |
492 value = unit & 0x1F; | 512 value = unit & 0x1F; |
493 expectedUnits = extraUnits = 1; | 513 expectedUnits = extraUnits = 1; |
494 continue loop; | 514 continue loop; |
495 } | 515 } |
496 if ((unit & 0xF0) == 0xE0) { | 516 if ((unit & 0xF0) == 0xE0) { |
497 value = unit & 0x0F; | 517 value = unit & 0x0F; |
498 expectedUnits = extraUnits = 2; | 518 expectedUnits = extraUnits = 2; |
499 continue loop; | 519 continue loop; |
500 } | 520 } |
501 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. | 521 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. |
502 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { | 522 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { |
503 value = unit & 0x07; | 523 value = unit & 0x07; |
504 expectedUnits = extraUnits = 3; | 524 expectedUnits = extraUnits = 3; |
505 continue loop; | 525 continue loop; |
506 } | 526 } |
507 if (!_allowMalformed) { | 527 if (!_allowMalformed) { |
508 throw new FormatException( | 528 throw new FormatException( |
509 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); | 529 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); |
510 } | 530 } |
511 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; | 531 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; |
512 expectedUnits = extraUnits = 0; | 532 expectedUnits = extraUnits = 0; |
513 _isFirstCharacter = false; | 533 _isFirstCharacter = false; |
514 _stringSink.writeCharCode(value); | 534 _stringSink.writeCharCode(value); |
515 } | 535 } |
516 } | 536 } |
517 break loop; | 537 break loop; |
518 } | 538 } |
539 if (singleBytesCount > 0) { | |
540 addSingleBytes(i - singleBytesCount, endIndex); | |
541 } | |
519 if (expectedUnits > 0) { | 542 if (expectedUnits > 0) { |
520 _value = value; | 543 _value = value; |
521 _expectedUnits = expectedUnits; | 544 _expectedUnits = expectedUnits; |
522 _extraUnits = extraUnits; | 545 _extraUnits = extraUnits; |
523 } | 546 } |
524 } | 547 } |
525 } | 548 } |
OLD | NEW |