Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(4)

Side by Side Diff: sdk/lib/convert/utf.dart

Issue 435553002: Improve utf8 decoding of single-char bytes, by isolating the loop. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Revert debug change. Created 6 years, 4 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « no previous file | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLD | NEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.convert; 5 part of dart.convert;
6 6
7 /** The Unicode Replacement character `U+FFFD` (�). */ 7 /** The Unicode Replacement character `U+FFFD` (�). */
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;
9 9
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after
413 _value = 0; 413 _value = 0;
414 _expectedUnits = 0; 414 _expectedUnits = 0;
415 _extraUnits = 0; 415 _extraUnits = 0;
416 } 416 }
417 } 417 }
418 418
419 void convert(List<int> codeUnits, int startIndex, int endIndex) { 419 void convert(List<int> codeUnits, int startIndex, int endIndex) {
420 int value = _value; 420 int value = _value;
421 int expectedUnits = _expectedUnits; 421 int expectedUnits = _expectedUnits;
422 int extraUnits = _extraUnits; 422 int extraUnits = _extraUnits;
423 int singleBytesCount = 0;
424 _value = 0; 423 _value = 0;
425 _expectedUnits = 0; 424 _expectedUnits = 0;
426 _extraUnits = 0; 425 _extraUnits = 0;
427 426
427 int scanOneByteCharacters(units, int from) {
428 final to = endIndex;
429 final mask = ~_ONE_BYTE_LIMIT;
430 for (var i = from; i < to; i++) {
431 if ((units[i] & mask) != 0) return i - from;
432 }
433 return to - from;
434 }
435
428 void addSingleBytes(int from, int to) { 436 void addSingleBytes(int from, int to) {
429 assert(singleBytesCount > 0);
430 assert(from >= startIndex && from <= endIndex); 437 assert(from >= startIndex && from <= endIndex);
431 assert(to >= startIndex && to <= endIndex); 438 assert(to >= startIndex && to <= endIndex);
432 if (from == 0 && to == codeUnits.length) { 439 if (from == 0 && to == codeUnits.length) {
433 _stringSink.write(new String.fromCharCodes(codeUnits)); 440 _stringSink.write(new String.fromCharCodes(codeUnits));
434 } else { 441 } else {
435 _stringSink.write( 442 _stringSink.write(
436 new String.fromCharCodes(codeUnits.sublist(from, to))); 443 new String.fromCharCodes(codeUnits.sublist(from, to)));
437 } 444 }
438 singleBytesCount = 0;
439 } 445 }
440 446
441 int i = startIndex; 447 int i = startIndex;
442 loop: while (true) { 448 loop: while (true) {
443 multibyte: if (expectedUnits > 0) { 449 multibyte: if (expectedUnits > 0) {
444 do { 450 do {
445 if (i == endIndex) { 451 if (i == endIndex) {
446 break loop; 452 break loop;
447 } 453 }
448 int unit = codeUnits[i]; 454 int unit = codeUnits[i];
(...skipping 29 matching lines...) Expand all
478 } 484 }
479 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 485 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
480 } 486 }
481 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) { 487 if (!_isFirstCharacter || value != UNICODE_BOM_CHARACTER_RUNE) {
482 _stringSink.writeCharCode(value); 488 _stringSink.writeCharCode(value);
483 } 489 }
484 _isFirstCharacter = false; 490 _isFirstCharacter = false;
485 } 491 }
486 492
487 while (i < endIndex) { 493 while (i < endIndex) {
494 int oneBytes = scanOneByteCharacters(codeUnits, i);
495 if (oneBytes > 0) {
496 _isFirstCharacter = false;
497 addSingleBytes(i, i + oneBytes);
498 i += oneBytes;
499 if (i == endIndex) break;
500 }
488 int unit = codeUnits[i++]; 501 int unit = codeUnits[i++];
489 // TODO(floitsch): the way we test we could potentially allow 502 // TODO(floitsch): the way we test we could potentially allow
490 // units that are too large, if they happen to have the 503 // units that are too large, if they happen to have the
491 // right bit-pattern. (Same is true for the multibyte loop above). 504 // right bit-pattern. (Same is true for the multibyte loop above).
492 // TODO(floitsch): optimize this loop. See: 505 // TODO(floitsch): optimize this loop. See:
493 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80 506 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80
494 if (unit < 0) { 507 if (unit < 0) {
495 // TODO(floitsch): should this be unit <= 0 ? 508 // TODO(floitsch): should this be unit <= 0 ?
496 if (singleBytesCount > 0) {
497 int to = i - 1;
498 addSingleBytes(to - singleBytesCount, to);
499 }
500 if (!_allowMalformed) { 509 if (!_allowMalformed) {
501 throw new FormatException( 510 throw new FormatException(
502 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}"); 511 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
503 } 512 }
504 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); 513 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
505 } else if (unit <= _ONE_BYTE_LIMIT) {
506 _isFirstCharacter = false;
507 singleBytesCount++;
508 } else { 514 } else {
509 if (singleBytesCount > 0) { 515 assert(unit > _ONE_BYTE_LIMIT);
510 int to = i - 1;
511 addSingleBytes(to - singleBytesCount, to);
512 }
513 if ((unit & 0xE0) == 0xC0) { 516 if ((unit & 0xE0) == 0xC0) {
514 value = unit & 0x1F; 517 value = unit & 0x1F;
515 expectedUnits = extraUnits = 1; 518 expectedUnits = extraUnits = 1;
516 continue loop; 519 continue loop;
517 } 520 }
518 if ((unit & 0xF0) == 0xE0) { 521 if ((unit & 0xF0) == 0xE0) {
519 value = unit & 0x0F; 522 value = unit & 0x0F;
520 expectedUnits = extraUnits = 2; 523 expectedUnits = extraUnits = 2;
521 continue loop; 524 continue loop;
522 } 525 }
523 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. 526 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.
524 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { 527 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
525 value = unit & 0x07; 528 value = unit & 0x07;
526 expectedUnits = extraUnits = 3; 529 expectedUnits = extraUnits = 3;
527 continue loop; 530 continue loop;
528 } 531 }
529 if (!_allowMalformed) { 532 if (!_allowMalformed) {
530 throw new FormatException( 533 throw new FormatException(
531 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); 534 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
532 } 535 }
533 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 536 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
534 expectedUnits = extraUnits = 0; 537 expectedUnits = extraUnits = 0;
535 _isFirstCharacter = false; 538 _isFirstCharacter = false;
536 _stringSink.writeCharCode(value); 539 _stringSink.writeCharCode(value);
537 } 540 }
538 } 541 }
539 break loop; 542 break loop;
540 } 543 }
541 if (singleBytesCount > 0) {
542 addSingleBytes(i - singleBytesCount, endIndex);
543 }
544 if (expectedUnits > 0) { 544 if (expectedUnits > 0) {
545 _value = value; 545 _value = value;
546 _expectedUnits = expectedUnits; 546 _expectedUnits = expectedUnits;
547 _extraUnits = extraUnits; 547 _extraUnits = extraUnits;
548 } 548 }
549 } 549 }
550 } 550 }
OLD | NEW
« no previous file with comments | « no previous file | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698