Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(352)

Side by Side Diff: sdk/lib/convert/utf.dart

Issue 208693008: Speed up ASCII, LATIN1, UTF8 converters, for single-byte codeunits. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Review fixes. Created 6 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « sdk/lib/convert/latin1.dart ('k') | no next file » | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2013, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 part of dart.convert; 5 part of dart.convert;
6 6
7 /** The Unicode Replacement character `U+FFFD` (�). */ 7 /** The Unicode Replacement character `U+FFFD` (�). */
8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD; 8 const int UNICODE_REPLACEMENT_CHARACTER_RUNE = 0xFFFD;
9 9
10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */ 10 /** The Unicode Byte Order Marker (BOM) character `U+FEFF`. */
(...skipping 402 matching lines...) Expand 10 before | Expand all | Expand 10 after
413 _value = 0; 413 _value = 0;
414 _expectedUnits = 0; 414 _expectedUnits = 0;
415 _extraUnits = 0; 415 _extraUnits = 0;
416 } 416 }
417 } 417 }
418 418
419 void convert(List<int> codeUnits, int startIndex, int endIndex) { 419 void convert(List<int> codeUnits, int startIndex, int endIndex) {
420 int value = _value; 420 int value = _value;
421 int expectedUnits = _expectedUnits; 421 int expectedUnits = _expectedUnits;
422 int extraUnits = _extraUnits; 422 int extraUnits = _extraUnits;
423 int singleBytesCount = 0;
423 _value = 0; 424 _value = 0;
424 _expectedUnits = 0; 425 _expectedUnits = 0;
425 _extraUnits = 0; 426 _extraUnits = 0;
426 427
428 void addSingleBytes(int from, int to) {
429 assert(singleBytesCount > 0);
430 assert(from >= startIndex && from <= endIndex);
431 assert(to >= startIndex && to <= endIndex);
432 if (from == 0 && to == codeUnits.length) {
433 _stringSink.write(new String.fromCharCodes(codeUnits));
434 } else {
435 _stringSink.write(
436 new String.fromCharCodes(codeUnits.sublist(from, to)));
437 }
438 singleBytesCount = 0;
439 }
440
427 int i = startIndex; 441 int i = startIndex;
428 loop: while (true) { 442 loop: while (true) {
429 multibyte: if (expectedUnits > 0) { 443 multibyte: if (expectedUnits > 0) {
430 do { 444 do {
431 if (i == endIndex) { 445 if (i == endIndex) {
432 break loop; 446 break loop;
433 } 447 }
434 int unit = codeUnits[i]; 448 int unit = codeUnits[i];
435 if ((unit & 0xC0) != 0x80) { 449 if ((unit & 0xC0) != 0x80) {
436 expectedUnits = 0; 450 expectedUnits = 0;
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after
472 486
473 while (i < endIndex) { 487 while (i < endIndex) {
474 int unit = codeUnits[i++]; 488 int unit = codeUnits[i++];
475 // TODO(floitsch): the way we test we could potentially allow 489 // TODO(floitsch): the way we test we could potentially allow
476 // units that are too large, if they happen to have the 490 // units that are too large, if they happen to have the
477 // right bit-pattern. (Same is true for the multibyte loop above). 491 // right bit-pattern. (Same is true for the multibyte loop above).
478 // TODO(floitsch): optimize this loop. See: 492 // TODO(floitsch): optimize this loop. See:
479 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80 493 // https://codereview.chromium.org/22929022/diff/1/sdk/lib/convert/utf.dart?column_width=80
480 if (unit < 0) { 494 if (unit < 0) {
481 // TODO(floitsch): should this be unit <= 0 ? 495 // TODO(floitsch): should this be unit <= 0 ?
496 if (singleBytesCount > 0) {
497 int to = i - 1;
498 addSingleBytes(to - singleBytesCount, to);
499 }
482 if (!_allowMalformed) { 500 if (!_allowMalformed) {
483 throw new FormatException( 501 throw new FormatException(
484 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}"); 502 "Negative UTF-8 code unit: -0x${(-unit).toRadixString(16)}");
485 } 503 }
486 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE); 504 _stringSink.writeCharCode(UNICODE_REPLACEMENT_CHARACTER_RUNE);
487 } else if (unit <= _ONE_BYTE_LIMIT) { 505 } else if (unit <= _ONE_BYTE_LIMIT) {
488 _isFirstCharacter = false; 506 _isFirstCharacter = false;
489 _stringSink.writeCharCode(unit); 507 singleBytesCount++;
490 } else { 508 } else {
509 if (singleBytesCount > 0) {
510 int to = i - 1;
511 addSingleBytes(to - singleBytesCount, to);
512 }
491 if ((unit & 0xE0) == 0xC0) { 513 if ((unit & 0xE0) == 0xC0) {
492 value = unit & 0x1F; 514 value = unit & 0x1F;
493 expectedUnits = extraUnits = 1; 515 expectedUnits = extraUnits = 1;
494 continue loop; 516 continue loop;
495 } 517 }
496 if ((unit & 0xF0) == 0xE0) { 518 if ((unit & 0xF0) == 0xE0) {
497 value = unit & 0x0F; 519 value = unit & 0x0F;
498 expectedUnits = extraUnits = 2; 520 expectedUnits = extraUnits = 2;
499 continue loop; 521 continue loop;
500 } 522 }
501 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences. 523 // 0xF5, 0xF6 ... 0xFF never appear in valid UTF-8 sequences.
502 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) { 524 if ((unit & 0xF8) == 0xF0 && unit < 0xF5) {
503 value = unit & 0x07; 525 value = unit & 0x07;
504 expectedUnits = extraUnits = 3; 526 expectedUnits = extraUnits = 3;
505 continue loop; 527 continue loop;
506 } 528 }
507 if (!_allowMalformed) { 529 if (!_allowMalformed) {
508 throw new FormatException( 530 throw new FormatException(
509 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}"); 531 "Bad UTF-8 encoding 0x${unit.toRadixString(16)}");
510 } 532 }
511 value = UNICODE_REPLACEMENT_CHARACTER_RUNE; 533 value = UNICODE_REPLACEMENT_CHARACTER_RUNE;
512 expectedUnits = extraUnits = 0; 534 expectedUnits = extraUnits = 0;
513 _isFirstCharacter = false; 535 _isFirstCharacter = false;
514 _stringSink.writeCharCode(value); 536 _stringSink.writeCharCode(value);
515 } 537 }
516 } 538 }
517 break loop; 539 break loop;
518 } 540 }
541 if (singleBytesCount > 0) {
542 addSingleBytes(i - singleBytesCount, endIndex);
543 }
519 if (expectedUnits > 0) { 544 if (expectedUnits > 0) {
520 _value = value; 545 _value = value;
521 _expectedUnits = expectedUnits; 546 _expectedUnits = expectedUnits;
522 _extraUnits = extraUnits; 547 _extraUnits = extraUnits;
523 } 548 }
524 } 549 }
525 } 550 }
OLDNEW
« no previous file with comments | « sdk/lib/convert/latin1.dart ('k') | no next file » | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698