Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(180)

Side by Side Diff: runtime/lib/string_patch.dart

Issue 858543002: Avoid extra duplication of substrings during string.replaceAll. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Also do replaceAllMapped. Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 patch class String { 5 patch class String {
6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes, 6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes,
7 [int start = 0, int end]) { 7 [int start = 0, int end]) {
8 return _StringBase.createFromCharCodes(charCodes, start, end); 8 return _StringBase.createFromCharCodes(charCodes, start, end);
9 } 9 }
10 10
(...skipping 22 matching lines...) Expand all
33 {String defaultValue}) 33 {String defaultValue})
34 native "String_fromEnvironment"; 34 native "String_fromEnvironment";
35 } 35 }
36 36
37 37
38 /** 38 /**
39 * [_StringBase] contains common methods used by concrete String 39 * [_StringBase] contains common methods used by concrete String
40 * implementations, e.g., _OneByteString. 40 * implementations, e.g., _OneByteString.
41 */ 41 */
42 class _StringBase { 42 class _StringBase {
43 // Constants used by replaceAll encoding of matches.
44 static const int _lengthShift = 15;
45 static const int _maxStartValue = (1 << _lengthShift) - 1;
46 static const int _maxLengthValue = (1 << (30 - _lengthShift)) - 1;
47 static const int _startMask = _maxStartValue;
43 48
44 factory _StringBase._uninstantiable() { 49 factory _StringBase._uninstantiable() {
45 throw new UnsupportedError( 50 throw new UnsupportedError(
46 "_StringBase can't be instaniated"); 51 "_StringBase can't be instaniated");
47 } 52 }
48 53
49 Type get runtimeType => String; 54 Type get runtimeType => String;
50 55
51 int get hashCode native "String_getHashCode"; 56 int get hashCode native "String_getHashCode";
52 57
(...skipping 428 matching lines...) Expand 10 before | Expand all | Expand 10 after
481 startIndex == 0 ? pattern.allMatches(this).iterator 486 startIndex == 0 ? pattern.allMatches(this).iterator
482 : pattern.allMatches(this, startIndex).iterator; 487 : pattern.allMatches(this, startIndex).iterator;
483 if (!iterator.moveNext()) return this; 488 if (!iterator.moveNext()) return this;
484 Match match = iterator.current; 489 Match match = iterator.current;
485 return "${this.substring(0, match.start)}" 490 return "${this.substring(0, match.start)}"
486 "$replacement" 491 "$replacement"
487 "${this.substring(match.end)}"; 492 "${this.substring(match.end)}";
488 } 493 }
489 494
490 String replaceAll(Pattern pattern, String replacement) { 495 String replaceAll(Pattern pattern, String replacement) {
491 if (pattern is! Pattern) { 496 List matches = [];
492 throw new ArgumentError("${pattern} is not a Pattern"); 497 int length = 0;
zerny-google 2015/01/20 13:43:11 Is it OK that we no longer have an explicit Argume
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Almost. I'll add back a check for "null", but we s
498 int replacementLength = replacement.length;
499 int startIndex = 0;
500 if (replacementLength == 0) {
501 for (Match match in pattern.allMatches(this)) {
502 int matchStart = match.start;
503 int preMatchLength = match.start - startIndex;
504 if (preMatchLength > 0) {
505 if (startIndex <= _maxStartValue &&
506 preMatchLength <= _maxLengthValue) {
507 matches.add(-(startIndex | preMatchLength << _lengthShift));
zerny-google 2015/01/20 13:43:11 Nit: add parens around << Also, does compacting t
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Done. I see ~5% improvement on a small/micro bench
508 } else {
509 matches.add(startIndex);
510 matches.add(matchStart);
511 }
512 length += preMatchLength;
513 }
514 startIndex = match.end;
515 }
516 } else {
517 for (Match match in pattern.allMatches(this)) {
518 int matchStart = match.start;
519 int preMatchLength = match.start - startIndex;
520 if (preMatchLength > 0) {
521 if (startIndex <= _maxStartValue &&
522 preMatchLength <= _maxLengthValue) {
523 matches.add(-(startIndex | preMatchLength << _lengthShift));
524 } else {
525 matches.add(startIndex);
526 matches.add(matchStart);
527 }
528 length += preMatchLength;
529 }
530 matches.add(replacement);
531 length += replacementLength;
532 startIndex = match.end;
533 }
493 } 534 }
494 if (replacement is! String) { 535 if (startIndex < this.length) {
495 throw new ArgumentError( 536 matches.add(startIndex);
496 "${replacement} is not a String or Match->String function"); 537 matches.add(this.length);
538 length += this.length - startIndex;
497 } 539 }
498 StringBuffer buffer = new StringBuffer(); 540 bool replacementIsOneByte = (replacement is _OneByteString) ||
541 (replacement is _ExternalOneByteString);
542 bool thisIsOneByte = (this is _OneByteString) ||
543 (this is _ExternalOneByteString);
544 if (replacementIsOneByte && thisIsOneByte) {
545 _joinReplaceAllOneByteResult(this, matches, length);
546 }
547 return _joinReplaceAllResult(this, matches, length,
Florian Schneider 2015/01/20 16:39:33 Would it make sense to implement the 'this' two-by
Lasse Reichstein Nielsen 2015/01/21 10:48:43 The one-byte case actually turns out to be slower
548 replacementIsOneByte);
549 }
550
551 /**
552 * As [_joinReplaceAllResult], but knowing that the result
553 * is always a [_OneByteString].
554 */
555 static String _joinReplaceAllOneByteResult(String base,
556 List matches,
557 int length) {
558 _OneByteString result = _OneByteString._allocate(length);
559 int writeIndex = 0;
560 for (int i = 0; i < matches.length; i++) {
561 var entry = matches[i];
562 if (entry is _Smi) {
563 int sliceStart = entry;
564 int sliceEnd;
565 if (sliceStart < 0) {
566 int bits = -sliceStart;
567 sliceStart = bits & _startMask;
568 sliceEnd = sliceStart + (bits >> _lengthShift);
569 } else {
570 i++;
siva 2015/01/21 00:29:20 This is really dicey, 'i' is being incremented and
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Adding assert. This function should only be called
571 sliceEnd = matches[i];
572 }
573 for (int j = sliceStart; j < sliceEnd; j++) {
574 result._setAt(writeIndex++, base.codeUnitAt(j));
575 }
576 } else {
577 _OneByteString replacement = entry;
578 for (int j = 0; j < replacement.length; j++) {
579 result._setAt(writeIndex++, replacement.codeUnitAt(j));
580 }
581 }
582 }
583 assert(writeIndex == length);
584 return result;
585 }
586
587 /**
588 * Combine the results of a [replaceAll] match into a new string.
589 *
590 * The [matches] lists contains Smi index pairs representing slices of
591 * [base] and [String]s to be put in between the slices.
592 *
593 * The total [length] of the resulting string is known, as is
594 * whether the replacement strings are one-byte strings.
595 * If they are, then we have to check the base string slices to know
596 * whether the result must be a one-byte string.
597 */
598 static String _joinReplaceAllResult(String base, List matches, int length,
599 bool replacementStringsAreOneByte)
600 native "StringBase_joinReplaceAllResult";
601
602 String replaceAllMapped(Pattern pattern, String replace(Match match)) {
603 List matches = [];
604 int length = 0;
499 int startIndex = 0; 605 int startIndex = 0;
606 bool replacementStringsAreOneByte = true;
500 for (Match match in pattern.allMatches(this)) { 607 for (Match match in pattern.allMatches(this)) {
501 buffer..write(this.substring(startIndex, match.start)) 608 int matchStart = match.start;
502 ..write(replacement); 609 int preMatchLength = match.start - startIndex;
610 if (preMatchLength > 0) {
611 if (startIndex <= _maxStartValue &&
612 preMatchLength <= _maxLengthValue) {
613 matches.add(-(startIndex | preMatchLength << _lengthShift));
614 } else {
615 matches.add(startIndex);
616 matches.add(matchStart);
617 }
618 length += preMatchLength;
619 }
620 String replacement = replace(match);
621 matches.add(replacement);
622 length += replacement.length;
623 replacementStringsAreOneByte = replacementStringsAreOneByte &&
624 (replacement is _OneByteString ||
625 replacement is _ExternalOneByteString);
503 startIndex = match.end; 626 startIndex = match.end;
504 } 627 }
505 return (buffer..write(this.substring(startIndex))).toString(); 628 if (startIndex < this.length) {
506 } 629 matches.add(startIndex);
507 630 matches.add(this.length);
508 String replaceAllMapped(Pattern pattern, String replace(Match match)) { 631 length += this.length - startIndex;
509 return splitMapJoin(pattern, onMatch: replace); 632 }
633 bool thisIsOneByte = (this is _OneByteString) ||
634 (this is _ExternalOneByteString);
635 if (replacementStringsAreOneByte && thisIsOneByte) {
636 _joinReplaceAllOneByteResult(this, matches, length);
637 }
638 return _joinReplaceAllResult(this, matches, length,
639 replacementStringsAreOneByte);
510 } 640 }
511 641
512 static String _matchString(Match match) => match[0]; 642 static String _matchString(Match match) => match[0];
513 static String _stringIdentity(String string) => string; 643 static String _stringIdentity(String string) => string;
514 644
515 String _splitMapJoinEmptyString(String onMatch(Match match), 645 String _splitMapJoinEmptyString(String onMatch(Match match),
516 String onNonMatch(String nonMatch)) { 646 String onNonMatch(String nonMatch)) {
517 // Pattern is the empty string. 647 // Pattern is the empty string.
518 StringBuffer buffer = new StringBuffer(); 648 StringBuffer buffer = new StringBuffer();
519 int length = this.length; 649 int length = this.length;
(...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after
1075 class _CodeUnits extends Object with ListMixin<int>, 1205 class _CodeUnits extends Object with ListMixin<int>,
1076 UnmodifiableListMixin<int> { 1206 UnmodifiableListMixin<int> {
1077 /** The string that this is the code units of. */ 1207 /** The string that this is the code units of. */
1078 String _string; 1208 String _string;
1079 1209
1080 _CodeUnits(this._string); 1210 _CodeUnits(this._string);
1081 1211
1082 int get length => _string.length; 1212 int get length => _string.length;
1083 int operator[](int i) => _string.codeUnitAt(i); 1213 int operator[](int i) => _string.codeUnitAt(i);
1084 } 1214 }
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698