Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(838)

Side by Side Diff: runtime/lib/string_patch.dart

Issue 858543002: Avoid extra duplication of substrings during string.replaceAll. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/vm/bootstrap_natives.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file 1 // Copyright (c) 2012, the Dart project authors. Please see the AUTHORS file
2 // for details. All rights reserved. Use of this source code is governed by a 2 // for details. All rights reserved. Use of this source code is governed by a
3 // BSD-style license that can be found in the LICENSE file. 3 // BSD-style license that can be found in the LICENSE file.
4 4
5 patch class String { 5 patch class String {
6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes, 6 /* patch */ factory String.fromCharCodes(Iterable<int> charCodes,
7 [int start = 0, int end]) { 7 [int start = 0, int end]) {
8 return _StringBase.createFromCharCodes(charCodes, start, end); 8 return _StringBase.createFromCharCodes(charCodes, start, end);
9 } 9 }
10 10
(...skipping 22 matching lines...) Expand all
33 {String defaultValue}) 33 {String defaultValue})
34 native "String_fromEnvironment"; 34 native "String_fromEnvironment";
35 } 35 }
36 36
37 37
38 /** 38 /**
39 * [_StringBase] contains common methods used by concrete String 39 * [_StringBase] contains common methods used by concrete String
40 * implementations, e.g., _OneByteString. 40 * implementations, e.g., _OneByteString.
41 */ 41 */
42 class _StringBase { 42 class _StringBase {
43 // Constants used by replaceAll encoding of string slices between matches.
44 // A string slice (start+length) is encoded in a single Smi to save memory
45 // overhead in the common case.
46 // We use fewer bits for length (11 bits) than for the start index (19+ bits).
47 // For long strings, it's possible to have many large indices,
48 // but it's unlikely to have many long lengths since slices don't overlap.
49 // If there are few matches in a long string, then there are few long slices,
50 // and if there are many matches, there'll likely be many short slices.
51 //
52 // Encoding is: 0((start << _lengthBits) | length)
53
54 // Number of bits used by length.
55 // This is the shift used to encode and decode the start index.
56 static const int _lengthBits = 11;
57 // The maximal allowed length value in an encoded slice.
58 static const int _maxLengthValue = (1 << _lengthBits) - 1;
59 // Mask of length in encoded smi value.
60 static const int _lengthMask = _maxLengthValue;
61 static const int _startBits = _maxUnsignedSmiBits - _lengthBits;
62 // Maximal allowed start index value in an encoded slice.
63 static const int _maxStartValue = (1 << _startBits) - 1;
64 // We pick 30 as a safe lower bound on available bits in a negative smi.
65 // TODO(lrn): Consider allowing more bits for start on 64-bit systems.
66 static const int _maxUnsignedSmiBits = 30;
67
68 // For longer strings, calling into C++ to create the result of a
69 // [replaceAll] is faster than [_joinReplaceAllOneByteResult].
70 // TODO(lrn): See if this limit can be tweaked.
71 static const int _maxJoinReplaceOneByteStringLength = 500;
43 72
44 factory _StringBase._uninstantiable() { 73 factory _StringBase._uninstantiable() {
45 throw new UnsupportedError( 74 throw new UnsupportedError(
46 "_StringBase can't be instaniated"); 75 "_StringBase can't be instaniated");
47 } 76 }
48 77
49 Type get runtimeType => String; 78 Type get runtimeType => String;
50 79
51 int get hashCode native "String_getHashCode"; 80 int get hashCode native "String_getHashCode";
52 81
(...skipping 427 matching lines...) Expand 10 before | Expand all | Expand 10 after
480 Iterator iterator = 509 Iterator iterator =
481 startIndex == 0 ? pattern.allMatches(this).iterator 510 startIndex == 0 ? pattern.allMatches(this).iterator
482 : pattern.allMatches(this, startIndex).iterator; 511 : pattern.allMatches(this, startIndex).iterator;
483 if (!iterator.moveNext()) return this; 512 if (!iterator.moveNext()) return this;
484 Match match = iterator.current; 513 Match match = iterator.current;
485 return "${this.substring(0, match.start)}" 514 return "${this.substring(0, match.start)}"
486 "$replacement" 515 "$replacement"
487 "${this.substring(match.end)}"; 516 "${this.substring(match.end)}";
488 } 517 }
489 518
519
520 static int _addReplaceSlice(List matches, int start, int end) {
521 int length = end - start;
522 if (length > 0) {
523 if (length <= _maxLengthValue && start <= _maxStartValue) {
524 matches.add(-((start << _lengthBits) | length));
525 } else {
526 matches.add(start);
527 matches.add(end);
528 }
529 }
530 return length;
531 }
532
490 String replaceAll(Pattern pattern, String replacement) { 533 String replaceAll(Pattern pattern, String replacement) {
491 if (pattern is! Pattern) { 534 if (pattern == null) throw new ArgumentError.notNull("pattern");
492 throw new ArgumentError("${pattern} is not a Pattern"); 535 if (replacement == null) throw new ArgumentError.notNull("replacement");
536 List matches = [];
537 int length = 0;
538 int replacementLength = replacement.length;
539 int startIndex = 0;
540 if (replacementLength == 0) {
541 int count = 0;
542 for (Match match in pattern.allMatches(this)) {
543 length += _addReplaceSlice(matches, startIndex, match.start);
544 startIndex = match.end;
545 }
546 } else {
547 for (Match match in pattern.allMatches(this)) {
548 length += _addReplaceSlice(matches, startIndex, match.start);
549 matches.add(replacement);
550 length += replacementLength;
551 startIndex = match.end;
552 }
493 } 553 }
494 if (replacement is! String) { 554 length += _addReplaceSlice(matches, startIndex, this.length);
495 throw new ArgumentError( 555 bool replacementIsOneByte = (replacement is _OneByteString) ||
496 "${replacement} is not a String or Match->String function"); 556 (replacement is _ExternalOneByteString);
557 if (replacementIsOneByte && length < _maxJoinReplaceOneByteStringLength) {
558 // TODO(lrn): Is there a cut-off point, or is runtime always faster?
559 bool thisIsOneByte = (this is _OneByteString) ||
560 (this is _ExternalOneByteString);
561 if (replacementIsOneByte && thisIsOneByte) {
562 return _joinReplaceAllOneByteResult(this, matches, length);
563 }
497 } 564 }
498 StringBuffer buffer = new StringBuffer(); 565 return _joinReplaceAllResult(this, matches, length,
566 replacementIsOneByte);
567 }
568
569 /**
570 * As [_joinReplaceAllResult], but knowing that the result
571 * is always a [_OneByteString].
572 */
573 static String _joinReplaceAllOneByteResult(String base,
574 List matches,
575 int length) {
576 _OneByteString result = _OneByteString._allocate(length);
577 int writeIndex = 0;
578 for (int i = 0; i < matches.length; i++) {
579 var entry = matches[i];
580 if (entry is _Smi) {
581 int sliceStart = entry;
582 int sliceEnd;
583 if (sliceStart < 0) {
584 int bits = -sliceStart;
585 int sliceLength = bits & _lengthMask;
586 sliceStart = bits >> _lengthBits;
587 sliceEnd = sliceStart + sliceLength;
588 } else {
589 i++;
590 // This function should only be called with valid matches lists.
591 // If the list is short, or sliceEnd is not an integer, one of
592 // the next few lines will throw anyway.
593 assert(i < matches.length);
594 sliceEnd = matches[i];
595 }
596 for (int j = sliceStart; j < sliceEnd; j++) {
597 result._setAt(writeIndex++, base.codeUnitAt(j));
598 }
599 } else {
600 // Replacement is a one-byte string.
601 String replacement = entry;
602 for (int j = 0; j < replacement.length; j++) {
603 result._setAt(writeIndex++, replacement.codeUnitAt(j));
604 }
605 }
606 }
607 assert(writeIndex == length);
608 return result;
609 }
610
611 /**
612 * Combine the results of a [replaceAll] match into a new string.
613 *
614 * The [matches] lists contains Smi index pairs representing slices of
615 * [base] and [String]s to be put in between the slices.
616 *
617 * The total [length] of the resulting string is known, as is
618 * whether the replacement strings are one-byte strings.
619 * If they are, then we have to check the base string slices to know
620 * whether the result must be a one-byte string.
621 */
622 static String _joinReplaceAllResult(String base, List matches, int length,
623 bool replacementStringsAreOneByte)
624 native "StringBase_joinReplaceAllResult";
625
626 String replaceAllMapped(Pattern pattern, String replace(Match match)) {
627 if (pattern == null) throw new ArgumentError.notNull("pattern");
628 if (replace == null) throw new ArgumentError.notNull("replace");
629 List matches = [];
630 int length = 0;
499 int startIndex = 0; 631 int startIndex = 0;
632 bool replacementStringsAreOneByte = true;
500 for (Match match in pattern.allMatches(this)) { 633 for (Match match in pattern.allMatches(this)) {
501 buffer..write(this.substring(startIndex, match.start)) 634 length += _addReplaceSlice(matches, startIndex, match.start);
502 ..write(replacement); 635 String replacement = replace(match).toString();
636 matches.add(replacement);
637 length += replacement.length;
638 replacementStringsAreOneByte = replacementStringsAreOneByte &&
639 (replacement is _OneByteString ||
640 replacement is _ExternalOneByteString);
503 startIndex = match.end; 641 startIndex = match.end;
504 } 642 }
505 return (buffer..write(this.substring(startIndex))).toString(); 643 length += _addReplaceSlice(matches, startIndex, this.length);
506 } 644 if (replacementStringsAreOneByte &&
507 645 length < _maxJoinReplaceOneByteStringLength) {
508 String replaceAllMapped(Pattern pattern, String replace(Match match)) { 646 bool thisIsOneByte = (this is _OneByteString) ||
509 return splitMapJoin(pattern, onMatch: replace); 647 (this is _ExternalOneByteString);
648 if (thisIsOneByte) {
649 return _joinReplaceAllOneByteResult(this, matches, length);
650 }
651 }
652 return _joinReplaceAllResult(this, matches, length,
653 replacementStringsAreOneByte);
510 } 654 }
511 655
512 static String _matchString(Match match) => match[0]; 656 static String _matchString(Match match) => match[0];
513 static String _stringIdentity(String string) => string; 657 static String _stringIdentity(String string) => string;
514 658
515 String _splitMapJoinEmptyString(String onMatch(Match match), 659 String _splitMapJoinEmptyString(String onMatch(Match match),
516 String onNonMatch(String nonMatch)) { 660 String onNonMatch(String nonMatch)) {
517 // Pattern is the empty string. 661 // Pattern is the empty string.
518 StringBuffer buffer = new StringBuffer(); 662 StringBuffer buffer = new StringBuffer();
519 int length = this.length; 663 int length = this.length;
(...skipping 555 matching lines...) Expand 10 before | Expand all | Expand 10 after
1075 class _CodeUnits extends Object with ListMixin<int>, 1219 class _CodeUnits extends Object with ListMixin<int>,
1076 UnmodifiableListMixin<int> { 1220 UnmodifiableListMixin<int> {
1077 /** The string that this is the code units of. */ 1221 /** The string that this is the code units of. */
1078 String _string; 1222 String _string;
1079 1223
1080 _CodeUnits(this._string); 1224 _CodeUnits(this._string);
1081 1225
1082 int get length => _string.length; 1226 int get length => _string.length;
1083 int operator[](int i) => _string.codeUnitAt(i); 1227 int operator[](int i) => _string.codeUnitAt(i);
1084 } 1228 }
OLDNEW
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/vm/bootstrap_natives.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698