Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(499)

Unified Diff: runtime/lib/string_patch.dart

Issue 858543002: Avoid extra duplication of substrings during string.replaceAll. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Also do replaceAllMapped. Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
Index: runtime/lib/string_patch.dart
diff --git a/runtime/lib/string_patch.dart b/runtime/lib/string_patch.dart
index 3a1fd711b6cd06e4a46f44ad383d6258a8d47a76..52f9392b32d958cc5ea0f7a5c5b1ff1e422e8c63 100644
--- a/runtime/lib/string_patch.dart
+++ b/runtime/lib/string_patch.dart
@@ -40,6 +40,11 @@ patch class String {
* implementations, e.g., _OneByteString.
*/
class _StringBase {
+ // Constants used by replaceAll encoding of matches.
+ static const int _lengthShift = 15;
+ static const int _maxStartValue = (1 << _lengthShift) - 1;
+ static const int _maxLengthValue = (1 << (30 - _lengthShift)) - 1;
+ static const int _startMask = _maxStartValue;
factory _StringBase._uninstantiable() {
throw new UnsupportedError(
@@ -488,25 +493,150 @@ class _StringBase {
}
String replaceAll(Pattern pattern, String replacement) {
- if (pattern is! Pattern) {
- throw new ArgumentError("${pattern} is not a Pattern");
zerny-google 2015/01/20 13:43:11 Is it OK that we no longer have an explicit Argume
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Almost. I'll add back a check for "null", but we s
+ List matches = [];
+ int length = 0;
+ int replacementLength = replacement.length;
+ int startIndex = 0;
+ if (replacementLength == 0) {
+ for (Match match in pattern.allMatches(this)) {
+ int matchStart = match.start;
+ int preMatchLength = match.start - startIndex;
+ if (preMatchLength > 0) {
+ if (startIndex <= _maxStartValue &&
+ preMatchLength <= _maxLengthValue) {
+ matches.add(-(startIndex | preMatchLength << _lengthShift));
zerny-google 2015/01/20 13:43:11 Nit: add parens around << Also, does compacting t
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Done. I see ~5% improvement on a small/micro bench
+ } else {
+ matches.add(startIndex);
+ matches.add(matchStart);
+ }
+ length += preMatchLength;
+ }
+ startIndex = match.end;
+ }
+ } else {
+ for (Match match in pattern.allMatches(this)) {
+ int matchStart = match.start;
+ int preMatchLength = match.start - startIndex;
+ if (preMatchLength > 0) {
+ if (startIndex <= _maxStartValue &&
+ preMatchLength <= _maxLengthValue) {
+ matches.add(-(startIndex | preMatchLength << _lengthShift));
+ } else {
+ matches.add(startIndex);
+ matches.add(matchStart);
+ }
+ length += preMatchLength;
+ }
+ matches.add(replacement);
+ length += replacementLength;
+ startIndex = match.end;
+ }
}
- if (replacement is! String) {
- throw new ArgumentError(
- "${replacement} is not a String or Match->String function");
+ if (startIndex < this.length) {
+ matches.add(startIndex);
+ matches.add(this.length);
+ length += this.length - startIndex;
}
- StringBuffer buffer = new StringBuffer();
- int startIndex = 0;
- for (Match match in pattern.allMatches(this)) {
- buffer..write(this.substring(startIndex, match.start))
- ..write(replacement);
- startIndex = match.end;
+ bool replacementIsOneByte = (replacement is _OneByteString) ||
+ (replacement is _ExternalOneByteString);
+ bool thisIsOneByte = (this is _OneByteString) ||
+ (this is _ExternalOneByteString);
+ if (replacementIsOneByte && thisIsOneByte) {
+ _joinReplaceAllOneByteResult(this, matches, length);
}
- return (buffer..write(this.substring(startIndex))).toString();
+ return _joinReplaceAllResult(this, matches, length,
Florian Schneider 2015/01/20 16:39:33 Would it make sense to implement the 'this' two-by
Lasse Reichstein Nielsen 2015/01/21 10:48:43 The one-byte case actually turns out to be slower
+ replacementIsOneByte);
}
+ /**
+ * As [_joinReplaceAllResult], but knowing that the result
+ * is always a [_OneByteString].
+ */
+ static String _joinReplaceAllOneByteResult(String base,
+ List matches,
+ int length) {
+ _OneByteString result = _OneByteString._allocate(length);
+ int writeIndex = 0;
+ for (int i = 0; i < matches.length; i++) {
+ var entry = matches[i];
+ if (entry is _Smi) {
+ int sliceStart = entry;
+ int sliceEnd;
+ if (sliceStart < 0) {
+ int bits = -sliceStart;
+ sliceStart = bits & _startMask;
+ sliceEnd = sliceStart + (bits >> _lengthShift);
+ } else {
+ i++;
siva 2015/01/21 00:29:20 This is really dicey, 'i' is being incremented and
Lasse Reichstein Nielsen 2015/01/21 10:48:43 Adding assert. This function should only be called
+ sliceEnd = matches[i];
+ }
+ for (int j = sliceStart; j < sliceEnd; j++) {
+ result._setAt(writeIndex++, base.codeUnitAt(j));
+ }
+ } else {
+ _OneByteString replacement = entry;
+ for (int j = 0; j < replacement.length; j++) {
+ result._setAt(writeIndex++, replacement.codeUnitAt(j));
+ }
+ }
+ }
+ assert(writeIndex == length);
+ return result;
+ }
+
+ /**
+ * Combine the results of a [replaceAll] match into a new string.
+ *
+ * The [matches] lists contains Smi index pairs representing slices of
+ * [base] and [String]s to be put in between the slices.
+ *
+ * The total [length] of the resulting string is known, as is
+ * whether the replacement strings are one-byte strings.
+ * If they are, then we have to check the base string slices to know
+ * whether the result must be a one-byte string.
+ */
+ static String _joinReplaceAllResult(String base, List matches, int length,
+ bool replacementStringsAreOneByte)
+ native "StringBase_joinReplaceAllResult";
+
String replaceAllMapped(Pattern pattern, String replace(Match match)) {
- return splitMapJoin(pattern, onMatch: replace);
+ List matches = [];
+ int length = 0;
+ int startIndex = 0;
+ bool replacementStringsAreOneByte = true;
+ for (Match match in pattern.allMatches(this)) {
+ int matchStart = match.start;
+ int preMatchLength = match.start - startIndex;
+ if (preMatchLength > 0) {
+ if (startIndex <= _maxStartValue &&
+ preMatchLength <= _maxLengthValue) {
+ matches.add(-(startIndex | preMatchLength << _lengthShift));
+ } else {
+ matches.add(startIndex);
+ matches.add(matchStart);
+ }
+ length += preMatchLength;
+ }
+ String replacement = replace(match);
+ matches.add(replacement);
+ length += replacement.length;
+ replacementStringsAreOneByte = replacementStringsAreOneByte &&
+ (replacement is _OneByteString ||
+ replacement is _ExternalOneByteString);
+ startIndex = match.end;
+ }
+ if (startIndex < this.length) {
+ matches.add(startIndex);
+ matches.add(this.length);
+ length += this.length - startIndex;
+ }
+ bool thisIsOneByte = (this is _OneByteString) ||
+ (this is _ExternalOneByteString);
+ if (replacementStringsAreOneByte && thisIsOneByte) {
+ _joinReplaceAllOneByteResult(this, matches, length);
+ }
+ return _joinReplaceAllResult(this, matches, length,
+ replacementStringsAreOneByte);
}
static String _matchString(Match match) => match[0];

Powered by Google App Engine
This is Rietveld 408576698