Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(563)

Unified Diff: runtime/lib/string_patch.dart

Issue 858543002: Avoid extra duplication of substrings during string.replaceAll. (Closed) Base URL: https://dart.googlecode.com/svn/branches/bleeding_edge/dart
Patch Set: Created 5 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/vm/bootstrap_natives.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: runtime/lib/string_patch.dart
diff --git a/runtime/lib/string_patch.dart b/runtime/lib/string_patch.dart
index 3a1fd711b6cd06e4a46f44ad383d6258a8d47a76..f22b6e1fe834d727301712c211dee90b1da25a22 100644
--- a/runtime/lib/string_patch.dart
+++ b/runtime/lib/string_patch.dart
@@ -40,6 +40,35 @@ patch class String {
* implementations, e.g., _OneByteString.
*/
class _StringBase {
+ // Constants used by replaceAll encoding of string slices between matches.
+ // A string slice (start+length) is encoded in a single Smi to save memory
+ // overhead in the common case.
+ // We use fewer bits for length (11 bits) than for the start index (19+ bits).
+ // For long strings, it's possible to have many large indices,
+ // but it's unlikely to have many long lengths since slices don't overlap.
+ // If there are few matches in a long string, then there are few long slices,
+ // and if there are many matches, there'll likely be many short slices.
+ //
+ // Encoding is: 0((start << _lengthBits) | length)
+
+ // Number of bits used by length.
+ // This is the shift used to encode and decode the start index.
+ static const int _lengthBits = 11;
+ // The maximal allowed length value in an encoded slice.
+ static const int _maxLengthValue = (1 << _lengthBits) - 1;
+ // Mask of length in encoded smi value.
+ static const int _lengthMask = _maxLengthValue;
+ static const int _startBits = _maxUnsignedSmiBits - _lengthBits;
+ // Maximal allowed start index value in an encoded slice.
+ static const int _maxStartValue = (1 << _startBits) - 1;
+ // We pick 30 as a safe lower bound on available bits in a negative smi.
+ // TODO(lrn): Consider allowing more bits for start on 64-bit systems.
+ static const int _maxUnsignedSmiBits = 30;
+
+ // For longer strings, calling into C++ to create the result of a
+ // [replaceAll] is faster than [_joinReplaceAllOneByteResult].
+ // TODO(lrn): See if this limit can be tweaked.
+ static const int _maxJoinReplaceOneByteStringLength = 500;
factory _StringBase._uninstantiable() {
throw new UnsupportedError(
@@ -487,26 +516,141 @@ class _StringBase {
"${this.substring(match.end)}";
}
+
+ static int _addReplaceSlice(List matches, int start, int end) {
+ int length = end - start;
+ if (length > 0) {
+ if (length <= _maxLengthValue && start <= _maxStartValue) {
+ matches.add(-((start << _lengthBits) | length));
+ } else {
+ matches.add(start);
+ matches.add(end);
+ }
+ }
+ return length;
+ }
+
String replaceAll(Pattern pattern, String replacement) {
- if (pattern is! Pattern) {
- throw new ArgumentError("${pattern} is not a Pattern");
+ if (pattern == null) throw new ArgumentError.notNull("pattern");
+ if (replacement == null) throw new ArgumentError.notNull("replacement");
+ List matches = [];
+ int length = 0;
+ int replacementLength = replacement.length;
+ int startIndex = 0;
+ if (replacementLength == 0) {
+ int count = 0;
+ for (Match match in pattern.allMatches(this)) {
+ length += _addReplaceSlice(matches, startIndex, match.start);
+ startIndex = match.end;
+ }
+ } else {
+ for (Match match in pattern.allMatches(this)) {
+ length += _addReplaceSlice(matches, startIndex, match.start);
+ matches.add(replacement);
+ length += replacementLength;
+ startIndex = match.end;
+ }
}
- if (replacement is! String) {
- throw new ArgumentError(
- "${replacement} is not a String or Match->String function");
+ length += _addReplaceSlice(matches, startIndex, this.length);
+ bool replacementIsOneByte = (replacement is _OneByteString) ||
+ (replacement is _ExternalOneByteString);
+ if (replacementIsOneByte && length < _maxJoinReplaceOneByteStringLength) {
+ // TODO(lrn): Is there a cut-off point, or is runtime always faster?
+ bool thisIsOneByte = (this is _OneByteString) ||
+ (this is _ExternalOneByteString);
+ if (replacementIsOneByte && thisIsOneByte) {
+ return _joinReplaceAllOneByteResult(this, matches, length);
+ }
}
- StringBuffer buffer = new StringBuffer();
- int startIndex = 0;
- for (Match match in pattern.allMatches(this)) {
- buffer..write(this.substring(startIndex, match.start))
- ..write(replacement);
- startIndex = match.end;
+ return _joinReplaceAllResult(this, matches, length,
+ replacementIsOneByte);
+ }
+
+ /**
+ * As [_joinReplaceAllResult], but knowing that the result
+ * is always a [_OneByteString].
+ */
+ static String _joinReplaceAllOneByteResult(String base,
+ List matches,
+ int length) {
+ _OneByteString result = _OneByteString._allocate(length);
+ int writeIndex = 0;
+ for (int i = 0; i < matches.length; i++) {
+ var entry = matches[i];
+ if (entry is _Smi) {
+ int sliceStart = entry;
+ int sliceEnd;
+ if (sliceStart < 0) {
+ int bits = -sliceStart;
+ int sliceLength = bits & _lengthMask;
+ sliceStart = bits >> _lengthBits;
+ sliceEnd = sliceStart + sliceLength;
+ } else {
+ i++;
+ // This function should only be called with valid matches lists.
+ // If the list is short, or sliceEnd is not an integer, one of
+ // the next few lines will throw anyway.
+ assert(i < matches.length);
+ sliceEnd = matches[i];
+ }
+ for (int j = sliceStart; j < sliceEnd; j++) {
+ result._setAt(writeIndex++, base.codeUnitAt(j));
+ }
+ } else {
+ // Replacement is a one-byte string.
+ String replacement = entry;
+ for (int j = 0; j < replacement.length; j++) {
+ result._setAt(writeIndex++, replacement.codeUnitAt(j));
+ }
+ }
}
- return (buffer..write(this.substring(startIndex))).toString();
+ assert(writeIndex == length);
+ return result;
}
+ /**
+ * Combine the results of a [replaceAll] match into a new string.
+ *
+ * The [matches] lists contains Smi index pairs representing slices of
+ * [base] and [String]s to be put in between the slices.
+ *
+ * The total [length] of the resulting string is known, as is
+ * whether the replacement strings are one-byte strings.
+ * If they are, then we have to check the base string slices to know
+ * whether the result must be a one-byte string.
+ */
+ static String _joinReplaceAllResult(String base, List matches, int length,
+ bool replacementStringsAreOneByte)
+ native "StringBase_joinReplaceAllResult";
+
String replaceAllMapped(Pattern pattern, String replace(Match match)) {
- return splitMapJoin(pattern, onMatch: replace);
+ if (pattern == null) throw new ArgumentError.notNull("pattern");
+ if (replace == null) throw new ArgumentError.notNull("replace");
+ List matches = [];
+ int length = 0;
+ int startIndex = 0;
+ bool replacementStringsAreOneByte = true;
+ for (Match match in pattern.allMatches(this)) {
+ length += _addReplaceSlice(matches, startIndex, match.start);
+ String replacement = replace(match).toString();
+ matches.add(replacement);
+ length += replacement.length;
+ replacementStringsAreOneByte = replacementStringsAreOneByte &&
+ (replacement is _OneByteString ||
+ replacement is _ExternalOneByteString);
+ startIndex = match.end;
+ }
+ length += _addReplaceSlice(matches, startIndex, this.length);
+ if (replacementStringsAreOneByte &&
+ length < _maxJoinReplaceOneByteStringLength) {
+ bool thisIsOneByte = (this is _OneByteString) ||
+ (this is _ExternalOneByteString);
+ if (thisIsOneByte) {
+ return _joinReplaceAllOneByteResult(this, matches, length);
+ }
+ }
+ return _joinReplaceAllResult(this, matches, length,
+ replacementStringsAreOneByte);
}
static String _matchString(Match match) => match[0];
« no previous file with comments | « runtime/lib/string.cc ('k') | runtime/vm/bootstrap_natives.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698