Chromium Code Reviews| Index: src/string.js |
| =================================================================== |
| --- src/string.js (revision 1489) |
| +++ src/string.js (working copy) |
| @@ -165,8 +165,9 @@ |
| // Build the result array. |
| var result = new $Array(match_string); |
| for (var i = 0; i < matches.length; ++i) { |
| - var match = matches[i]; |
| - var match_string = subject.slice(match[0], match[1]); |
| + var matchInfo = matches[i]; |
| + var match_string = subject.slice(matchInfo[CAPTURE0], |
| + matchInfo[CAPTURE1]); |
| result[i] = match_string; |
| } |
| @@ -218,7 +219,9 @@ |
| if (IS_FUNCTION(replace)) { |
| builder.add(replace.call(null, search, start, subject)); |
| } else { |
| - ExpandReplacement(ToString(replace), subject, [ start, end ], builder); |
| + reusableMatchInfo[CAPTURE0] = start; |
| + reusableMatchInfo[CAPTURE1] = end; |
| + ExpandReplacement(ToString(replace), subject, reusableMatchInfo, builder); |
| } |
| // suffix |
| @@ -228,6 +231,15 @@ |
| } |
| +// This has the same size as the lastMatchInfo array, and can be used for |
| +// functions that expect that structure to be returned. It is used when the |
| +// needle is a string rather than a regexp. In this case we can't update |
| +// lastMatchArray without erroneously affecting the properties on the global |
| +// RegExp object. |
| +var reusableMatchInfo = [2, -1, -1, "", ""]; |
| +var reusableMatchArray = [ void 0 ]; |
| + |
| + |
| // Helper function for regular expressions in String.prototype.replace. |
| function StringReplaceRegExp(subject, regexp, replace) { |
| // Compute an array of matches; each match is really a list of |
| @@ -237,9 +249,10 @@ |
| matches = DoRegExpExecGlobal(regexp, subject); |
| if (matches.length == 0) return subject; |
| } else { |
| - var captures = DoRegExpExec(regexp, subject, 0); |
| - if (IS_NULL(captures)) return subject; |
| - matches = [ captures ]; |
| + var lastMatchInfo = DoRegExpExec(regexp, subject, 0); |
| + if (IS_NULL(lastMatchInfo)) return subject; |
| + reusableMatchArray[0] = lastMatchInfo; |
| + matches = reusableMatchArray; |
| } |
| // Determine the number of matches. |
| @@ -253,17 +266,17 @@ |
| replace = ToString(replace); |
| if (%StringIndexOf(replace, "$", 0) < 0) { |
| for (var i = 0; i < length; i++) { |
| - var captures = matches[i]; |
| - result.addSpecialSlice(previous, captures[0]); |
| + var matchInfo = matches[i]; |
| + result.addSpecialSlice(previous, matchInfo[CAPTURE0]); |
| result.add(replace); |
| - previous = captures[1]; // continue after match |
| + previous = matchInfo[CAPTURE1]; // continue after match |
| } |
| } else { |
| for (var i = 0; i < length; i++) { |
| - var captures = matches[i]; |
| - result.addSpecialSlice(previous, captures[0]); |
| - ExpandReplacement(replace, subject, captures, result); |
| - previous = captures[1]; // continue after match |
| + var matchInfo = matches[i]; |
| + result.addSpecialSlice(previous, matchInfo[CAPTURE0]); |
| + ExpandReplacement(replace, subject, matchInfo, result); |
| + previous = matchInfo[CAPTURE1]; // continue after match |
| } |
| } |
| result.addSpecialSlice(previous, subject.length); |
| @@ -273,7 +286,7 @@ |
| // Expand the $-expressions in the string and return a new string with |
| // the result. |
| -function ExpandReplacement(string, subject, captures, builder) { |
| +function ExpandReplacement(string, subject, matchInfo, builder) { |
| var next = %StringIndexOf(string, '$', 0); |
| if (next < 0) { |
| builder.add(string); |
| @@ -281,11 +294,12 @@ |
| } |
| // Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102. |
| - var m = captures.length >> 1; // includes the match |
| + var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match. |
| if (next > 0) builder.add(SubString(string, 0, next)); |
| var length = string.length; |
| + |
|
Mads Ager (chromium)
2009/03/11 13:49:17
Any reason for the extra space here?
Erik Corry
2009/03/11 14:01:06
No.
|
| while (true) { |
| var expansion = '$'; |
| var position = next + 1; |
| @@ -299,13 +313,14 @@ |
| builder.add('$'); |
| } else if (peek == 38) { // $& - match |
| ++position; |
| - builder.addSpecialSlice(captures[0], captures[1]); |
| + builder.addSpecialSlice(matchInfo[CAPTURE0], |
| + matchInfo[CAPTURE1]); |
| } else if (peek == 96) { // $` - prefix |
| ++position; |
| - builder.addSpecialSlice(0, captures[0]); |
| + builder.addSpecialSlice(0, matchInfo[CAPTURE0]); |
| } else if (peek == 39) { // $' - suffix |
| ++position; |
| - builder.addSpecialSlice(captures[1], subject.length); |
| + builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length); |
| } else if (peek >= 48 && peek <= 57) { // $n, 0 <= n <= 9 |
| ++position; |
| var n = peek - 48; |
| @@ -329,7 +344,7 @@ |
| } |
| } |
| if (0 < n && n < m) { |
| - addCaptureString(builder, captures, n); |
| + addCaptureString(builder, matchInfo, n); |
| } else { |
| // Because of the captures range check in the parsing of two |
| // digit capture references, we can only enter here when a |
| @@ -361,26 +376,27 @@ |
| }; |
| -// Compute the string of a given PCRE capture. |
| -function CaptureString(string, captures, index) { |
| +// Compute the string of a given regular expression capture. |
| +function CaptureString(string, lastCaptureInfo, index) { |
| // Scale the index. |
| var scaled = index << 1; |
| // Compute start and end. |
| - var start = captures[scaled]; |
| - var end = captures[scaled + 1]; |
| + var start = lastCaptureInfo[CAPTURE(scaled)]; |
| + var end = lastCaptureInfo[CAPTURE(scaled + 1)]; |
| // If either start or end is missing return undefined. |
| if (start < 0 || end < 0) return; |
| return SubString(string, start, end); |
| }; |
| -// Add the string of a given PCRE capture to the ReplaceResultBuilder |
| -function addCaptureString(builder, captures, index) { |
| +// Add the string of a given regular expression capture to the |
| +// ReplaceResultBuilder |
| +function addCaptureString(builder, matchInfo, index) { |
| // Scale the index. |
| var scaled = index << 1; |
| // Compute start and end. |
| - var start = captures[scaled]; |
| - var end = captures[scaled + 1]; |
| + var start = matchInfo[CAPTURE(scaled)]; |
| + var end = matchInfo[CAPTURE(scaled + 1)]; |
| // If either start or end is missing return. |
| if (start < 0 || end <= start) return; |
| builder.addSpecialSlice(start, end); |
| @@ -396,10 +412,8 @@ |
| // should be 'abcd' and not 'dddd' (or anything else). |
| function StringReplaceRegExpWithFunction(subject, regexp, replace) { |
| var result = new ReplaceResultBuilder(subject); |
| - // Captures is an array of pairs of (start, end) indices for the match and |
| - // any captured substrings. |
| - var captures = DoRegExpExec(regexp, subject, 0); |
| - if (IS_NULL(captures)) return subject; |
| + var lastMatchInfo = DoRegExpExec(regexp, subject, 0); |
| + if (IS_NULL(lastMatchInfo)) return subject; |
| // There's at least one match. If the regexp is global, we have to loop |
| // over all matches. The loop is not in C++ code here like the one in |
| @@ -409,13 +423,16 @@ |
| if (regexp.global) { |
| var previous = 0; |
| do { |
| - result.addSpecialSlice(previous, captures[0]); |
| - result.add(ApplyReplacementFunction(replace, captures, subject)); |
| + result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]); |
| + var startOfMatch = lastMatchInfo[CAPTURE0]; |
| + previous = lastMatchInfo[CAPTURE1]; |
| + result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); |
| + // Can't use lastMatchInfo any more from here, since the function could |
| + // overwrite it. |
| // Continue with the next match. |
| - previous = captures[1]; |
| // Increment previous if we matched an empty string, as per ECMA-262 |
| // 15.5.4.10. |
| - if (previous == captures[0]) { |
| + if (previous == startOfMatch) { |
| // Add the skipped character to the output, if any. |
| if (previous < subject.length) { |
| result.addSpecialSlice(previous, previous + 1); |
| @@ -425,19 +442,22 @@ |
| // Per ECMA-262 15.10.6.2, if the previous index is greater than the |
| // string length, there is no match |
| - captures = (previous > subject.length) |
| + lastMatchInfo = (previous > subject.length) |
| ? null |
| : DoRegExpExec(regexp, subject, previous); |
| - } while (!IS_NULL(captures)); |
| + } while (!IS_NULL(lastMatchInfo)); |
| // Tack on the final right substring after the last match, if necessary. |
| if (previous < subject.length) { |
| result.addSpecialSlice(previous, subject.length); |
| } |
| } else { // Not a global regexp, no need to loop. |
| - result.addSpecialSlice(0, captures[0]); |
| - result.add(ApplyReplacementFunction(replace, captures, subject)); |
| - result.addSpecialSlice(captures[1], subject.length); |
| + result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]); |
| + var endOfMatch = lastMatchInfo[CAPTURE1]; |
| + result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject)); |
| + // Can't use lastMatchInfo any more from here, since the function could |
| + // overwrite it. |
| + result.addSpecialSlice(endOfMatch, subject.length); |
| } |
| return result.generate(); |
| @@ -445,20 +465,20 @@ |
| // Helper function to apply a string replacement function once. |
| -function ApplyReplacementFunction(replace, captures, subject) { |
| +function ApplyReplacementFunction(replace, lastMatchInfo, subject) { |
| // Compute the parameter list consisting of the match, captures, index, |
| // and subject for the replace function invocation. |
| - var index = captures[0]; |
| + var index = lastMatchInfo[CAPTURE0]; |
| // The number of captures plus one for the match. |
| - var m = captures.length >> 1; |
| + var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; |
| if (m == 1) { |
| - var s = CaptureString(subject, captures, 0); |
| + var s = CaptureString(subject, lastMatchInfo, 0); |
| // Don't call directly to avoid exposing the built-in global object. |
| return ToString(replace.call(null, s, index, subject)); |
| } |
| var parameters = $Array(m + 2); |
| for (var j = 0; j < m; j++) { |
| - parameters[j] = CaptureString(subject, captures, j); |
| + parameters[j] = CaptureString(subject, lastMatchInfo, j); |
| } |
| parameters[j] = index; |
| parameters[j + 1] = subject; |
| @@ -559,14 +579,14 @@ |
| return result; |
| } |
| - var match = splitMatch(sep, subject, currentIndex, startIndex); |
| + var lastMatchInfo = splitMatch(sep, subject, currentIndex, startIndex); |
| - if (IS_NULL(match)) { |
| + if (IS_NULL(lastMatchInfo)) { |
| result[result.length] = subject.slice(currentIndex, length); |
| return result; |
| } |
| - var endIndex = match[0]; |
| + var endIndex = lastMatchInfo[CAPTURE1]; |
| // We ignore a zero-length match at the currentIndex. |
| if (startIndex === endIndex && endIndex === currentIndex) { |
| @@ -574,11 +594,20 @@ |
| continue; |
| } |
| - result[result.length] = match[1]; |
| + result[result.length] = |
| + SubString(subject, currentIndex, lastMatchInfo[CAPTURE0]); |
| if (result.length === lim) return result; |
| - for (var i = 2; i < match.length; i++) { |
| - result[result.length] = match[i]; |
| + for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) { |
| + var start = lastMatchInfo[CAPTURE(i)]; |
| + var end = lastMatchInfo[CAPTURE(i + 1)]; |
| + if (start != -1 && end != -1) { |
| + result[result.length] = SubString(subject, |
| + lastMatchInfo[CAPTURE(i)], |
| + lastMatchInfo[CAPTURE(i + 1)]); |
| + } else { |
| + result[result.length] = void 0; |
| + } |
| if (result.length === lim) return result; |
| } |
| @@ -588,32 +617,24 @@ |
| // ECMA-262 section 15.5.4.14 |
| -// Helper function used by split. |
| +// Helper function used by split. This version returns the lastMatchInfo |
| +// instead of allocating a new array with basically the same information. |
| function splitMatch(separator, subject, current_index, start_index) { |
| if (IS_REGEXP(separator)) { |
| - var ovector = DoRegExpExec(separator, subject, start_index); |
| - if (ovector == null) return null; |
| - var nof_results = ovector.length >> 1; |
| - var result = new $Array(nof_results + 1); |
| + var lastMatchInfo = DoRegExpExec(separator, subject, start_index); |
| + if (lastMatchInfo == null) return null; |
| // Section 15.5.4.14 paragraph two says that we do not allow zero length |
| // matches at the end of the string. |
| - if (ovector[0] === subject.length) return null; |
| - result[0] = ovector[1]; |
| - result[1] = subject.slice(current_index, ovector[0]); |
| - for (var i = 1; i < nof_results; i++) { |
| - var matching_start = ovector[2*i]; |
| - var matching_end = ovector[2*i + 1]; |
| - if (matching_start != -1 && matching_end != -1) { |
| - result[i + 1] = subject.slice(matching_start, matching_end); |
| - } |
| - } |
| - return result; |
| + if (lastMatchInfo[CAPTURE0] === subject.length) return null; |
| + return lastMatchInfo; |
| } |
| var separatorIndex = subject.indexOf(separator, start_index); |
| if (separatorIndex === -1) return null; |
| - return [ separatorIndex + separator.length, subject.slice(current_index, separatorIndex) ]; |
| + reusableMatchInfo[CAPTURE0] = separatorIndex; |
| + reusableMatchInfo[CAPTURE1] = separatorIndex + separator.length; |
| + return reusableMatchInfo; |
| }; |