src/string.js - Issue 43075: * Reapply revisions 1383, 1384, 1391, 1398, 1401, 1402,...

Unified Diff: src/string.js

Issue 43075: * Reapply revisions 1383, 1384, 1391, 1398, 1401, 1402,... (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 11 years, 9 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/string.js

===================================================================

--- src/string.js (revision 1489)

+++ src/string.js (working copy)

@@ -165,8 +165,9 @@

// Build the result array.

var result = new $Array(match_string);

for (var i = 0; i < matches.length; ++i) {

- var match = matches[i];

- var match_string = subject.slice(match[0], match[1]);

+ var matchInfo = matches[i];

+ var match_string = subject.slice(matchInfo[CAPTURE0],

+ matchInfo[CAPTURE1]);

result[i] = match_string;

}

@@ -218,7 +219,9 @@

if (IS_FUNCTION(replace)) {

builder.add(replace.call(null, search, start, subject));

} else {

- ExpandReplacement(ToString(replace), subject, [ start, end ], builder);

+ reusableMatchInfo[CAPTURE0] = start;

+ reusableMatchInfo[CAPTURE1] = end;

+ ExpandReplacement(ToString(replace), subject, reusableMatchInfo, builder);

}

// suffix

@@ -228,6 +231,15 @@

}

+// This has the same size as the lastMatchInfo array, and can be used for

+// functions that expect that structure to be returned. It is used when the

+// needle is a string rather than a regexp. In this case we can't update

+// lastMatchInfo without erroneously affecting the properties on the global

+// RegExp object.

+var reusableMatchInfo = [2, -1, -1, "", ""];

+var reusableMatchArray = [ void 0 ];

// Helper function for regular expressions in String.prototype.replace.

function StringReplaceRegExp(subject, regexp, replace) {

// Compute an array of matches; each match is really a list of

@@ -237,9 +249,10 @@

matches = DoRegExpExecGlobal(regexp, subject);

if (matches.length == 0) return subject;

} else {

- var captures = DoRegExpExec(regexp, subject, 0);

- if (IS_NULL(captures)) return subject;

- matches = [ captures ];

+ var lastMatchInfo = DoRegExpExec(regexp, subject, 0);

+ if (IS_NULL(lastMatchInfo)) return subject;

+ reusableMatchArray[0] = lastMatchInfo;

+ matches = reusableMatchArray;

}

// Determine the number of matches.

@@ -253,17 +266,17 @@

replace = ToString(replace);

if (%StringIndexOf(replace, "$", 0) < 0) {

for (var i = 0; i < length; i++) {

- var captures = matches[i];

- result.addSpecialSlice(previous, captures[0]);

+ var matchInfo = matches[i];

+ result.addSpecialSlice(previous, matchInfo[CAPTURE0]);

result.add(replace);

- previous = captures[1]; // continue after match

+ previous = matchInfo[CAPTURE1]; // continue after match

}

} else {

for (var i = 0; i < length; i++) {

- var captures = matches[i];

- result.addSpecialSlice(previous, captures[0]);

- ExpandReplacement(replace, subject, captures, result);

- previous = captures[1]; // continue after match

+ var matchInfo = matches[i];

+ result.addSpecialSlice(previous, matchInfo[CAPTURE0]);

+ ExpandReplacement(replace, subject, matchInfo, result);

+ previous = matchInfo[CAPTURE1]; // continue after match

}

result.addSpecialSlice(previous, subject.length);

@@ -273,7 +286,7 @@

// Expand the $-expressions in the string and return a new string with

// the result.

-function ExpandReplacement(string, subject, captures, builder) {

+function ExpandReplacement(string, subject, matchInfo, builder) {

var next = %StringIndexOf(string, '$', 0);

if (next < 0) {

builder.add(string);

@@ -281,11 +294,12 @@

}

// Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102.

- var m = captures.length >> 1; // includes the match

+ var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match.

if (next > 0) builder.add(SubString(string, 0, next));

var length = string.length;

Mads Ager (chromium) 2009/03/11 13:49:17 Any reason for the extra space here?

Erik Corry 2009/03/11 14:01:06 No.

while (true) {

var expansion = '$';

var position = next + 1;

@@ -299,13 +313,14 @@

builder.add('$');

} else if (peek == 38) { // $& - match

++position;

- builder.addSpecialSlice(captures[0], captures[1]);

+ builder.addSpecialSlice(matchInfo[CAPTURE0],

+ matchInfo[CAPTURE1]);

} else if (peek == 96) { // $` - prefix

++position;

- builder.addSpecialSlice(0, captures[0]);

+ builder.addSpecialSlice(0, matchInfo[CAPTURE0]);

} else if (peek == 39) { // $' - suffix

++position;

- builder.addSpecialSlice(captures[1], subject.length);

+ builder.addSpecialSlice(matchInfo[CAPTURE1], subject.length);

} else if (peek >= 48 && peek <= 57) { // $n, 0 <= n <= 9

++position;

var n = peek - 48;

@@ -329,7 +344,7 @@

}

if (0 < n && n < m) {

- addCaptureString(builder, captures, n);

+ addCaptureString(builder, matchInfo, n);

} else {

// Because of the captures range check in the parsing of two

// digit capture references, we can only enter here when a

@@ -361,26 +376,27 @@

};

-// Compute the string of a given PCRE capture.

-function CaptureString(string, captures, index) {

+// Compute the string of a given regular expression capture.

+function CaptureString(string, lastCaptureInfo, index) {

// Scale the index.

var scaled = index << 1;

// Compute start and end.

- var start = captures[scaled];

- var end = captures[scaled + 1];

+ var start = lastCaptureInfo[CAPTURE(scaled)];

+ var end = lastCaptureInfo[CAPTURE(scaled + 1)];

// If either start or end is missing return undefined.

if (start < 0 || end < 0) return;

return SubString(string, start, end);

};

-// Add the string of a given PCRE capture to the ReplaceResultBuilder

-function addCaptureString(builder, captures, index) {

+// Add the string of a given regular expression capture to the

+// ReplaceResultBuilder.

+function addCaptureString(builder, matchInfo, index) {

// Scale the index.

var scaled = index << 1;

// Compute start and end.

- var start = captures[scaled];

- var end = captures[scaled + 1];

+ var start = matchInfo[CAPTURE(scaled)];

+ var end = matchInfo[CAPTURE(scaled + 1)];

// If either start or end is missing return.

if (start < 0 || end <= start) return;

builder.addSpecialSlice(start, end);

@@ -396,10 +412,8 @@

// should be 'abcd' and not 'dddd' (or anything else).

function StringReplaceRegExpWithFunction(subject, regexp, replace) {

var result = new ReplaceResultBuilder(subject);

- // Captures is an array of pairs of (start, end) indices for the match and

- // any captured substrings.

- var captures = DoRegExpExec(regexp, subject, 0);

- if (IS_NULL(captures)) return subject;

+ var lastMatchInfo = DoRegExpExec(regexp, subject, 0);

+ if (IS_NULL(lastMatchInfo)) return subject;

// There's at least one match. If the regexp is global, we have to loop

// over all matches. The loop is not in C++ code here like the one in

@@ -409,13 +423,16 @@

if (regexp.global) {

var previous = 0;

do {

- result.addSpecialSlice(previous, captures[0]);

- result.add(ApplyReplacementFunction(replace, captures, subject));

+ result.addSpecialSlice(previous, lastMatchInfo[CAPTURE0]);

+ var startOfMatch = lastMatchInfo[CAPTURE0];

+ previous = lastMatchInfo[CAPTURE1];

+ result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject));

+ // Can't use lastMatchInfo any more from here, since the function could

+ // overwrite it.

// Continue with the next match.

- previous = captures[1];

// Increment previous if we matched an empty string, as per ECMA-262

// 15.5.4.10.

- if (previous == captures[0]) {

+ if (previous == startOfMatch) {

// Add the skipped character to the output, if any.

if (previous < subject.length) {

result.addSpecialSlice(previous, previous + 1);

@@ -425,19 +442,22 @@

// Per ECMA-262 15.10.6.2, if the previous index is greater than the

// string length, there is no match

- captures = (previous > subject.length)

+ lastMatchInfo = (previous > subject.length)

? null

: DoRegExpExec(regexp, subject, previous);

- } while (!IS_NULL(captures));

+ } while (!IS_NULL(lastMatchInfo));

// Tack on the final right substring after the last match, if necessary.

if (previous < subject.length) {

result.addSpecialSlice(previous, subject.length);

}

} else { // Not a global regexp, no need to loop.

- result.addSpecialSlice(0, captures[0]);

- result.add(ApplyReplacementFunction(replace, captures, subject));

- result.addSpecialSlice(captures[1], subject.length);

+ result.addSpecialSlice(0, lastMatchInfo[CAPTURE0]);

+ var endOfMatch = lastMatchInfo[CAPTURE1];

+ result.add(ApplyReplacementFunction(replace, lastMatchInfo, subject));

+ // Can't use lastMatchInfo any more from here, since the function could

+ // overwrite it.

+ result.addSpecialSlice(endOfMatch, subject.length);

}

return result.generate();

@@ -445,20 +465,20 @@

// Helper function to apply a string replacement function once.

-function ApplyReplacementFunction(replace, captures, subject) {

+function ApplyReplacementFunction(replace, lastMatchInfo, subject) {

// Compute the parameter list consisting of the match, captures, index,

// and subject for the replace function invocation.

- var index = captures[0];

+ var index = lastMatchInfo[CAPTURE0];

// The number of captures plus one for the match.

- var m = captures.length >> 1;

+ var m = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1;

if (m == 1) {

- var s = CaptureString(subject, captures, 0);

+ var s = CaptureString(subject, lastMatchInfo, 0);

// Don't call directly to avoid exposing the built-in global object.

return ToString(replace.call(null, s, index, subject));

}

var parameters = $Array(m + 2);

for (var j = 0; j < m; j++) {

- parameters[j] = CaptureString(subject, captures, j);

+ parameters[j] = CaptureString(subject, lastMatchInfo, j);

}

parameters[j] = index;

parameters[j + 1] = subject;

@@ -559,14 +579,14 @@

return result;

}

- var match = splitMatch(sep, subject, currentIndex, startIndex);

+ var lastMatchInfo = splitMatch(sep, subject, currentIndex, startIndex);

- if (IS_NULL(match)) {

+ if (IS_NULL(lastMatchInfo)) {

result[result.length] = subject.slice(currentIndex, length);

return result;

}

- var endIndex = match[0];

+ var endIndex = lastMatchInfo[CAPTURE1];

// We ignore a zero-length match at the currentIndex.

if (startIndex === endIndex && endIndex === currentIndex) {

@@ -574,11 +594,20 @@

continue;

}

- result[result.length] = match[1];

+ result[result.length] =

+ SubString(subject, currentIndex, lastMatchInfo[CAPTURE0]);

if (result.length === lim) return result;

- for (var i = 2; i < match.length; i++) {

- result[result.length] = match[i];

+ for (var i = 2; i < NUMBER_OF_CAPTURES(lastMatchInfo); i += 2) {

+ var start = lastMatchInfo[CAPTURE(i)];

+ var end = lastMatchInfo[CAPTURE(i + 1)];

+ if (start != -1 && end != -1) {

+ result[result.length] = SubString(subject,

+ lastMatchInfo[CAPTURE(i)],

+ lastMatchInfo[CAPTURE(i + 1)]);

+ } else {

+ result[result.length] = void 0;

+ }

if (result.length === lim) return result;

}

@@ -588,32 +617,24 @@

// ECMA-262 section 15.5.4.14

-// Helper function used by split.

+// Helper function used by split. This version returns the lastMatchInfo

+// instead of allocating a new array with basically the same information.

function splitMatch(separator, subject, current_index, start_index) {

if (IS_REGEXP(separator)) {

- var ovector = DoRegExpExec(separator, subject, start_index);

- if (ovector == null) return null;

- var nof_results = ovector.length >> 1;

- var result = new $Array(nof_results + 1);

+ var lastMatchInfo = DoRegExpExec(separator, subject, start_index);

+ if (lastMatchInfo == null) return null;

// Section 15.5.4.14 paragraph two says that we do not allow zero length

// matches at the end of the string.

- if (ovector[0] === subject.length) return null;

- result[0] = ovector[1];

- result[1] = subject.slice(current_index, ovector[0]);

- for (var i = 1; i < nof_results; i++) {

- var matching_start = ovector[2*i];

- var matching_end = ovector[2*i + 1];

- if (matching_start != -1 && matching_end != -1) {

- result[i + 1] = subject.slice(matching_start, matching_end);

- }

- return result;

+ if (lastMatchInfo[CAPTURE0] === subject.length) return null;

+ return lastMatchInfo;

}

var separatorIndex = subject.indexOf(separator, start_index);

if (separatorIndex === -1) return null;

- return [ separatorIndex + separator.length, subject.slice(current_index, separatorIndex) ];

+ reusableMatchInfo[CAPTURE0] = separatorIndex;

+ reusableMatchInfo[CAPTURE1] = separatorIndex + separator.length;

+ return reusableMatchInfo;

};

« src/regexp-delay.js ('K') | « src/runtime.cc ('k') | test/mjsunit/regexp-static.js » ('j') | no next file with comments »