src/string.js - Issue 5959002: Improve regexp split, replace and test.

Unified Diff: src/string.js

Issue 5959002: Improve regexp split, replace and test. (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: src/string.js

===================================================================

--- src/string.js (revision 6062)

+++ src/string.js (working copy)

@@ -159,7 +159,7 @@

function StringMatch(regexp) {

var subject = TO_STRING_INLINE(this);

if (IS_REGEXP(regexp)) {

- if (!regexp.global) return regexp.exec(subject);

+ if (!regexp.global) return RegExpExecNoTests(regexp, subject, 0);

%_Log('regexp', 'regexp-match,%0S,%1r', [subject, regexp]);

// lastMatchInfo is defined in regexp.js.

return %StringMatch(subject, regexp, lastMatchInfo);

@@ -247,14 +247,14 @@

function ExpandReplacement(string, subject, matchInfo, builder) {

var next = %StringIndexOf(string, '$', 0);

if (next < 0) {

- builder.add(string);

+ builder.addString(string);

return;

}

// Compute the number of captures; see ECMA-262, 15.5.4.11, p. 102.

var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; // Includes the match.

- if (next > 0) builder.add(SubString(string, 0, next));

+ if (next > 0) builder.elements.push(SubString(string, 0, next));

Lasse Reichstein 2010/12/17 10:38:30 Consider extracting elements from builder, so you... [comment preview truncated]

sandholm 2010/12/17 11:50:24 Done.

var length = string.length;

while (true) {

@@ -264,7 +264,7 @@

var peek = %_StringCharCodeAt(string, position);

if (peek == 36) { // $$

++position;

- builder.add('$');

+ builder.elements.push('$');

} else if (peek == 38) { // $& - match

++position;

builder.addSpecialSlice(matchInfo[CAPTURE0],

@@ -301,14 +301,14 @@

// digit capture references, we can only enter here when a

// single digit capture reference is outside the range of

// captures.

- builder.add('$');

+ builder.elements.push('$');

--position;

}

} else {

- builder.add('$');

+ builder.elements.push('$');

}

} else {

- builder.add('$');

+ builder.elements.push('$');

}

// Go to the next $ in the string.

@@ -318,13 +318,13 @@

// haven't reached the end, we need to append the suffix.

if (next < 0) {

if (position < length) {

- builder.add(SubString(string, position, length));

+ builder.elements.push(SubString(string, position, length));

}

return;

}

// Append substring between the previous and the next $ character.

- builder.add(SubString(string, position, next));

+ builder.addString(SubString(string, position, next));

}

};

@@ -559,23 +559,23 @@

var currentIndex = 0;

var startIndex = 0;

+ var startMatch = 0;

var result = [];

outer_loop:

while (true) {

if (startIndex === length) {

- result.push(subject.slice(currentIndex, length));

+ result.push(SubString(subject, currentIndex, length));

break;

}

- var matchInfo = splitMatch(separator, subject, currentIndex, startIndex);

- if (IS_NULL(matchInfo)) {

- result.push(subject.slice(currentIndex, length));

+ var matchInfo = DoRegExpExec(separator, subject, startIndex);

+ if (IS_NULL_OR_UNDEFINED(matchInfo)

Lasse Reichstein 2010/12/17 10:38:30 Why accept undefined?

sandholm 2010/12/17 11:50:24 I am just applying the same semantics as the inlin... [comment preview truncated]

+ || length === (startMatch = matchInfo[CAPTURE0])) {

+ result.push(SubString(subject, currentIndex, length));

break;

}

var endIndex = matchInfo[CAPTURE1];

// We ignore a zero-length match at the currentIndex.

@@ -584,7 +584,12 @@

continue;

}

- result.push(SubString(subject, currentIndex, matchInfo[CAPTURE0]));

+ if (currentIndex + 1 == startMatch) {

Lasse Reichstein 2010/12/17 10:38:30 Does this really pay off? It seems to be simply in... [comment preview truncated]

sandholm 2010/12/17 11:50:24 It is faster. I generally only inline call sites w... [comment preview truncated]

+ result.push(%_StringCharAt(subject, currentIndex));

+ } else {

+ result.push(%_SubString(subject, currentIndex, startMatch));

+ }

if (result.length === limit) break;

var matchinfo_len = NUMBER_OF_CAPTURES(matchInfo) + REGEXP_FIRST_CAPTURE;

@@ -609,19 +614,6 @@

}

-// ECMA-262 section 15.5.4.14

-// Helper function used by split. This version returns the matchInfo

-// instead of allocating a new array with basically the same information.

-function splitMatch(separator, subject, current_index, start_index) {

- var matchInfo = DoRegExpExec(separator, subject, start_index);

- if (matchInfo == null) return null;

- // Section 15.5.4.14 paragraph two says that we do not allow zero length

- // matches at the end of the string.

- if (matchInfo[CAPTURE0] === subject.length) return null;

- return matchInfo;

-}

// ECMA-262 section 15.5.4.15

function StringSubstring(start, end) {

var s = TO_STRING_INLINE(this);

@@ -844,24 +836,26 @@

ReplaceResultBuilder.prototype.add = function(str) {

str = TO_STRING_INLINE(str);

- if (str.length > 0) {

- var elements = this.elements;

- elements[elements.length] = str;

- }

+ if (str.length > 0) this.elements.push(str);

}

+ReplaceResultBuilder.prototype.addString = function(str) {

+ if (str.length > 0) this.elements.push(str);

+}

ReplaceResultBuilder.prototype.addSpecialSlice = function(start, end) {

var len = end - start;

if (start < 0 || len <= 0) return;

- var elements = this.elements;

if (start < 0x80000 && len < 0x800) {

- elements[elements.length] = (start << 11) | len;

+ this.elements.push((start << 11) | len);

} else {

// 0 < len <= String::kMaxLength and Smi::kMaxValue >= String::kMaxLength,

// so -len is a smi.

- elements[elements.length] = -len;

- elements[elements.length] = start;

+ var elements = this.elements;

+ elements.push(-len);

+ elements.push(start);

}

« src/regexp.js ('K') | « src/regexp.js ('k') | no next file » | no next file with comments »