Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(383)

Unified Diff: src/js/regexp.js

Issue 1596483005: Add ES2015 RegExp full subclassing semantics behind a flag (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: camelCase Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/js/prologue.js ('k') | src/messages.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/js/regexp.js
diff --git a/src/js/regexp.js b/src/js/regexp.js
index dc53f17a0cbdaa115ada14c657ee04f0842d180a..a1055b3dd5e2f876dbe253dab400d53e96ee1aad 100644
--- a/src/js/regexp.js
+++ b/src/js/regexp.js
@@ -11,21 +11,30 @@
// -------------------------------------------------------------------
// Imports
+var AddIndexedProperty;
var ExpandReplacement;
+var GlobalArray = global.Array;
var GlobalObject = global.Object;
var GlobalRegExp = global.RegExp;
var GlobalRegExpPrototype;
var InternalArray = utils.InternalArray;
var InternalPackedArray = utils.InternalPackedArray;
var MakeTypeError;
+var MaxSimple;
+var MinSimple;
var matchSymbol = utils.ImportNow("match_symbol");
var replaceSymbol = utils.ImportNow("replace_symbol");
var searchSymbol = utils.ImportNow("search_symbol");
var splitSymbol = utils.ImportNow("split_symbol");
+var SpeciesConstructor;
utils.Import(function(from) {
+ AddIndexedProperty = from.AddIndexedProperty;
ExpandReplacement = from.ExpandReplacement;
MakeTypeError = from.MakeTypeError;
+ MaxSimple = from.MaxSimple;
+ MinSimple = from.MinSimple;
+ SpeciesConstructor = from.SpeciesConstructor;
});
// -------------------------------------------------------------------
@@ -46,6 +55,7 @@ var RegExpLastMatchInfo = new InternalPackedArray(
// -------------------------------------------------------------------
+// ES#sec-isregexp IsRegExp ( argument )
function IsRegExp(o) {
if (!IS_RECEIVER(o)) return false;
var is_regexp = o[matchSymbol];
@@ -54,7 +64,8 @@ function IsRegExp(o) {
}
-// ES6 section 21.2.3.2.2
+// ES#sec-regexpinitialize
+// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
function RegExpInitialize(object, pattern, flags) {
pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
@@ -72,6 +83,8 @@ function PatternFlags(pattern) {
}
+// ES#sec-regexp-pattern-flags
+// RegExp ( pattern, flags )
function RegExpConstructor(pattern, flags) {
var newtarget = new.target;
var pattern_is_regexp = IsRegExp(pattern);
@@ -101,6 +114,7 @@ function RegExpConstructor(pattern, flags) {
}
+// ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags)
function RegExpCompileJS(pattern, flags) {
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
@@ -165,6 +179,54 @@ function RegExpExecNoTests(regexp, string, start) {
}
+// ES#sec-regexp.prototype.exec
+// RegExp.prototype.exec ( string )
+//
+// Matches `string` against the receiver, reading lastIndex, global and
+// sticky from the receiver as ordinary properties.
+// Throws a TypeError (kIncompatibleMethodReceiver) if the receiver fails
+// IS_REGEXP. Returns null when there is no match; on a match, control is
+// handed to the RETURN_NEW_RESULT_FROM_MATCH_INFO macro (defined outside
+// this view), which presumably builds and returns the result array.
+function RegExpSubclassExecJS(string) {
+ if (!IS_REGEXP(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ 'RegExp.prototype.exec', this);
+ }
+
+ string = TO_STRING(string);
+ var lastIndex = this.lastIndex;
+
+ // Conversion is required by the ES2015 specification (RegExpBuiltinExec
+ // algorithm, step 4) even if the value is discarded for non-global RegExps.
+ var i = TO_LENGTH(lastIndex);
+
+ var global = TO_BOOLEAN(this.global);
+ var sticky = TO_BOOLEAN(this.sticky);
+ // Only global or sticky regexps start matching at lastIndex and get
+ // lastIndex advanced after a successful match.
+ var updateLastIndex = global || sticky;
+ if (updateLastIndex) {
+ // A start position beyond the end of the subject cannot match: reset
+ // lastIndex and report failure without running the matcher.
+ if (i > string.length) {
+ this.lastIndex = 0;
+ return null;
+ }
+ } else {
+ // Neither global nor sticky: the stored lastIndex is ignored.
+ i = 0;
+ }
+
+ // matchIndices is either null or the RegExpLastMatchInfo array.
+ // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
+ // itself, but ES2015 allows monkey-patching this property to differ from
+ // the internal flags. If it differs, recompile a different RegExp?
+ var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
+
+ // On failure lastIndex is reset to 0 regardless of the global/sticky flags.
+ if (IS_NULL(matchIndices)) {
+ this.lastIndex = 0;
+ return null;
+ }
+
+ // Successful match.
+ if (updateLastIndex) {
+ // NOTE(review): CAPTURE1 presumably indexes the end offset of the whole
+ // match inside RegExpLastMatchInfo - confirm against the macro definition.
+ this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
+ }
+ RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
+}
+%FunctionRemovePrototype(RegExpSubclassExecJS);
+
+
+// Legacy implementation of RegExp.prototype.exec
function RegExpExecJS(string) {
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
@@ -204,10 +266,25 @@ function RegExpExecJS(string) {
}
+// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
+function RegExpSubclassExec(regexp, string) {
+ var exec = regexp.exec;
+ if (IS_CALLABLE(exec)) {
+ var result = %_Call(exec, regexp, string);
+ if (!IS_OBJECT(result) && !IS_NULL(result)) {
+ throw MakeTypeError(kInvalidRegExpExecResult);
+ }
+ return result;
+ }
+ return %_Call(RegExpExecJS, regexp, string);
+}
+
+
// One-element cache for the simplified test regexp.
var regexp_key;
var regexp_val;
+// Legacy implementation of RegExp.prototype.test
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
// that test is defined in terms of RegExp.prototype.exec. However, it probably
// means the original value of RegExp.prototype.exec, which is what everybody
@@ -261,6 +338,19 @@ function RegExpTest(string) {
}
}
+
+// ES#sec-regexp.prototype.test RegExp.prototype.test ( S )
+function RegExpSubclassTest(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ 'RegExp.prototype.test', this);
+ }
+ string = TO_STRING(string);
+ var match = RegExpSubclassExec(this, string);
+ return !IS_NULL(match);
+}
+%FunctionRemovePrototype(RegExpSubclassTest);
+
function TrimRegExp(regexp) {
if (regexp_key !== regexp) {
regexp_key = regexp;
@@ -308,7 +398,8 @@ function AtSurrogatePair(subject, index) {
}
-// ES6 21.2.5.11.
+// Legacy implementation of RegExp.prototype[Symbol.split] which
+// doesn't properly call the underlying exec, @@species methods
function RegExpSplit(string, limit) {
// TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
@@ -382,9 +473,69 @@ function RegExpSplit(string, limit) {
}
-// ES6 21.2.5.6.
+// ES#sec-regexp.prototype-@@split
+// RegExp.prototype [ @@split ] ( string, limit )
+function RegExpSubclassSplit(string, limit) {
+ if (!IS_RECEIVER(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@split", this);
+ }
+ string = TO_STRING(string);
+ var constructor = SpeciesConstructor(this, GlobalRegExp);
+ var flags = TO_STRING(this.flags);
+ var unicode = %StringIndexOf(flags, 'u', 0) >= 0;
+ var sticky = %StringIndexOf(flags, 'y', 0) >= 0;
+ var newFlags = sticky ? flags : flags + "y";
+ var splitter = new constructor(this, newFlags);
+ var array = new GlobalArray();
+ var arrayIndex = 0;
+ var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
+ var size = string.length;
+ var prevStringIndex = 0;
+ if (lim === 0) return array;
+ var result;
+ if (size === 0) {
+ result = RegExpSubclassExec(splitter, string);
+ if (IS_NULL(result)) AddIndexedProperty(array, 0, string);
+ return array;
+ }
+ var stringIndex = prevStringIndex;
+ while (stringIndex < size) {
+ splitter.lastIndex = stringIndex;
+ result = RegExpSubclassExec(splitter, string);
+ if (IS_NULL(result)) {
+ stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
+ } else {
+ var end = MinSimple(TO_LENGTH(splitter.lastIndex), size);
+ if (end === stringIndex) {
+ stringIndex += AdvanceStringIndex(string, stringIndex, unicode);
+ } else {
+ AddIndexedProperty(
+ array, arrayIndex,
+ %_SubString(string, prevStringIndex, stringIndex));
+ arrayIndex++;
+ if (arrayIndex === lim) return array;
+ prevStringIndex = end;
+ var numberOfCaptures = MaxSimple(TO_LENGTH(result.length), 0);
+ for (var i = 1; i < numberOfCaptures; i++) {
+ AddIndexedProperty(array, arrayIndex, result[i]);
+ arrayIndex++;
+ if (arrayIndex === lim) return array;
+ }
+ stringIndex = prevStringIndex;
+ }
+ }
+ }
+ AddIndexedProperty(array, arrayIndex,
+ %_SubString(string, prevStringIndex, size));
+ return array;
+}
+%FunctionRemovePrototype(RegExpSubclassSplit);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.match] which
+// doesn't properly call the underlying exec method
function RegExpMatch(string) {
- // TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@match", this);
@@ -398,7 +549,38 @@ function RegExpMatch(string) {
}
-// ES6 21.2.5.8.
+// ES#sec-regexp.prototype-@@match
+// RegExp.prototype [ @@match ] ( string )
+function RegExpSubclassMatch(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@match", this);
+ }
+ string = TO_STRING(string);
+ var global = this.global;
+ if (!global) return RegExpSubclassExec(this, string);
+ var unicode = this.unicode;
+ this.lastIndex = 0;
+ var array = [];
+ var n = 0;
+ var result;
+ while (true) {
+ result = RegExpSubclassExec(this, string);
+ if (IS_NULL(result)) {
+ if (n === 0) return null;
+ return array;
+ }
+ var matchStr = TO_STRING(result[0]);
+ %AddElement(array, n, matchStr);
+ if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
+ n++;
+ }
+}
+%FunctionRemovePrototype(RegExpSubclassMatch);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.replace] which
+// doesn't properly call the underlying exec method.
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
@@ -525,7 +707,6 @@ function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) {
function RegExpReplace(string, replace) {
- // TODO(littledan): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@replace", this);
@@ -567,9 +748,189 @@ function RegExpReplace(string, replace) {
}
-// ES6 21.2.5.9.
+// ES#sec-getsubstitution
+// GetSubstitution(matched, str, position, captures, replacement)
+// Expand the $-expressions in the string and return a new string with
+// the result.
+// TODO(littledan): Call this function from String.prototype.replace instead
+// of the very similar ExpandReplacement in src/js/string.js
+function GetSubstitution(matched, string, position, captures, replacement) {
+ var matchLength = matched.length;
+ var stringLength = string.length;
+ var capturesLength = captures.length;
+ var tailPos = position + matchLength;
+ var result = "";
+ var pos, expansion, peek, next, scaledIndex, advance, newScaledIndex;
+
+ var next = %StringIndexOf(replacement, '$', 0);
+ if (next < 0) {
+ result += replacement;
+ return result;
+ }
+
+ if (next > 0) result += %_SubString(replacement, 0, next);
+
+ while (true) {
+ expansion = '$';
+ pos = next + 1;
+ if (pos < replacement.length) {
+ peek = %_StringCharCodeAt(replacement, pos);
+ if (peek == 36) { // $$
+ ++pos;
+ result += '$';
+ } else if (peek == 38) { // $& - match
+ ++pos;
+ result += matched;
+ } else if (peek == 96) { // $` - prefix
+ ++pos;
+ result += %_SubString(string, 0, position);
+ } else if (peek == 39) { // $' - suffix
+ ++pos;
+ result += %_SubString(string, tailPos, stringLength);
+ } else if (peek >= 48 && peek <= 57) {
+ // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
+ scaledIndex = (peek - 48);
+ advance = 1;
+ if (pos + 1 < replacement.length) {
+ next = %_StringCharCodeAt(replacement, pos + 1);
+ if (next >= 48 && next <= 57) {
+ newScaledIndex = scaledIndex * 10 + ((next - 48));
+ if (newScaledIndex < capturesLength) {
+ scaledIndex = newScaledIndex;
+ advance = 2;
+ }
+ }
+ }
+ if (scaledIndex != 0 && scaledIndex < capturesLength) {
+ var capture = captures[scaledIndex];
+ if (!IS_UNDEFINED(capture)) result += capture;
+ pos += advance;
+ } else {
+ result += '$';
+ }
+ } else {
+ result += '$';
+ }
+ } else {
+ result += '$';
+ }
+
+ // Go the the next $ in the replacement.
+ next = %StringIndexOf(replacement, '$', pos);
+
+ // Return if there are no more $ characters in the replacement. If we
+ // haven't reached the end, we need to append the suffix.
+ if (next < 0) {
+ if (pos < replacement.length) {
+ result += %_SubString(replacement, pos, replacement.length);
+ }
+ return result;
+ }
+
+ // Append substring between the previous and the next $ character.
+ if (next > pos) {
+ result += %_SubString(replacement, pos, next);
+ }
+ }
+ return result;
+}
+
+
+// ES#sec-advancestringindex
+// AdvanceStringIndex ( S, index, unicode )
+function AdvanceStringIndex(string, index, unicode) {
+ var increment = 1;
+ if (unicode) {
+ var first = %_StringCharCodeAt(string, index);
+ if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) {
+ var second = %_StringCharCodeAt(string, index + 1);
+ if (second >= 0xDC00 && second <= 0xDFFF) {
+ increment = 2;
+ }
+ }
+ }
+ return increment;
+}
+
+
+function SetAdvancedStringIndex(regexp, string, unicode) {
+ var lastIndex = regexp.lastIndex;
+ regexp.lastIndex = lastIndex +
+ AdvanceStringIndex(string, lastIndex, unicode);
+}
+
+
+// ES#sec-regexp.prototype-@@replace
+// RegExp.prototype [ @@replace ] ( string, replaceValue )
+function RegExpSubclassReplace(string, replace) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@replace", this);
+ }
+ string = TO_STRING(string);
+ var length = string.length;
+ var functionalReplace = IS_CALLABLE(replace);
+ if (!functionalReplace) replace = TO_STRING(replace);
+ var global = this.global;
+ if (global) {
+ var unicode = this.unicode;
+ this.lastIndex = 0;
+ }
+ var results = new InternalArray();
+ var result, replacement;
+ while (true) {
+ result = RegExpSubclassExec(this, string);
+ if (IS_NULL(result)) {
+ break;
+ } else {
+ results.push(result);
+ if (!global) break;
+ var matchStr = TO_STRING(result[0]);
+ if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
+ }
+ }
+ var accumulatedResult = "";
+ var nextSourcePosition = 0;
+ for (var i = 0; i < results.length; i++) {
+ result = results[i];
+ var capturesLength = MaxSimple(TO_LENGTH(result.length), 0);
+ var matched = TO_STRING(result[0]);
+ var matchedLength = matched.length;
+ var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0);
+ var captures = new InternalArray();
+ for (var n = 0; n < capturesLength; n++) {
+ var capture = result[n];
+ if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture);
+ captures[n] = capture;
+ }
+ if (functionalReplace) {
+ var parameters = new InternalArray(capturesLength + 2);
+ for (var j = 0; j < capturesLength; j++) {
+ parameters[j] = captures[j];
+ }
+ parameters[j] = position;
+ parameters[j + 1] = string;
+ replacement = %reflect_apply(replace, UNDEFINED, parameters, 0,
+ parameters.length);
+ } else {
+ replacement = GetSubstitution(matched, string, position, captures,
+ replace);
+ }
+ if (position >= nextSourcePosition) {
+ accumulatedResult +=
+ %_SubString(string, nextSourcePosition, position) + replacement;
+ nextSourcePosition = position + matchedLength;
+ }
+ }
+ if (nextSourcePosition >= length) return accumulatedResult;
+ return accumulatedResult + %_SubString(string, nextSourcePosition, length);
+}
+%FunctionRemovePrototype(RegExpSubclassReplace);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.search] which
+// doesn't properly use the overridden exec method
function RegExpSearch(string) {
- // TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@search", this);
@@ -580,6 +941,24 @@ function RegExpSearch(string) {
}
+// ES#sec-regexp.prototype-@@search
+// RegExp.prototype [ @@search ] ( string )
+function RegExpSubclassSearch(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@search", this);
+ }
+ string = TO_STRING(string);
+ var previousLastIndex = this.lastIndex;
+ this.lastIndex = 0;
+ var result = RegExpSubclassExec(this, string);
+ this.lastIndex = previousLastIndex;
+ if (IS_NULL(result)) return -1;
+ return result.index;
+}
+%FunctionRemovePrototype(RegExpSubclassSearch);
+
+
// Getters for the static properties lastMatch, lastParen, leftContext, and
// rightContext of the RegExp constructor. The properties are computed based
// on the captures array of the last successful match and the subject string
@@ -780,6 +1159,12 @@ for (var i = 1; i < 10; ++i) {
utils.Export(function(to) {
to.RegExpExec = DoRegExpExec;
to.RegExpLastMatchInfo = RegExpLastMatchInfo;
+ to.RegExpSubclassExecJS = RegExpSubclassExecJS;
+ to.RegExpSubclassMatch = RegExpSubclassMatch;
+ to.RegExpSubclassReplace = RegExpSubclassReplace;
+ to.RegExpSubclassSearch = RegExpSubclassSearch;
+ to.RegExpSubclassSplit = RegExpSubclassSplit;
+ to.RegExpSubclassTest = RegExpSubclassTest;
to.RegExpTest = RegExpTest;
to.IsRegExp = IsRegExp;
});
« no previous file with comments | « src/js/prologue.js ('k') | src/messages.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698