Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(227)

Unified Diff: src/js/regexp.js

Issue 1596483005: Add ES2015 RegExp full subclassing semantics behind a flag (Closed) Base URL: https://chromium.googlesource.com/v8/v8.git@master
Patch Set: removed stray edit Created 4 years, 9 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View side-by-side diff with in-line comments
Download patch
« no previous file with comments | « src/js/prologue.js ('k') | src/messages.h » ('j') | no next file with comments »
Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
Index: src/js/regexp.js
diff --git a/src/js/regexp.js b/src/js/regexp.js
index 830fc75b30f7f3d999e99b7971cf08d3a41243f8..d9f99d81f4811d1ef8caff4957887a97604bc3d0 100644
--- a/src/js/regexp.js
+++ b/src/js/regexp.js
@@ -11,21 +11,30 @@
// -------------------------------------------------------------------
// Imports
+var AddIndexedProperty;
var ExpandReplacement;
+var GlobalArray = global.Array;
var GlobalObject = global.Object;
var GlobalRegExp = global.RegExp;
var GlobalRegExpPrototype;
var InternalArray = utils.InternalArray;
var InternalPackedArray = utils.InternalPackedArray;
var MakeTypeError;
+var MaxSimple;
+var MinSimple;
var matchSymbol = utils.ImportNow("match_symbol");
var replaceSymbol = utils.ImportNow("replace_symbol");
var searchSymbol = utils.ImportNow("search_symbol");
var splitSymbol = utils.ImportNow("split_symbol");
+var SpeciesConstructor;
utils.Import(function(from) {
+ AddIndexedProperty = from.AddIndexedProperty;
ExpandReplacement = from.ExpandReplacement;
MakeTypeError = from.MakeTypeError;
+ MaxSimple = from.MaxSimple;
+ MinSimple = from.MinSimple;
+ SpeciesConstructor = from.SpeciesConstructor;
});
// -------------------------------------------------------------------
@@ -46,6 +55,7 @@ var RegExpLastMatchInfo = new InternalPackedArray(
// -------------------------------------------------------------------
+// ES#sec-isregexp IsRegExp ( argument )
function IsRegExp(o) {
if (!IS_RECEIVER(o)) return false;
var is_regexp = o[matchSymbol];
@@ -54,7 +64,8 @@ function IsRegExp(o) {
}
-// ES6 section 21.2.3.2.2
+// ES#sec-regexpinitialize
+// Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
function RegExpInitialize(object, pattern, flags) {
pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
@@ -72,6 +83,8 @@ function PatternFlags(pattern) {
}
+// ES#sec-regexp-pattern-flags
+// RegExp ( pattern, flags )
function RegExpConstructor(pattern, flags) {
var newtarget = new.target;
var pattern_is_regexp = IsRegExp(pattern);
@@ -101,6 +114,7 @@ function RegExpConstructor(pattern, flags) {
}
+// ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags)
function RegExpCompileJS(pattern, flags) {
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
@@ -165,6 +179,54 @@ function RegExpExecNoTests(regexp, string, start) {
}
+// ES#sec-regexp.prototype.exec
+// RegExp.prototype.exec ( string )
+function RegExpSubclassExecJS(string) {
+ if (!IS_REGEXP(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ 'RegExp.prototype.exec', this);
+ }
+
+ string = TO_STRING(string);
+ var lastIndex = this.lastIndex;
+
+ // Conversion is required by the ES2015 specification (RegExpBuiltinExec
+ // algorithm, step 4) even if the value is discarded for non-global RegExps.
+ var i = TO_LENGTH(lastIndex);
+
+ var global = TO_BOOLEAN(this.global);
adamk 2016/03/22 22:23:42 Are these the only calls that differ from the exis
Dan Ehrenberg 2016/03/22 23:09:31 Yes. They showed up as relatively big items in the
+ var sticky = TO_BOOLEAN(this.sticky);
+ var updateLastIndex = global || sticky;
+ if (updateLastIndex) {
+ if (i < 0 || i > string.length) {
adamk 2016/03/22 22:23:42 I see where the > length is checked in the spec, b
Dan Ehrenberg 2016/03/22 23:09:32 My mistake, the < 0 path should be unreachable now
+ this.lastIndex = 0;
+ return null;
+ }
+ } else {
+ i = 0;
+ }
+
+ // matchIndices is either null or the RegExpLastMatchInfo array.
+ // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp
+ // itself, but ES2015 allows monkey-patching this property to differ from
+ // the internal flags. If it differs, recompile a different RegExp?
+ var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo);
+
+ if (IS_NULL(matchIndices)) {
+ this.lastIndex = 0;
+ return null;
+ }
+
+ // Successful match.
+ if (updateLastIndex) {
+ this.lastIndex = RegExpLastMatchInfo[CAPTURE1];
+ }
+ RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string);
+}
+%FunctionRemovePrototype(RegExpSubclassExecJS);
+
+
+// Legacy implementation of RegExp.prototype.exec
function RegExpExecJS(string) {
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
@@ -204,10 +266,25 @@ function RegExpExecJS(string) {
}
+// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
+function RegExpSubclassExec(regexp, string) {
+ var exec = regexp.exec;
+ if (IS_CALLABLE(exec)) {
+ var result = %_Call(exec, regexp, string);
+ if (!IS_OBJECT(result) && !IS_NULL(result)) {
+ throw MakeTypeError(kInvalidRegExpExecResult);
+ }
+ return result;
+ }
+ return %_Call(RegExpExecJS, regexp, string);
+}
+
+
// One-element cache for the simplified test regexp.
var regexp_key;
var regexp_val;
+// Legacy implementation of RegExp.prototype.test
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
// that test is defined in terms of RegExp.prototype.exec. However, it probably
// means the original value of RegExp.prototype.exec, which is what everybody
@@ -261,6 +338,19 @@ function RegExpTest(string) {
}
}
+
+// ES#sec-regexp.prototype.test RegExp.prototype.test ( S )
+function RegExpSubclassTest(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ 'RegExp.prototype.test', this);
+ }
+ string = TO_STRING(string);
+ var match = RegExpSubclassExec(this, string);
+ return !IS_NULL(match);
+}
+%FunctionRemovePrototype(RegExpSubclassTest);
+
function TrimRegExp(regexp) {
if (regexp_key !== regexp) {
regexp_key = regexp;
@@ -308,7 +398,8 @@ function AtSurrogatePair(subject, index) {
}
-// ES6 21.2.5.11.
+// Legacy implementation of RegExp.prototype[Symbol.split] which
+// doesn't properly call the underlying exec, @@species methods
function RegExpSplit(string, limit) {
// TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
@@ -382,9 +473,71 @@ function RegExpSplit(string, limit) {
}
-// ES6 21.2.5.6.
+// ES#sec-regexp.prototype-@@split
+// RegExp.prototype [ @@split ] ( string, limit )
+function RegExpSubclassSplit(string, limit) {
+ if (!IS_RECEIVER(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@split", this);
+ }
+ string = TO_STRING(string);
+ var constructor = SpeciesConstructor(this, GlobalRegExp);
+ var flags = TO_STRING(this.flags);
+ var unicode = %StringIndexOf(flags, 'u', 0) >= 0;
+ var sticky = %StringIndexOf(flags, 'y', 0) >= 0;
+ var new_flags = sticky ? flags : flags + "y";
adamk 2016/03/23 01:07:19 I'd prefer if new code added in this patch used ca
Dan Ehrenberg 2016/03/24 00:50:54 fixed
+ var splitter = new constructor(this, new_flags);
+ var array = new GlobalArray();
adamk 2016/03/22 22:23:42 It seems like the main reason to use an Array here
Dan Ehrenberg 2016/03/22 23:09:31 Good idea for optimization; I was just blindly fol
adamk 2016/03/23 01:07:19 Given that we already know there's lots of optimiz
+ var array_index = 0;
+ var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit);
+ var size = string.length;
+ var prev_string_index = 0;
+ if (lim === 0) return array;
+ var result;
+ if (size === 0) {
+ result = RegExpSubclassExec(splitter, string);
+ if (IS_NULL(result)) AddIndexedProperty(array, 0, string);
+ return array;
+ }
+ var string_index = prev_string_index;
+ while (string_index < size) {
+ splitter.lastIndex = string_index;
+ result = RegExpSubclassExec(splitter, string);
+ if (IS_NULL(result)) {
+ string_index += GetUnicodeAdvancedIncrement(string, string_index,
+ unicode);
+ } else {
+ var end = MinSimple(splitter.lastIndex, size);
adamk 2016/03/22 22:23:42 Missing TO_LENGTH? Hard to tell it's required, but
Dan Ehrenberg 2016/03/22 23:09:31 Oops, definitely needed around splitter.lastIndex.
+ if (end === prev_string_index) {
+ string_index += GetUnicodeAdvancedIncrement(string, string_index,
+ unicode);
+ } else {
+ AddIndexedProperty(
+ array, array_index,
+ %_SubString(string, prev_string_index, string_index));
+ array_index++;
+ if (array_index === lim) return array;
+ prev_string_index = end;
+ var number_of_captures = MaxSimple(TO_LENGTH(result.length), 0);
+ for (var i = 1; i < number_of_captures; i++) {
+ AddIndexedProperty(array, array_index, result[i]);
+ array_index++;
+ if (array_index === lim) return array;
+ }
+ string_index = prev_string_index;
+ }
+ }
+ }
+ AddIndexedProperty(array, array_index,
+ %_SubString(string, prev_string_index, size));
+ return array;
+}
+%FunctionRemovePrototype(RegExpSubclassSplit);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.match] which
+// doesn't properly call the underlying exec method
function RegExpMatch(string) {
- // TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@match", this);
@@ -398,7 +551,38 @@ function RegExpMatch(string) {
}
-// ES6 21.2.5.8.
+// ES#sec-regexp.prototype-@@match
+// RegExp.prototype [ @@match ] ( string )
+function RegExpSubclassMatch(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@match", this);
+ }
+ string = TO_STRING(string);
+ var global = this.global;
+ if (!global) return RegExpSubclassExec(this, string);
+ var unicode = this.unicode;
+ this.lastIndex = 0;
+ var array = [];
+ var n = 0;
+ var result;
+ while (true) {
+ result = RegExpSubclassExec(this, string);
+ if (IS_NULL(result)) {
+ if (n === 0) return null;
+ return array;
+ }
+ var matchStr = TO_STRING(result[0]);
+ %AddElement(array, n, matchStr);
+ if (matchStr === "") AdvanceStringIndex(this, string, unicode);
adamk 2016/03/22 22:23:42 I found it quite confusing that what the spec call
Dan Ehrenberg 2016/03/22 23:09:32 Fixed it to have more meaningful names.
+ n++;
+ }
+}
+%FunctionRemovePrototype(RegExpSubclassMatch);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.replace] which
+// doesn't properly call the underlying exec method.
// TODO(lrn): This array will survive indefinitely if replace is never
// called again. However, it will be empty, since the contents are cleared
@@ -525,7 +709,6 @@ function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) {
function RegExpReplace(string, replace) {
- // TODO(littledan): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@replace", this);
@@ -567,9 +750,188 @@ function RegExpReplace(string, replace) {
}
-// ES6 21.2.5.9.
+// ES#sec-getsubstitution
+// GetSubstitution(matched, str, position, captures, replacement)
+// Expand the $-expressions in the string and return a new string with
+// the result.
+// TODO(littledan): Call this function from String.prototype.replace instead
adamk 2016/03/22 22:23:42 I agree :) This one's really big; what's the diff
Dan Ehrenberg 2016/03/22 23:09:32 They take the captures/replacement in different fo
+// of the very similar ExpandReplacement in src/js/string.js
+function GetSubstitution(matched, string, position, captures, replacement) {
+ var match_length = matched.length;
+ var string_length = string.length;
+ var captures_length = captures.length;
+ var tail_pos = position + match_length;
+ var result = "";
+ var pos, expansion, peek, next, scaled_index, advance, new_scaled_index;
+
+ var next = %StringIndexOf(replacement, '$', 0);
+ if (next < 0) {
+ result += replacement;
+ return result;
+ }
+
+ if (next > 0) result += %_SubString(replacement, 0, next);
+
+ while (true) {
+ expansion = '$';
+ pos = next + 1;
+ if (pos < replacement.length) {
+ peek = %_StringCharCodeAt(replacement, pos);
+ if (peek == 36) { // $$
+ ++pos;
+ result += '$';
+ } else if (peek == 38) { // $& - match
+ ++pos;
+ result += matched;
+ } else if (peek == 96) { // $` - prefix
+ ++pos;
+ result += %_SubString(string, 0, position);
+ } else if (peek == 39) { // $' - suffix
+ ++pos;
+ result += %_SubString(string, tail_pos, string_length);
+ } else if (peek >= 48 && peek <= 57) {
+ // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99
+ scaled_index = (peek - 48);
+ advance = 1;
+ if (pos + 1 < replacement.length) {
+ next = %_StringCharCodeAt(replacement, pos + 1);
+ if (next >= 48 && next <= 57) {
+ new_scaled_index = scaled_index * 10 + ((next - 48));
+ if (new_scaled_index < captures_length) {
+ scaled_index = new_scaled_index;
+ advance = 2;
+ }
+ }
+ }
+ if (scaled_index != 0 && scaled_index < captures_length) {
+ var capture = captures[scaled_index];
+ if (!IS_UNDEFINED(capture)) result += capture;
+ pos += advance;
+ } else {
+ result += '$';
+ }
+ } else {
+ result += '$';
+ }
+ } else {
+ result += '$';
+ }
+
+ // Go the the next $ in the replacement.
+ next = %StringIndexOf(replacement, '$', pos);
+
+ // Return if there are no more $ characters in the replacement. If we
+ // haven't reached the end, we need to append the suffix.
+ if (next < 0) {
+ if (pos < replacement.length) {
+ result += %_SubString(replacement, pos, replacement.length);
+ }
+ return result;
+ }
+
+ // Append substring between the previous and the next $ character.
+ if (next > pos) {
+ result += %_SubString(replacement, pos, next);
+ }
+ }
+ return result;
+}
+
+
+function GetUnicodeAdvancedIncrement(string, index, unicode) {
+ var increment = 1;
+ if (unicode) {
+ var first = %_StringCharCodeAt(string, index);
+ if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) {
+ var second = %_StringCharCodeAt(string, index + 1);
+ if (second >= 0xDC00 && second <= 0xDFFF) {
+ increment = 2;
+ }
+ }
+ }
+ return increment;
+}
+
+
+// ES#sec-advancestringindex
+// AdvanceStringIndex ( S, index, unicode )
+function AdvanceStringIndex(regexp, string, unicode) {
+ var last_index = regexp.lastIndex;
+ regexp.lastIndex = last_index +
+ GetUnicodeAdvancedIncrement(string, last_index, unicode);
+}
+
+
+// ES#sec-regexp.prototype-@@replace
+// RegExp.prototype [ @@replace ] ( string, replaceValue )
+function RegExpSubclassReplace(string, replace) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@replace", this);
+ }
+ string = TO_STRING(string);
+ var length = string.length;
+ var functional_replace = IS_CALLABLE(replace);
+ if (!functional_replace) replace = TO_STRING(replace);
+ var global = this.global;
+ if (global) {
+ var unicode = this.unicode;
+ this.lastIndex = 0;
+ }
+ var results = new InternalArray();
+ var result, replacement;
+ while (true) {
+ result = RegExpSubclassExec(this, string);
+ if (IS_NULL(result)) {
+ break;
+ } else {
+ results.push(result);
+ if (!global) break;
+ var match_str = TO_STRING(result[0]);
+ if (match_str === "") AdvanceStringIndex(this, string, unicode);
+ }
+ }
+ var accumulated_result = "";
+ var next_source_position = 0;
+ for (var i = 0; i < results.length; i++) {
+ result = results[i];
+ var captures_length = MaxSimple(TO_LENGTH(result.length), 0);
+ var matched = TO_STRING(result[0]);
+ var matched_length = matched.length;
+ var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0);
+ var captures = new InternalArray();
+ for (var n = 0; n < captures_length; n++) {
+ var capture = result[n];
+ if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture);
+ captures[n] = capture;
+ }
+ if (functional_replace) {
+ var parameters = new InternalArray(captures_length + 2);
+ for (var j = 0; j < captures_length; j++) {
+ parameters[j] = captures[j];
+ }
+ parameters[j] = position;
+ parameters[j + 1] = string;
+ replacement = %reflect_apply(replace, UNDEFINED, parameters, 0,
+ parameters.length);
+ } else {
+ replacement = GetSubstitution(matched, string, position, captures, replace);
adamk 2016/03/23 01:07:19 Nit: 80 cols.
Dan Ehrenberg 2016/03/24 00:50:54 fixed
+ }
+ if (position >= next_source_position) {
+ accumulated_result +=
+ %_SubString(string, next_source_position, position) + replacement;
+ next_source_position = position + matched_length;
+ }
+ }
+ if (next_source_position >= length) return accumulated_result;
+ return accumulated_result + %_SubString(string, next_source_position, length);
+}
+%FunctionRemovePrototype(RegExpSubclassReplace);
+
+
+// Legacy implementation of RegExp.prototype[Symbol.search] which
+// doesn't properly use the overridden exec method
function RegExpSearch(string) {
- // TODO(yangguo): allow non-regexp receivers.
if (!IS_REGEXP(this)) {
throw MakeTypeError(kIncompatibleMethodReceiver,
"RegExp.prototype.@@search", this);
@@ -580,6 +942,24 @@ function RegExpSearch(string) {
}
+// ES#sec-regexp.prototype-@@search
+// RegExp.prototype [ @@search ] ( string )
+function RegExpSubclassSearch(string) {
+ if (!IS_OBJECT(this)) {
+ throw MakeTypeError(kIncompatibleMethodReceiver,
+ "RegExp.prototype.@@search", this);
+ }
+ string = TO_STRING(string);
+ var previousLastIndex = this.lastIndex;
+ this.lastIndex = 0;
+ var result = RegExpSubclassExec(this, string);
+ this.lastIndex = previousLastIndex;
+ if (IS_NULL(result)) return -1;
+ return result.index;
+}
+%FunctionRemovePrototype(RegExpSubclassSearch);
+
+
// Getters for the static properties lastMatch, lastParen, leftContext, and
// rightContext of the RegExp constructor. The properties are computed based
// on the captures array of the last successful match and the subject string
@@ -781,6 +1161,12 @@ utils.Export(function(to) {
to.RegExpExec = DoRegExpExec;
to.RegExpExecNoTests = RegExpExecNoTests;
to.RegExpLastMatchInfo = RegExpLastMatchInfo;
+ to.RegExpSubclassExecJS = RegExpSubclassExecJS;
+ to.RegExpSubclassMatch = RegExpSubclassMatch;
+ to.RegExpSubclassReplace = RegExpSubclassReplace;
+ to.RegExpSubclassSearch = RegExpSubclassSearch;
+ to.RegExpSubclassSplit = RegExpSubclassSplit;
+ to.RegExpSubclassTest = RegExpSubclassTest;
to.RegExpTest = RegExpTest;
to.IsRegExp = IsRegExp;
});
« no previous file with comments | « src/js/prologue.js ('k') | src/messages.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698