Chromium Code Reviews| Index: src/js/regexp.js |
| diff --git a/src/js/regexp.js b/src/js/regexp.js |
| index 830fc75b30f7f3d999e99b7971cf08d3a41243f8..d9f99d81f4811d1ef8caff4957887a97604bc3d0 100644 |
| --- a/src/js/regexp.js |
| +++ b/src/js/regexp.js |
| @@ -11,21 +11,30 @@ |
| // ------------------------------------------------------------------- |
| // Imports |
| +var AddIndexedProperty; |
| var ExpandReplacement; |
| +var GlobalArray = global.Array; |
| var GlobalObject = global.Object; |
| var GlobalRegExp = global.RegExp; |
| var GlobalRegExpPrototype; |
| var InternalArray = utils.InternalArray; |
| var InternalPackedArray = utils.InternalPackedArray; |
| var MakeTypeError; |
| +var MaxSimple; |
| +var MinSimple; |
| var matchSymbol = utils.ImportNow("match_symbol"); |
| var replaceSymbol = utils.ImportNow("replace_symbol"); |
| var searchSymbol = utils.ImportNow("search_symbol"); |
| var splitSymbol = utils.ImportNow("split_symbol"); |
| +var SpeciesConstructor; |
| utils.Import(function(from) { |
| + AddIndexedProperty = from.AddIndexedProperty; |
| ExpandReplacement = from.ExpandReplacement; |
| MakeTypeError = from.MakeTypeError; |
| + MaxSimple = from.MaxSimple; |
| + MinSimple = from.MinSimple; |
| + SpeciesConstructor = from.SpeciesConstructor; |
| }); |
| // ------------------------------------------------------------------- |
| @@ -46,6 +55,7 @@ var RegExpLastMatchInfo = new InternalPackedArray( |
| // ------------------------------------------------------------------- |
| +// ES#sec-isregexp IsRegExp ( argument ) |
| function IsRegExp(o) { |
| if (!IS_RECEIVER(o)) return false; |
| var is_regexp = o[matchSymbol]; |
| @@ -54,7 +64,8 @@ function IsRegExp(o) { |
| } |
| -// ES6 section 21.2.3.2.2 |
| +// ES#sec-regexpinitialize |
| +// Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) |
| function RegExpInitialize(object, pattern, flags) { |
| pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); |
| flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); |
| @@ -72,6 +83,8 @@ function PatternFlags(pattern) { |
| } |
| +// ES#sec-regexp-pattern-flags |
| +// RegExp ( pattern, flags ) |
| function RegExpConstructor(pattern, flags) { |
| var newtarget = new.target; |
| var pattern_is_regexp = IsRegExp(pattern); |
| @@ -101,6 +114,7 @@ function RegExpConstructor(pattern, flags) { |
| } |
| +// ES#sec-regexp.prototype.compile RegExp.prototype.compile (pattern, flags) |
| function RegExpCompileJS(pattern, flags) { |
| if (!IS_REGEXP(this)) { |
| throw MakeTypeError(kIncompatibleMethodReceiver, |
| @@ -165,6 +179,54 @@ function RegExpExecNoTests(regexp, string, start) { |
| } |
| +// ES#sec-regexp.prototype.exec |
| +// RegExp.prototype.exec ( string ) |
| +function RegExpSubclassExecJS(string) { |
| + if (!IS_REGEXP(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + 'RegExp.prototype.exec', this); |
| + } |
| + |
| + string = TO_STRING(string); |
| + var lastIndex = this.lastIndex; |
| + |
| + // Conversion is required by the ES2015 specification (RegExpBuiltinExec |
| + // algorithm, step 4) even if the value is discarded for non-global RegExps. |
| + var i = TO_LENGTH(lastIndex); |
| + |
| + var global = TO_BOOLEAN(this.global); |
|
adamk
2016/03/22 22:23:42
Are these the only calls that differ from the exis
Dan Ehrenberg
2016/03/22 23:09:31
Yes. They showed up as relatively big items in the
|
| + var sticky = TO_BOOLEAN(this.sticky); |
| + var updateLastIndex = global || sticky; |
| + if (updateLastIndex) { |
| + if (i < 0 || i > string.length) { |
|
adamk
2016/03/22 22:23:42
I see where the > length is checked in the spec, b
Dan Ehrenberg
2016/03/22 23:09:32
My mistake, the < 0 path should be unreachable now
|
| + this.lastIndex = 0; |
| + return null; |
| + } |
| + } else { |
| + i = 0; |
| + } |
| + |
| + // matchIndices is either null or the RegExpLastMatchInfo array. |
| + // TODO(littledan): Whether a RegExp is sticky is compiled into the RegExp |
| + // itself, but ES2015 allows monkey-patching this property to differ from |
| + // the internal flags. If it differs, recompile a different RegExp? |
| + var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); |
| + |
| + if (IS_NULL(matchIndices)) { |
| + this.lastIndex = 0; |
| + return null; |
| + } |
| + |
| + // Successful match. |
| + if (updateLastIndex) { |
| + this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; |
| + } |
| + RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassExecJS); |
| + |
| + |
| +// Legacy implementation of RegExp.prototype.exec |
| function RegExpExecJS(string) { |
| if (!IS_REGEXP(this)) { |
| throw MakeTypeError(kIncompatibleMethodReceiver, |
| @@ -204,10 +266,25 @@ function RegExpExecJS(string) { |
| } |
| +// ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S ) |
| +function RegExpSubclassExec(regexp, string) { |
| + var exec = regexp.exec; |
| + if (IS_CALLABLE(exec)) { |
| + var result = %_Call(exec, regexp, string); |
| + if (!IS_OBJECT(result) && !IS_NULL(result)) { |
| + throw MakeTypeError(kInvalidRegExpExecResult); |
| + } |
| + return result; |
| + } |
| + return %_Call(RegExpExecJS, regexp, string); |
| +} |
| + |
| + |
| // One-element cache for the simplified test regexp. |
| var regexp_key; |
| var regexp_val; |
| +// Legacy implementation of RegExp.prototype.test |
| // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be |
| // that test is defined in terms of RegExp.prototype.exec. However, it probably |
| // means the original value of RegExp.prototype.exec, which is what everybody |
| @@ -261,6 +338,19 @@ function RegExpTest(string) { |
| } |
| } |
| + |
| +// ES#sec-regexp.prototype.test RegExp.prototype.test ( S ) |
| +function RegExpSubclassTest(string) { |
| + if (!IS_OBJECT(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + 'RegExp.prototype.test', this); |
| + } |
| + string = TO_STRING(string); |
| + var match = RegExpSubclassExec(this, string); |
| + return !IS_NULL(match); |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassTest); |
| + |
| function TrimRegExp(regexp) { |
| if (regexp_key !== regexp) { |
| regexp_key = regexp; |
| @@ -308,7 +398,8 @@ function AtSurrogatePair(subject, index) { |
| } |
| -// ES6 21.2.5.11. |
| +// Legacy implementation of RegExp.prototype[Symbol.split] which |
| +// doesn't properly call the underlying exec, @@species methods |
| function RegExpSplit(string, limit) { |
| // TODO(yangguo): allow non-regexp receivers. |
| if (!IS_REGEXP(this)) { |
| @@ -382,9 +473,71 @@ function RegExpSplit(string, limit) { |
| } |
| -// ES6 21.2.5.6. |
| +// ES#sec-regexp.prototype-@@split |
| +// RegExp.prototype [ @@split ] ( string, limit ) |
| +function RegExpSubclassSplit(string, limit) { |
| + if (!IS_RECEIVER(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + "RegExp.prototype.@@split", this); |
| + } |
| + string = TO_STRING(string); |
| + var constructor = SpeciesConstructor(this, GlobalRegExp); |
| + var flags = TO_STRING(this.flags); |
| + var unicode = %StringIndexOf(flags, 'u', 0) >= 0; |
| + var sticky = %StringIndexOf(flags, 'y', 0) >= 0; |
| + var new_flags = sticky ? flags : flags + "y"; |
|
adamk
2016/03/23 01:07:19
I'd prefer if new code added in this patch used ca
Dan Ehrenberg
2016/03/24 00:50:54
fixed
|
| + var splitter = new constructor(this, new_flags); |
| + var array = new GlobalArray(); |
|
adamk
2016/03/22 22:23:42
It seems like the main reason to use an Array here
Dan Ehrenberg
2016/03/22 23:09:31
Good idea for optimization; I was just blindly fol
adamk
2016/03/23 01:07:19
Given that we already know there's lots of optimiz
|
| + var array_index = 0; |
| + var lim = (IS_UNDEFINED(limit)) ? kMaxUint32 : TO_UINT32(limit); |
| + var size = string.length; |
| + var prev_string_index = 0; |
| + if (lim === 0) return array; |
| + var result; |
| + if (size === 0) { |
| + result = RegExpSubclassExec(splitter, string); |
| + if (IS_NULL(result)) AddIndexedProperty(array, 0, string); |
| + return array; |
| + } |
| + var string_index = prev_string_index; |
| + while (string_index < size) { |
| + splitter.lastIndex = string_index; |
| + result = RegExpSubclassExec(splitter, string); |
| + if (IS_NULL(result)) { |
| + string_index += GetUnicodeAdvancedIncrement(string, string_index, |
| + unicode); |
| + } else { |
| + var end = MinSimple(splitter.lastIndex, size); |
|
adamk
2016/03/22 22:23:42
Missing TO_LENGTH? Hard to tell it's required, but
Dan Ehrenberg
2016/03/22 23:09:31
Oops, definitely needed around splitter.lastIndex.
|
| + if (end === prev_string_index) { |
| + string_index += GetUnicodeAdvancedIncrement(string, string_index, |
| + unicode); |
| + } else { |
| + AddIndexedProperty( |
| + array, array_index, |
| + %_SubString(string, prev_string_index, string_index)); |
| + array_index++; |
| + if (array_index === lim) return array; |
| + prev_string_index = end; |
| + var number_of_captures = MaxSimple(TO_LENGTH(result.length), 0); |
| + for (var i = 1; i < number_of_captures; i++) { |
| + AddIndexedProperty(array, array_index, result[i]); |
| + array_index++; |
| + if (array_index === lim) return array; |
| + } |
| + string_index = prev_string_index; |
| + } |
| + } |
| + } |
| + AddIndexedProperty(array, array_index, |
| + %_SubString(string, prev_string_index, size)); |
| + return array; |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassSplit); |
| + |
| + |
| +// Legacy implementation of RegExp.prototype[Symbol.match] which |
| +// doesn't properly call the underlying exec method |
| function RegExpMatch(string) { |
| - // TODO(yangguo): allow non-regexp receivers. |
| if (!IS_REGEXP(this)) { |
| throw MakeTypeError(kIncompatibleMethodReceiver, |
| "RegExp.prototype.@@match", this); |
| @@ -398,7 +551,38 @@ function RegExpMatch(string) { |
| } |
| -// ES6 21.2.5.8. |
| +// ES#sec-regexp.prototype-@@match |
| +// RegExp.prototype [ @@match ] ( string ) |
| +function RegExpSubclassMatch(string) { |
| + if (!IS_OBJECT(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + "RegExp.prototype.@@match", this); |
| + } |
| + string = TO_STRING(string); |
| + var global = this.global; |
| + if (!global) return RegExpSubclassExec(this, string); |
| + var unicode = this.unicode; |
| + this.lastIndex = 0; |
| + var array = []; |
| + var n = 0; |
| + var result; |
| + while (true) { |
| + result = RegExpSubclassExec(this, string); |
| + if (IS_NULL(result)) { |
| + if (n === 0) return null; |
| + return array; |
| + } |
| + var matchStr = TO_STRING(result[0]); |
| + %AddElement(array, n, matchStr); |
| + if (matchStr === "") AdvanceStringIndex(this, string, unicode); |
|
adamk
2016/03/22 22:23:42
I found it quite confusing that what the spec call
Dan Ehrenberg
2016/03/22 23:09:32
Fixed it to have more meaningful names.
|
| + n++; |
| + } |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassMatch); |
| + |
| + |
| +// Legacy implementation of RegExp.prototype[Symbol.replace] which |
| +// doesn't properly call the underlying exec method. |
| // TODO(lrn): This array will survive indefinitely if replace is never |
| // called again. However, it will be empty, since the contents are cleared |
| @@ -525,7 +709,6 @@ function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) { |
| function RegExpReplace(string, replace) { |
| - // TODO(littledan): allow non-regexp receivers. |
| if (!IS_REGEXP(this)) { |
| throw MakeTypeError(kIncompatibleMethodReceiver, |
| "RegExp.prototype.@@replace", this); |
| @@ -567,9 +750,188 @@ function RegExpReplace(string, replace) { |
| } |
| -// ES6 21.2.5.9. |
| +// ES#sec-getsubstitution |
| +// GetSubstitution(matched, str, position, captures, replacement) |
| +// Expand the $-expressions in the string and return a new string with |
| +// the result. |
| +// TODO(littledan): Call this function from String.prototype.replace instead |
|
adamk
2016/03/22 22:23:42
I agree :)
This one's really big; what's the diff
Dan Ehrenberg
2016/03/22 23:09:32
They take the captures/replacement in different fo
|
| +// of the very similar ExpandReplacement in src/js/string.js |
| +function GetSubstitution(matched, string, position, captures, replacement) { |
| + var match_length = matched.length; |
| + var string_length = string.length; |
| + var captures_length = captures.length; |
| + var tail_pos = position + match_length; |
| + var result = ""; |
| + var pos, expansion, peek, next, scaled_index, advance, new_scaled_index; |
| + |
| + var next = %StringIndexOf(replacement, '$', 0); |
| + if (next < 0) { |
| + result += replacement; |
| + return result; |
| + } |
| + |
| + if (next > 0) result += %_SubString(replacement, 0, next); |
| + |
| + while (true) { |
| + expansion = '$'; |
| + pos = next + 1; |
| + if (pos < replacement.length) { |
| + peek = %_StringCharCodeAt(replacement, pos); |
| + if (peek == 36) { // $$ |
| + ++pos; |
| + result += '$'; |
| + } else if (peek == 38) { // $& - match |
| + ++pos; |
| + result += matched; |
| + } else if (peek == 96) { // $` - prefix |
| + ++pos; |
| + result += %_SubString(string, 0, position); |
| + } else if (peek == 39) { // $' - suffix |
| + ++pos; |
| + result += %_SubString(string, tail_pos, string_length); |
| + } else if (peek >= 48 && peek <= 57) { |
| + // Valid indices are $1 .. $9, $01 .. $09 and $10 .. $99 |
| + scaled_index = (peek - 48); |
| + advance = 1; |
| + if (pos + 1 < replacement.length) { |
| + next = %_StringCharCodeAt(replacement, pos + 1); |
| + if (next >= 48 && next <= 57) { |
| + new_scaled_index = scaled_index * 10 + ((next - 48)); |
| + if (new_scaled_index < captures_length) { |
| + scaled_index = new_scaled_index; |
| + advance = 2; |
| + } |
| + } |
| + } |
| + if (scaled_index != 0 && scaled_index < captures_length) { |
| + var capture = captures[scaled_index]; |
| + if (!IS_UNDEFINED(capture)) result += capture; |
| + pos += advance; |
| + } else { |
| + result += '$'; |
| + } |
| + } else { |
| + result += '$'; |
| + } |
| + } else { |
| + result += '$'; |
| + } |
| + |
| + // Go the the next $ in the replacement. |
| + next = %StringIndexOf(replacement, '$', pos); |
| + |
| + // Return if there are no more $ characters in the replacement. If we |
| + // haven't reached the end, we need to append the suffix. |
| + if (next < 0) { |
| + if (pos < replacement.length) { |
| + result += %_SubString(replacement, pos, replacement.length); |
| + } |
| + return result; |
| + } |
| + |
| + // Append substring between the previous and the next $ character. |
| + if (next > pos) { |
| + result += %_SubString(replacement, pos, next); |
| + } |
| + } |
| + return result; |
| +} |
| + |
| + |
| +function GetUnicodeAdvancedIncrement(string, index, unicode) { |
| + var increment = 1; |
| + if (unicode) { |
| + var first = %_StringCharCodeAt(string, index); |
| + if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) { |
| + var second = %_StringCharCodeAt(string, index + 1); |
| + if (second >= 0xDC00 && second <= 0xDFFF) { |
| + increment = 2; |
| + } |
| + } |
| + } |
| + return increment; |
| +} |
| + |
| + |
| +// ES#sec-advancestringindex |
| +// AdvanceStringIndex ( S, index, unicode ) |
| +function AdvanceStringIndex(regexp, string, unicode) { |
| + var last_index = regexp.lastIndex; |
| + regexp.lastIndex = last_index + |
| + GetUnicodeAdvancedIncrement(string, last_index, unicode); |
| +} |
| + |
| + |
| +// ES#sec-regexp.prototype-@@replace |
| +// RegExp.prototype [ @@replace ] ( string, replaceValue ) |
| +function RegExpSubclassReplace(string, replace) { |
| + if (!IS_OBJECT(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + "RegExp.prototype.@@replace", this); |
| + } |
| + string = TO_STRING(string); |
| + var length = string.length; |
| + var functional_replace = IS_CALLABLE(replace); |
| + if (!functional_replace) replace = TO_STRING(replace); |
| + var global = this.global; |
| + if (global) { |
| + var unicode = this.unicode; |
| + this.lastIndex = 0; |
| + } |
| + var results = new InternalArray(); |
| + var result, replacement; |
| + while (true) { |
| + result = RegExpSubclassExec(this, string); |
| + if (IS_NULL(result)) { |
| + break; |
| + } else { |
| + results.push(result); |
| + if (!global) break; |
| + var match_str = TO_STRING(result[0]); |
| + if (match_str === "") AdvanceStringIndex(this, string, unicode); |
| + } |
| + } |
| + var accumulated_result = ""; |
| + var next_source_position = 0; |
| + for (var i = 0; i < results.length; i++) { |
| + result = results[i]; |
| + var captures_length = MaxSimple(TO_LENGTH(result.length), 0); |
| + var matched = TO_STRING(result[0]); |
| + var matched_length = matched.length; |
| + var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0); |
| + var captures = new InternalArray(); |
| + for (var n = 0; n < captures_length; n++) { |
| + var capture = result[n]; |
| + if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture); |
| + captures[n] = capture; |
| + } |
| + if (functional_replace) { |
| + var parameters = new InternalArray(captures_length + 2); |
| + for (var j = 0; j < captures_length; j++) { |
| + parameters[j] = captures[j]; |
| + } |
| + parameters[j] = position; |
| + parameters[j + 1] = string; |
| + replacement = %reflect_apply(replace, UNDEFINED, parameters, 0, |
| + parameters.length); |
| + } else { |
| + replacement = GetSubstitution(matched, string, position, captures, replace); |
|
adamk
2016/03/23 01:07:19
Nit: 80 cols.
Dan Ehrenberg
2016/03/24 00:50:54
fixed
|
| + } |
| + if (position >= next_source_position) { |
| + accumulated_result += |
| + %_SubString(string, next_source_position, position) + replacement; |
| + next_source_position = position + matched_length; |
| + } |
| + } |
| + if (next_source_position >= length) return accumulated_result; |
| + return accumulated_result + %_SubString(string, next_source_position, length); |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassReplace); |
| + |
| + |
| +// Legacy implementation of RegExp.prototype[Symbol.search] which |
| +// doesn't properly use the overridden exec method |
| function RegExpSearch(string) { |
| - // TODO(yangguo): allow non-regexp receivers. |
| if (!IS_REGEXP(this)) { |
| throw MakeTypeError(kIncompatibleMethodReceiver, |
| "RegExp.prototype.@@search", this); |
| @@ -580,6 +942,24 @@ function RegExpSearch(string) { |
| } |
| +// ES#sec-regexp.prototype-@@search |
| +// RegExp.prototype [ @@search ] ( string ) |
| +function RegExpSubclassSearch(string) { |
| + if (!IS_OBJECT(this)) { |
| + throw MakeTypeError(kIncompatibleMethodReceiver, |
| + "RegExp.prototype.@@search", this); |
| + } |
| + string = TO_STRING(string); |
| + var previousLastIndex = this.lastIndex; |
| + this.lastIndex = 0; |
| + var result = RegExpSubclassExec(this, string); |
| + this.lastIndex = previousLastIndex; |
| + if (IS_NULL(result)) return -1; |
| + return result.index; |
| +} |
| +%FunctionRemovePrototype(RegExpSubclassSearch); |
| + |
| + |
| // Getters for the static properties lastMatch, lastParen, leftContext, and |
| // rightContext of the RegExp constructor. The properties are computed based |
| // on the captures array of the last successful match and the subject string |
| @@ -781,6 +1161,12 @@ utils.Export(function(to) { |
| to.RegExpExec = DoRegExpExec; |
| to.RegExpExecNoTests = RegExpExecNoTests; |
| to.RegExpLastMatchInfo = RegExpLastMatchInfo; |
| + to.RegExpSubclassExecJS = RegExpSubclassExecJS; |
| + to.RegExpSubclassMatch = RegExpSubclassMatch; |
| + to.RegExpSubclassReplace = RegExpSubclassReplace; |
| + to.RegExpSubclassSearch = RegExpSubclassSearch; |
| + to.RegExpSubclassSplit = RegExpSubclassSplit; |
| + to.RegExpSubclassTest = RegExpSubclassTest; |
| to.RegExpTest = RegExpTest; |
| to.IsRegExp = IsRegExp; |
| }); |