Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(534)

Side by Side Diff: src/js/regexp.js

Issue 2398423002: [regexp] Port RegExp.prototype[@@replace] (Closed)
Patch Set: Tweaks in string code-stub-assembler methods Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
OLD | NEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 (function(global, utils) { 5 (function(global, utils) {
6 6
7 'use strict'; 7 'use strict';
8 8
9 %CheckIsBootstrapping(); 9 %CheckIsBootstrapping();
10 10
11 // ------------------------------------------------------------------- 11 // -------------------------------------------------------------------
12 // Imports 12 // Imports
13 13
14 var GlobalArray = global.Array;
15 var GlobalObject = global.Object;
16 var GlobalRegExp = global.RegExp; 14 var GlobalRegExp = global.RegExp;
17 var GlobalRegExpPrototype = GlobalRegExp.prototype; 15 var GlobalRegExpPrototype = GlobalRegExp.prototype;
18 var InternalArray = utils.InternalArray;
19 var InternalPackedArray = utils.InternalPackedArray;
20 var MaxSimple;
21 var MinSimple;
22 var RegExpExecJS = GlobalRegExp.prototype.exec; 16 var RegExpExecJS = GlobalRegExp.prototype.exec;
23 var matchSymbol = utils.ImportNow("match_symbol"); 17 var matchSymbol = utils.ImportNow("match_symbol");
24 var replaceSymbol = utils.ImportNow("replace_symbol");
25 var searchSymbol = utils.ImportNow("search_symbol");
26 var speciesSymbol = utils.ImportNow("species_symbol");
27 var splitSymbol = utils.ImportNow("split_symbol");
28 var SpeciesConstructor;
29
30 utils.Import(function(from) {
31 MaxSimple = from.MaxSimple;
32 MinSimple = from.MinSimple;
33 SpeciesConstructor = from.SpeciesConstructor;
34 });
35 18
36 // ------------------------------------------------------------------- 19 // -------------------------------------------------------------------
37 20
38 // Property of the builtins object for recording the result of the last 21 // Property of the builtins object for recording the result of the last
39 // regexp match. The property RegExpLastMatchInfo includes the matchIndices 22 // regexp match. The property RegExpLastMatchInfo includes the matchIndices
40 // array of the last successful regexp match (an array of start/end index 23 // array of the last successful regexp match (an array of start/end index
41 // pairs for the match and all the captured substrings), the invariant is 24 // pairs for the match and all the captured substrings), the invariant is
42 // that there are at least two capture indices. The array also contains 25 // that there are at least two capture indices. The array also contains
43 // the subject string for the last successful match. 26 // the subject string for the last successful match.
44 // We use a JSObject rather than a JSArray so we don't have to manually update 27 // We use a JSObject rather than a JSArray so we don't have to manually update
(...skipping 20 matching lines...) Expand all
65 // ES#sec-regexpinitialize 48 // ES#sec-regexpinitialize
66 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) 49 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
67 function RegExpInitialize(object, pattern, flags) { 50 function RegExpInitialize(object, pattern, flags) {
68 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); 51 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
69 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); 52 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
70 %RegExpInitializeAndCompile(object, pattern, flags); 53 %RegExpInitializeAndCompile(object, pattern, flags);
71 return object; 54 return object;
72 } 55 }
73 56
74 57
75 function DoRegExpExec(regexp, string, index) {
76 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
77 }
78
79
80 // This is kind of performance sensitive, so we want to avoid unnecessary 58 // This is kind of performance sensitive, so we want to avoid unnecessary
81 // type checks on inputs. But we also don't want to inline it several times 59 // type checks on inputs. But we also don't want to inline it several times
82 // manually, so we use a macro :-) 60 // manually, so we use a macro :-)
83 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 61 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
84 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 62 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
85 var start = MATCHINFO[CAPTURE0]; 63 var start = MATCHINFO[CAPTURE0];
86 var end = MATCHINFO[CAPTURE1]; 64 var end = MATCHINFO[CAPTURE1];
87 // Calculate the substring of the first match before creating the result array 65 // Calculate the substring of the first match before creating the result array
88 // to avoid an unnecessary write barrier storing the first result. 66 // to avoid an unnecessary write barrier storing the first result.
89 var first = %_SubString(STRING, start, end); 67 var first = %_SubString(STRING, start, end);
90 var result = %_RegExpConstructResult(numResults, start, STRING); 68 var result = %_RegExpConstructResult(numResults, start, STRING);
91 result[0] = first; 69 result[0] = first;
92 if (numResults == 1) return result; 70 if (numResults == 1) return result;
93 var j = REGEXP_FIRST_CAPTURE + 2; 71 var j = REGEXP_FIRST_CAPTURE + 2;
94 for (var i = 1; i < numResults; i++) { 72 for (var i = 1; i < numResults; i++) {
95 start = MATCHINFO[j++]; 73 start = MATCHINFO[j++];
96 if (start != -1) { 74 if (start != -1) {
97 end = MATCHINFO[j]; 75 end = MATCHINFO[j];
98 result[i] = %_SubString(STRING, start, end); 76 result[i] = %_SubString(STRING, start, end);
99 } 77 }
100 j++; 78 j++;
101 } 79 }
102 return result; 80 return result;
103 endmacro 81 endmacro
104 82
105
106 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
107 // Also takes an optional exec method in case our caller
108 // has already fetched exec.
109 function RegExpSubclassExec(regexp, string, exec) {
110 if (IS_UNDEFINED(exec)) {
111 exec = regexp.exec;
112 }
113 if (IS_CALLABLE(exec)) {
114 var result = %_Call(exec, regexp, string);
115 if (!IS_RECEIVER(result) && !IS_NULL(result)) {
116 throw %make_type_error(kInvalidRegExpExecResult);
117 }
118 return result;
119 }
120 return %_Call(RegExpExecJS, regexp, string);
121 }
122 %SetForceInlineFlag(RegExpSubclassExec);
123
124
125 // Legacy implementation of RegExp.prototype[Symbol.replace] which
126 // doesn't properly call the underlying exec method.
127
128 // TODO(lrn): This array will survive indefinitely if replace is never
129 // called again. However, it will be empty, since its length is reset to
130 // zero before it is stored back for reuse (there is no finally block).
131 var reusableReplaceArray = new InternalArray(4);
132
133 // Helper function for replacing regular expressions with the result of a
134 // function application in String.prototype.replace.
135 function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
136 var resultArray = reusableReplaceArray;
137 if (resultArray) {
138 reusableReplaceArray = null;
139 } else {
140 // Inside a nested replace (replace called from the replacement function
141 // of another replace) or we have failed to set the reusable array
142 // back due to an exception in a replacement function. Create a new
143 // array to use in the future, or until the original is written back.
144 resultArray = new InternalArray(16);
145 }
146 var res = %RegExpExecMultiple(regexp,
147 subject,
148 RegExpLastMatchInfo,
149 resultArray);
150 regexp.lastIndex = 0;
151 if (IS_NULL(res)) {
152 // No matches at all.
153 reusableReplaceArray = resultArray;
154 return subject;
155 }
156 var len = res.length;
157 if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) {
158 // If the number of captures is two then there are no explicit captures in
159 // the regexp, just the implicit capture that captures the whole match. In
160 // this case we can simplify quite a bit and end up with something faster.
161 // The builder will consist of some integers that indicate slices of the
162 // input string and some replacements that were returned from the replace
163 // function.
164 var match_start = 0;
165 for (var i = 0; i < len; i++) {
166 var elem = res[i];
167 if (%_IsSmi(elem)) {
168 // Integers represent slices of the original string.
169 if (elem > 0) {
170 match_start = (elem >> 11) + (elem & 0x7ff);
171 } else {
172 match_start = res[++i] - elem;
173 }
174 } else {
175 var func_result = replace(elem, match_start, subject);
176 // Overwrite the i'th element in the results with the string we got
177 // back from the callback function.
178 res[i] = TO_STRING(func_result);
179 match_start += elem.length;
180 }
181 }
182 } else {
183 for (var i = 0; i < len; i++) {
184 var elem = res[i];
185 if (!%_IsSmi(elem)) {
186 // elem must be an Array.
187 // Use the apply argument as backing for global RegExp properties.
188 var func_result = %reflect_apply(replace, UNDEFINED, elem);
189 // Overwrite the i'th element in the results with the string we got
190 // back from the callback function.
191 res[i] = TO_STRING(func_result);
192 }
193 }
194 }
195 var result = %StringBuilderConcat(res, len, subject);
196 resultArray.length = 0;
197 reusableReplaceArray = resultArray;
198 return result;
199 }
200
201
202 // Compute the string of a given regular expression capture.
203 function CaptureString(string, lastCaptureInfo, index) {
204 // Scale the index.
205 var scaled = index << 1;
206 // Compute start and end.
207 var start = lastCaptureInfo[CAPTURE(scaled)];
208 // If start isn't valid, return undefined.
209 if (start < 0) return;
210 var end = lastCaptureInfo[CAPTURE(scaled + 1)];
211 return %_SubString(string, start, end);
212 }
213
214
215 function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) {
216 var matchInfo = DoRegExpExec(regexp, subject, 0);
217 if (IS_NULL(matchInfo)) {
218 regexp.lastIndex = 0;
219 return subject;
220 }
221 var index = matchInfo[CAPTURE0];
222 var result = %_SubString(subject, 0, index);
223 var endOfMatch = matchInfo[CAPTURE1];
224 // Compute the parameter list consisting of the match, captures, index,
225 // and subject for the replace function invocation.
226 // The number of captures plus one for the match.
227 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1;
228 var replacement;
229 if (m == 1) {
230 // No captures, only the match, which is always valid.
231 var s = %_SubString(subject, index, endOfMatch);
232 // Don't call directly to avoid exposing the built-in global object.
233 replacement = replace(s, index, subject);
234 } else {
235 var parameters = new InternalArray(m + 2);
236 for (var j = 0; j < m; j++) {
237 parameters[j] = CaptureString(subject, matchInfo, j);
238 }
239 parameters[j] = index;
240 parameters[j + 1] = subject;
241
242 replacement = %reflect_apply(replace, UNDEFINED, parameters);
243 }
244
245 result += replacement; // The add method converts to string if necessary.
246 // Can't use matchInfo any more from here, since the function could
247 // overwrite it.
248 return result + %_SubString(subject, endOfMatch, subject.length);
249 }
250
251 // Wraps access to matchInfo's captures into a format understood by
252 // GetSubstitution.
253 function MatchInfoCaptureWrapper(matches, subject) {
254 this.length = NUMBER_OF_CAPTURES(matches) >> 1;
255 this.match = matches;
256 this.subject = subject;
257 }
258
259 MatchInfoCaptureWrapper.prototype.at = function(ix) {
260 const match = this.match;
261 const start = match[CAPTURE(ix << 1)];
262 if (start < 0) return UNDEFINED;
263 return %_SubString(this.subject, start, match[CAPTURE((ix << 1) + 1)]);
264 };
265 %SetForceInlineFlag(MatchInfoCaptureWrapper.prototype.at);
266
267 function ArrayCaptureWrapper(array) {
268 this.length = array.length;
269 this.array = array;
270 }
271
272 ArrayCaptureWrapper.prototype.at = function(ix) {
273 return this.array[ix];
274 };
275 %SetForceInlineFlag(ArrayCaptureWrapper.prototype.at);
276
277 function RegExpReplace(string, replace) {
278 if (!IS_REGEXP(this)) {
279 throw %make_type_error(kIncompatibleMethodReceiver,
280 "RegExp.prototype.@@replace", this);
281 }
282 var subject = TO_STRING(string);
283 var search = this;
284
285 if (!IS_CALLABLE(replace)) {
286 replace = TO_STRING(replace);
287
288 if (!REGEXP_GLOBAL(search)) {
289 // Non-global regexp search, string replace.
290 var match = DoRegExpExec(search, subject, 0);
291 if (match == null) {
292 search.lastIndex = 0
293 return subject;
294 }
295 if (replace.length == 0) {
296 return %_SubString(subject, 0, match[CAPTURE0]) +
297 %_SubString(subject, match[CAPTURE1], subject.length)
298 }
299 const captures = new MatchInfoCaptureWrapper(match, subject);
300 const start = match[CAPTURE0];
301 const end = match[CAPTURE1];
302
303 const prefix = %_SubString(subject, 0, start);
304 const matched = %_SubString(subject, start, end);
305 const suffix = %_SubString(subject, end, subject.length);
306
307 return prefix +
308 GetSubstitution(matched, subject, start, captures, replace) +
309 suffix;
310 }
311
312 // Global regexp search, string replace.
313 search.lastIndex = 0;
314 return %StringReplaceGlobalRegExpWithString(
315 subject, search, replace, RegExpLastMatchInfo);
316 }
317
318 if (REGEXP_GLOBAL(search)) {
319 // Global regexp search, function replace.
320 return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
321 }
322 // Non-global regexp search, function replace.
323 return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
324 }
325
326
327 // ES#sec-getsubstitution 83 // ES#sec-getsubstitution
328 // GetSubstitution(matched, str, position, captures, replacement) 84 // GetSubstitution(matched, str, position, captures, replacement)
329 // Expand the $-expressions in the string and return a new string with 85 // Expand the $-expressions in the string and return a new string with
330 // the result. 86 // the result.
331 function GetSubstitution(matched, string, position, captures, replacement) { 87 function GetSubstitution(matched, string, position, captures, replacement) {
332 var matchLength = matched.length; 88 var matchLength = matched.length;
333 var stringLength = string.length; 89 var stringLength = string.length;
334 var capturesLength = captures.length; 90 var capturesLength = captures.length;
335 var tailPos = position + matchLength; 91 var tailPos = position + matchLength;
336 var result = ""; 92 var result = "";
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
402 } 158 }
403 159
404 // Append substring between the previous and the next $ character. 160 // Append substring between the previous and the next $ character.
405 if (next > pos) { 161 if (next > pos) {
406 result += %_SubString(replacement, pos, next); 162 result += %_SubString(replacement, pos, next);
407 } 163 }
408 } 164 }
409 return result; 165 return result;
410 } 166 }
411 167
412
413 // ES#sec-advancestringindex
414 // AdvanceStringIndex ( S, index, unicode )
415 function AdvanceStringIndex(string, index, unicode) {
416 var increment = 1;
417 if (unicode) {
418 var first = %_StringCharCodeAt(string, index);
419 if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) {
420 var second = %_StringCharCodeAt(string, index + 1);
421 if (second >= 0xDC00 && second <= 0xDFFF) {
422 increment = 2;
423 }
424 }
425 }
426 return increment;
427 }
428
429
430 function SetAdvancedStringIndex(regexp, string, unicode) {
431 var lastIndex = regexp.lastIndex;
432 regexp.lastIndex = lastIndex +
433 AdvanceStringIndex(string, lastIndex, unicode);
434 }
435
436
437 // ES#sec-regexp.prototype-@@replace
438 // RegExp.prototype [ @@replace ] ( string, replaceValue )
439 function RegExpSubclassReplace(string, replace) {
440 if (!IS_RECEIVER(this)) {
441 throw %make_type_error(kIncompatibleMethodReceiver,
442 "RegExp.prototype.@@replace", this);
443 }
444 string = TO_STRING(string);
445 var length = string.length;
446 var functionalReplace = IS_CALLABLE(replace);
447 if (!functionalReplace) replace = TO_STRING(replace);
448 var global = TO_BOOLEAN(this.global);
449 if (global) {
450 var unicode = TO_BOOLEAN(this.unicode);
451 this.lastIndex = 0;
452 }
453
454 // TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
455 // is actually a data property on RegExp.prototype.
456 var exec;
457 if (IS_REGEXP(this)) {
458 exec = this.exec;
459 if (exec === RegExpExecJS) {
460 return %_Call(RegExpReplace, this, string, replace);
461 }
462 }
463
464 var results = new InternalArray();
465 var result, replacement;
466 while (true) {
467 result = RegExpSubclassExec(this, string, exec);
468 // Ensure exec will be read again on the next loop through.
469 exec = UNDEFINED;
470 if (IS_NULL(result)) {
471 break;
472 } else {
473 results.push(result);
474 if (!global) break;
475 var matchStr = TO_STRING(result[0]);
476 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
477 }
478 }
479 var accumulatedResult = "";
480 var nextSourcePosition = 0;
481 for (var i = 0; i < results.length; i++) {
482 result = results[i];
483 var capturesLength = MaxSimple(TO_LENGTH(result.length), 0);
484 var matched = TO_STRING(result[0]);
485 var matchedLength = matched.length;
486 var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0);
487 var captures = new InternalArray();
488 for (var n = 0; n < capturesLength; n++) {
489 var capture = result[n];
490 if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture);
491 captures[n] = capture;
492 }
493 if (functionalReplace) {
494 var parameters = new InternalArray(capturesLength + 2);
495 for (var j = 0; j < capturesLength; j++) {
496 parameters[j] = captures[j];
497 }
498 parameters[j] = position;
499 parameters[j + 1] = string;
500 replacement = %reflect_apply(replace, UNDEFINED, parameters, 0,
501 parameters.length);
502 } else {
503 const capturesWrapper = new ArrayCaptureWrapper(captures);
504 replacement = GetSubstitution(matched, string, position, capturesWrapper,
505 replace);
506 }
507 if (position >= nextSourcePosition) {
508 accumulatedResult +=
509 %_SubString(string, nextSourcePosition, position) + replacement;
510 nextSourcePosition = position + matchedLength;
511 }
512 }
513 if (nextSourcePosition >= length) return accumulatedResult;
514 return accumulatedResult + %_SubString(string, nextSourcePosition, length);
515 }
516 %FunctionRemovePrototype(RegExpSubclassReplace);
517
518
519
520 // ------------------------------------------------------------------- 168 // -------------------------------------------------------------------
521 169
522 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
523 replaceSymbol, RegExpSubclassReplace,
524 ]);
525
526 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); 170 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]);
527 171
528 // ------------------------------------------------------------------- 172 // -------------------------------------------------------------------
529 // Internal 173 // Internal
530 174
531 var InternalRegExpMatchInfo = { 175 var InternalRegExpMatchInfo = {
532 REGEXP_NUMBER_OF_CAPTURES: 2, 176 REGEXP_NUMBER_OF_CAPTURES: 2,
533 REGEXP_LAST_SUBJECT: "", 177 REGEXP_LAST_SUBJECT: "",
534 REGEXP_LAST_INPUT: UNDEFINED, 178 REGEXP_LAST_INPUT: UNDEFINED,
535 CAPTURE0: 0, 179 CAPTURE0: 0,
(...skipping 14 matching lines...) Expand all
550 } 194 }
551 195
552 // ------------------------------------------------------------------- 196 // -------------------------------------------------------------------
553 // Exports 197 // Exports
554 198
555 utils.Export(function(to) { 199 utils.Export(function(to) {
556 to.GetSubstitution = GetSubstitution; 200 to.GetSubstitution = GetSubstitution;
557 to.InternalRegExpMatch = InternalRegExpMatch; 201 to.InternalRegExpMatch = InternalRegExpMatch;
558 to.InternalRegExpReplace = InternalRegExpReplace; 202 to.InternalRegExpReplace = InternalRegExpReplace;
559 to.IsRegExp = IsRegExp; 203 to.IsRegExp = IsRegExp;
560 to.RegExpExec = DoRegExpExec;
561 to.RegExpInitialize = RegExpInitialize; 204 to.RegExpInitialize = RegExpInitialize;
562 to.RegExpLastMatchInfo = RegExpLastMatchInfo; 205 to.RegExpLastMatchInfo = RegExpLastMatchInfo;
563 }); 206 });
564 207
565 }) 208 })
OLD | NEW

Powered by Google App Engine
This is Rietveld 408576698