Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(236)

Side by Side Diff: src/js/regexp.js

Issue 2398423002: [regexp] Port RegExp.prototype[@@replace] (Closed)
Patch Set: Smi::kZero Created 4 years, 2 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « src/js/prologue.js ('k') | src/objects.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 // Copyright 2012 the V8 project authors. All rights reserved. 1 // Copyright 2012 the V8 project authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be 2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file. 3 // found in the LICENSE file.
4 4
5 (function(global, utils) { 5 (function(global, utils) {
6 6
7 'use strict'; 7 'use strict';
8 8
9 %CheckIsBootstrapping(); 9 %CheckIsBootstrapping();
10 10
11 // ------------------------------------------------------------------- 11 // -------------------------------------------------------------------
12 // Imports 12 // Imports
13 13
14 var GlobalArray = global.Array;
15 var GlobalObject = global.Object;
16 var GlobalRegExp = global.RegExp; 14 var GlobalRegExp = global.RegExp;
17 var GlobalRegExpPrototype = GlobalRegExp.prototype; 15 var GlobalRegExpPrototype = GlobalRegExp.prototype;
18 var InternalArray = utils.InternalArray;
19 var MaxSimple;
20 var MinSimple;
21 var RegExpExecJS = GlobalRegExp.prototype.exec; 16 var RegExpExecJS = GlobalRegExp.prototype.exec;
22 var matchSymbol = utils.ImportNow("match_symbol"); 17 var matchSymbol = utils.ImportNow("match_symbol");
23 var replaceSymbol = utils.ImportNow("replace_symbol");
24 var searchSymbol = utils.ImportNow("search_symbol");
25 var speciesSymbol = utils.ImportNow("species_symbol");
26 var splitSymbol = utils.ImportNow("split_symbol");
27 var SpeciesConstructor;
28
29 utils.Import(function(from) {
30 MaxSimple = from.MaxSimple;
31 MinSimple = from.MinSimple;
32 SpeciesConstructor = from.SpeciesConstructor;
33 });
34 18
35 // ------------------------------------------------------------------- 19 // -------------------------------------------------------------------
36 20
37 // Property of the builtins object for recording the result of the last 21 // Property of the builtins object for recording the result of the last
38 // regexp match. The property RegExpLastMatchInfo includes the matchIndices 22 // regexp match. The property RegExpLastMatchInfo includes the matchIndices
39 // array of the last successful regexp match (an array of start/end index 23 // array of the last successful regexp match (an array of start/end index
40 // pairs for the match and all the captured substrings), the invariant is 24 // pairs for the match and all the captured substrings), the invariant is
41 // that there are at least two capture indices. The array also contains 25 // that there are at least two capture indices. The array also contains
42 // the subject string for the last successful match. 26 // the subject string for the last successful match.
43 // We use a JSObject rather than a JSArray so we don't have to manually update 27 // We use a JSObject rather than a JSArray so we don't have to manually update
(...skipping 20 matching lines...) Expand all
64 // ES#sec-regexpinitialize 48 // ES#sec-regexpinitialize
65 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) 49 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags )
66 function RegExpInitialize(object, pattern, flags) { 50 function RegExpInitialize(object, pattern, flags) {
67 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); 51 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern);
68 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); 52 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags);
69 %RegExpInitializeAndCompile(object, pattern, flags); 53 %RegExpInitializeAndCompile(object, pattern, flags);
70 return object; 54 return object;
71 } 55 }
72 56
73 57
74 function DoRegExpExec(regexp, string, index) {
75 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo);
76 }
77
78
79 // This is kind of performance sensitive, so we want to avoid unnecessary 58 // This is kind of performance sensitive, so we want to avoid unnecessary
80 // type checks on inputs. But we also don't want to inline it several times 59 // type checks on inputs. But we also don't want to inline it several times
81 // manually, so we use a macro :-) 60 // manually, so we use a macro :-)
82 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) 61 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING)
83 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; 62 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1;
84 var start = MATCHINFO[CAPTURE0]; 63 var start = MATCHINFO[CAPTURE0];
85 var end = MATCHINFO[CAPTURE1]; 64 var end = MATCHINFO[CAPTURE1];
86 // Calculate the substring of the first match before creating the result array 65 // Calculate the substring of the first match before creating the result array
87 // to avoid an unnecessary write barrier storing the first result. 66 // to avoid an unnecessary write barrier storing the first result.
88 var first = %_SubString(STRING, start, end); 67 var first = %_SubString(STRING, start, end);
89 var result = %_RegExpConstructResult(numResults, start, STRING); 68 var result = %_RegExpConstructResult(numResults, start, STRING);
90 result[0] = first; 69 result[0] = first;
91 if (numResults == 1) return result; 70 if (numResults == 1) return result;
92 var j = REGEXP_FIRST_CAPTURE + 2; 71 var j = REGEXP_FIRST_CAPTURE + 2;
93 for (var i = 1; i < numResults; i++) { 72 for (var i = 1; i < numResults; i++) {
94 start = MATCHINFO[j++]; 73 start = MATCHINFO[j++];
95 if (start != -1) { 74 if (start != -1) {
96 end = MATCHINFO[j]; 75 end = MATCHINFO[j];
97 result[i] = %_SubString(STRING, start, end); 76 result[i] = %_SubString(STRING, start, end);
98 } 77 }
99 j++; 78 j++;
100 } 79 }
101 return result; 80 return result;
102 endmacro 81 endmacro
103 82
104
105 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S )
106 // Also takes an optional exec method in case our caller
107 // has already fetched exec.
108 function RegExpSubclassExec(regexp, string, exec) {
109 if (IS_UNDEFINED(exec)) {
110 exec = regexp.exec;
111 }
112 if (IS_CALLABLE(exec)) {
113 var result = %_Call(exec, regexp, string);
114 if (!IS_RECEIVER(result) && !IS_NULL(result)) {
115 throw %make_type_error(kInvalidRegExpExecResult);
116 }
117 return result;
118 }
119 return %_Call(RegExpExecJS, regexp, string);
120 }
121 %SetForceInlineFlag(RegExpSubclassExec);
122
123
124 // Legacy implementation of RegExp.prototype[Symbol.replace] which
125 // doesn't properly call the underlying exec method.
126
127 // TODO(lrn): This array will survive indefinitely if replace is never
128 // called again. However, it will be empty, since the contents are cleared
129 // in the finally block.
130 var reusableReplaceArray = new InternalArray(4);
131
132 // Helper function for replacing regular expressions with the result of a
133 // function application in String.prototype.replace.
134 function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) {
135 var resultArray = reusableReplaceArray;
136 if (resultArray) {
137 reusableReplaceArray = null;
138 } else {
139 // Inside a nested replace (replace called from the replacement function
140 // of another replace) or we have failed to set the reusable array
141 // back due to an exception in a replacement function. Create a new
142 // array to use in the future, or until the original is written back.
143 resultArray = new InternalArray(16);
144 }
145 var res = %RegExpExecMultiple(regexp,
146 subject,
147 RegExpLastMatchInfo,
148 resultArray);
149 regexp.lastIndex = 0;
150 if (IS_NULL(res)) {
151 // No matches at all.
152 reusableReplaceArray = resultArray;
153 return subject;
154 }
155 var len = res.length;
156 if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) {
157 // If the number of captures is two then there are no explicit captures in
158 // the regexp, just the implicit capture that captures the whole match. In
159 // this case we can simplify quite a bit and end up with something faster.
160 // The builder will consist of some integers that indicate slices of the
161 // input string and some replacements that were returned from the replace
162 // function.
163 var match_start = 0;
164 for (var i = 0; i < len; i++) {
165 var elem = res[i];
166 if (%_IsSmi(elem)) {
167 // Integers represent slices of the original string.
168 if (elem > 0) {
169 match_start = (elem >> 11) + (elem & 0x7ff);
170 } else {
171 match_start = res[++i] - elem;
172 }
173 } else {
174 var func_result = replace(elem, match_start, subject);
175 // Overwrite the i'th element in the results with the string we got
176 // back from the callback function.
177 res[i] = TO_STRING(func_result);
178 match_start += elem.length;
179 }
180 }
181 } else {
182 for (var i = 0; i < len; i++) {
183 var elem = res[i];
184 if (!%_IsSmi(elem)) {
185 // elem must be an Array.
186 // Use the apply argument as backing for global RegExp properties.
187 var func_result = %reflect_apply(replace, UNDEFINED, elem);
188 // Overwrite the i'th element in the results with the string we got
189 // back from the callback function.
190 res[i] = TO_STRING(func_result);
191 }
192 }
193 }
194 var result = %StringBuilderConcat(res, len, subject);
195 resultArray.length = 0;
196 reusableReplaceArray = resultArray;
197 return result;
198 }
199
200
201 // Compute the string of a given regular expression capture.
202 function CaptureString(string, lastCaptureInfo, index) {
203 // Scale the index.
204 var scaled = index << 1;
205 // Compute start and end.
206 var start = lastCaptureInfo[CAPTURE(scaled)];
207 // If start isn't valid, return undefined.
208 if (start < 0) return;
209 var end = lastCaptureInfo[CAPTURE(scaled + 1)];
210 return %_SubString(string, start, end);
211 }
212
213
214 function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) {
215 var matchInfo = DoRegExpExec(regexp, subject, 0);
216 if (IS_NULL(matchInfo)) {
217 regexp.lastIndex = 0;
218 return subject;
219 }
220 var index = matchInfo[CAPTURE0];
221 var result = %_SubString(subject, 0, index);
222 var endOfMatch = matchInfo[CAPTURE1];
223 // Compute the parameter list consisting of the match, captures, index,
224 // and subject for the replace function invocation.
225 // The number of captures plus one for the match.
226 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1;
227 var replacement;
228 if (m == 1) {
229 // No captures, only the match, which is always valid.
230 var s = %_SubString(subject, index, endOfMatch);
231 // Don't call directly to avoid exposing the built-in global object.
232 replacement = replace(s, index, subject);
233 } else {
234 var parameters = new InternalArray(m + 2);
235 for (var j = 0; j < m; j++) {
236 parameters[j] = CaptureString(subject, matchInfo, j);
237 }
238 parameters[j] = index;
239 parameters[j + 1] = subject;
240
241 replacement = %reflect_apply(replace, UNDEFINED, parameters);
242 }
243
244 result += replacement; // The add method converts to string if necessary.
245 // Can't use matchInfo any more from here, since the function could
246 // overwrite it.
247 return result + %_SubString(subject, endOfMatch, subject.length);
248 }
249
250 // Wraps access to matchInfo's captures into a format understood by
251 // GetSubstitution.
252 function MatchInfoCaptureWrapper(matches, subject) {
253 this.length = NUMBER_OF_CAPTURES(matches) >> 1;
254 this.match = matches;
255 this.subject = subject;
256 }
257
258 MatchInfoCaptureWrapper.prototype.at = function(ix) {
259 const match = this.match;
260 const start = match[CAPTURE(ix << 1)];
261 if (start < 0) return UNDEFINED;
262 return %_SubString(this.subject, start, match[CAPTURE((ix << 1) + 1)]);
263 };
264 %SetForceInlineFlag(MatchInfoCaptureWrapper.prototype.at);
265
266 function ArrayCaptureWrapper(array) {
267 this.length = array.length;
268 this.array = array;
269 }
270
271 ArrayCaptureWrapper.prototype.at = function(ix) {
272 return this.array[ix];
273 };
274 %SetForceInlineFlag(ArrayCaptureWrapper.prototype.at);
275
276 function RegExpReplace(string, replace) {
277 if (!IS_REGEXP(this)) {
278 throw %make_type_error(kIncompatibleMethodReceiver,
279 "RegExp.prototype.@@replace", this);
280 }
281 var subject = TO_STRING(string);
282 var search = this;
283
284 if (!IS_CALLABLE(replace)) {
285 replace = TO_STRING(replace);
286
287 if (!REGEXP_GLOBAL(search)) {
288 // Non-global regexp search, string replace.
289 var match = DoRegExpExec(search, subject, 0);
290 if (match == null) {
291 search.lastIndex = 0
292 return subject;
293 }
294 if (replace.length == 0) {
295 return %_SubString(subject, 0, match[CAPTURE0]) +
296 %_SubString(subject, match[CAPTURE1], subject.length)
297 }
298 const captures = new MatchInfoCaptureWrapper(match, subject);
299 const start = match[CAPTURE0];
300 const end = match[CAPTURE1];
301
302 const prefix = %_SubString(subject, 0, start);
303 const matched = %_SubString(subject, start, end);
304 const suffix = %_SubString(subject, end, subject.length);
305
306 return prefix +
307 GetSubstitution(matched, subject, start, captures, replace) +
308 suffix;
309 }
310
311 // Global regexp search, string replace.
312 search.lastIndex = 0;
313 return %StringReplaceGlobalRegExpWithString(
314 subject, search, replace, RegExpLastMatchInfo);
315 }
316
317 if (REGEXP_GLOBAL(search)) {
318 // Global regexp search, function replace.
319 return StringReplaceGlobalRegExpWithFunction(subject, search, replace);
320 }
321 // Non-global regexp search, function replace.
322 return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace);
323 }
324
325
326 // ES#sec-getsubstitution 83 // ES#sec-getsubstitution
327 // GetSubstitution(matched, str, position, captures, replacement) 84 // GetSubstitution(matched, str, position, captures, replacement)
328 // Expand the $-expressions in the string and return a new string with 85 // Expand the $-expressions in the string and return a new string with
329 // the result. 86 // the result.
330 function GetSubstitution(matched, string, position, captures, replacement) { 87 function GetSubstitution(matched, string, position, captures, replacement) {
331 var matchLength = matched.length; 88 var matchLength = matched.length;
332 var stringLength = string.length; 89 var stringLength = string.length;
333 var capturesLength = captures.length; 90 var capturesLength = captures.length;
334 var tailPos = position + matchLength; 91 var tailPos = position + matchLength;
335 var result = ""; 92 var result = "";
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after
401 } 158 }
402 159
403 // Append substring between the previous and the next $ character. 160 // Append substring between the previous and the next $ character.
404 if (next > pos) { 161 if (next > pos) {
405 result += %_SubString(replacement, pos, next); 162 result += %_SubString(replacement, pos, next);
406 } 163 }
407 } 164 }
408 return result; 165 return result;
409 } 166 }
410 167
411
412 // ES#sec-advancestringindex
413 // AdvanceStringIndex ( S, index, unicode )
414 function AdvanceStringIndex(string, index, unicode) {
415 var increment = 1;
416 if (unicode) {
417 var first = %_StringCharCodeAt(string, index);
418 if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) {
419 var second = %_StringCharCodeAt(string, index + 1);
420 if (second >= 0xDC00 && second <= 0xDFFF) {
421 increment = 2;
422 }
423 }
424 }
425 return increment;
426 }
427
428
429 function SetAdvancedStringIndex(regexp, string, unicode) {
430 var lastIndex = regexp.lastIndex;
431 regexp.lastIndex = lastIndex +
432 AdvanceStringIndex(string, lastIndex, unicode);
433 }
434
435
436 // ES#sec-regexp.prototype-@@replace
437 // RegExp.prototype [ @@replace ] ( string, replaceValue )
438 function RegExpSubclassReplace(string, replace) {
439 if (!IS_RECEIVER(this)) {
440 throw %make_type_error(kIncompatibleMethodReceiver,
441 "RegExp.prototype.@@replace", this);
442 }
443 string = TO_STRING(string);
444 var length = string.length;
445 var functionalReplace = IS_CALLABLE(replace);
446 if (!functionalReplace) replace = TO_STRING(replace);
447 var global = TO_BOOLEAN(this.global);
448 if (global) {
449 var unicode = TO_BOOLEAN(this.unicode);
450 this.lastIndex = 0;
451 }
452
453 // TODO(adamk): this fast path is wrong as we don't ensure that 'exec'
454 // is actually a data property on RegExp.prototype.
455 var exec;
456 if (IS_REGEXP(this)) {
457 exec = this.exec;
458 if (exec === RegExpExecJS) {
459 return %_Call(RegExpReplace, this, string, replace);
460 }
461 }
462
463 var results = new InternalArray();
464 var result, replacement;
465 while (true) {
466 result = RegExpSubclassExec(this, string, exec);
467 // Ensure exec will be read again on the next loop through.
468 exec = UNDEFINED;
469 if (IS_NULL(result)) {
470 break;
471 } else {
472 results.push(result);
473 if (!global) break;
474 var matchStr = TO_STRING(result[0]);
475 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode);
476 }
477 }
478 var accumulatedResult = "";
479 var nextSourcePosition = 0;
480 for (var i = 0; i < results.length; i++) {
481 result = results[i];
482 var capturesLength = MaxSimple(TO_LENGTH(result.length), 0);
483 var matched = TO_STRING(result[0]);
484 var matchedLength = matched.length;
485 var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0);
486 var captures = new InternalArray();
487 for (var n = 0; n < capturesLength; n++) {
488 var capture = result[n];
489 if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture);
490 captures[n] = capture;
491 }
492 if (functionalReplace) {
493 var parameters = new InternalArray(capturesLength + 2);
494 for (var j = 0; j < capturesLength; j++) {
495 parameters[j] = captures[j];
496 }
497 parameters[j] = position;
498 parameters[j + 1] = string;
499 replacement = %reflect_apply(replace, UNDEFINED, parameters, 0,
500 parameters.length);
501 } else {
502 const capturesWrapper = new ArrayCaptureWrapper(captures);
503 replacement = GetSubstitution(matched, string, position, capturesWrapper,
504 replace);
505 }
506 if (position >= nextSourcePosition) {
507 accumulatedResult +=
508 %_SubString(string, nextSourcePosition, position) + replacement;
509 nextSourcePosition = position + matchedLength;
510 }
511 }
512 if (nextSourcePosition >= length) return accumulatedResult;
513 return accumulatedResult + %_SubString(string, nextSourcePosition, length);
514 }
515 %FunctionRemovePrototype(RegExpSubclassReplace);
516
517
518
519 // ------------------------------------------------------------------- 168 // -------------------------------------------------------------------
520 169
521 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
522 replaceSymbol, RegExpSubclassReplace,
523 ]);
524
525 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); 170 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]);
526 171
527 // ------------------------------------------------------------------- 172 // -------------------------------------------------------------------
528 // Internal 173 // Internal
529 174
530 var InternalRegExpMatchInfo = { 175 var InternalRegExpMatchInfo = {
531 REGEXP_NUMBER_OF_CAPTURES: 2, 176 REGEXP_NUMBER_OF_CAPTURES: 2,
532 REGEXP_LAST_SUBJECT: "", 177 REGEXP_LAST_SUBJECT: "",
533 REGEXP_LAST_INPUT: UNDEFINED, 178 REGEXP_LAST_INPUT: UNDEFINED,
534 CAPTURE0: 0, 179 CAPTURE0: 0,
(...skipping 14 matching lines...) Expand all
549 } 194 }
550 195
551 // ------------------------------------------------------------------- 196 // -------------------------------------------------------------------
552 // Exports 197 // Exports
553 198
554 utils.Export(function(to) { 199 utils.Export(function(to) {
555 to.GetSubstitution = GetSubstitution; 200 to.GetSubstitution = GetSubstitution;
556 to.InternalRegExpMatch = InternalRegExpMatch; 201 to.InternalRegExpMatch = InternalRegExpMatch;
557 to.InternalRegExpReplace = InternalRegExpReplace; 202 to.InternalRegExpReplace = InternalRegExpReplace;
558 to.IsRegExp = IsRegExp; 203 to.IsRegExp = IsRegExp;
559 to.RegExpExec = DoRegExpExec;
560 to.RegExpInitialize = RegExpInitialize; 204 to.RegExpInitialize = RegExpInitialize;
561 to.RegExpLastMatchInfo = RegExpLastMatchInfo; 205 to.RegExpLastMatchInfo = RegExpLastMatchInfo;
562 }); 206 });
563 207
564 }) 208 })
OLDNEW
« no previous file with comments | « src/js/prologue.js ('k') | src/objects.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698