OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 (function(global, utils) { | 5 (function(global, utils) { |
6 | 6 |
7 'use strict'; | 7 'use strict'; |
8 | 8 |
9 %CheckIsBootstrapping(); | 9 %CheckIsBootstrapping(); |
10 | 10 |
11 // ------------------------------------------------------------------- | 11 // ------------------------------------------------------------------- |
12 // Imports | 12 // Imports |
13 | 13 |
14 var GlobalArray = global.Array; | |
15 var GlobalObject = global.Object; | |
16 var GlobalRegExp = global.RegExp; | 14 var GlobalRegExp = global.RegExp; |
17 var GlobalRegExpPrototype = GlobalRegExp.prototype; | 15 var GlobalRegExpPrototype = GlobalRegExp.prototype; |
18 var InternalArray = utils.InternalArray; | |
19 var InternalPackedArray = utils.InternalPackedArray; | |
20 var MaxSimple; | |
21 var MinSimple; | |
22 var RegExpExecJS = GlobalRegExp.prototype.exec; | 16 var RegExpExecJS = GlobalRegExp.prototype.exec; |
23 var matchSymbol = utils.ImportNow("match_symbol"); | 17 var matchSymbol = utils.ImportNow("match_symbol"); |
24 var replaceSymbol = utils.ImportNow("replace_symbol"); | |
25 var searchSymbol = utils.ImportNow("search_symbol"); | |
26 var speciesSymbol = utils.ImportNow("species_symbol"); | |
27 var splitSymbol = utils.ImportNow("split_symbol"); | |
28 var SpeciesConstructor; | |
29 | |
30 utils.Import(function(from) { | |
31 MaxSimple = from.MaxSimple; | |
32 MinSimple = from.MinSimple; | |
33 SpeciesConstructor = from.SpeciesConstructor; | |
34 }); | |
35 | 18 |
36 // ------------------------------------------------------------------- | 19 // ------------------------------------------------------------------- |
37 | 20 |
38 // Property of the builtins object for recording the result of the last | 21 // Property of the builtins object for recording the result of the last |
39 // regexp match. The property RegExpLastMatchInfo includes the matchIndices | 22 // regexp match. The property RegExpLastMatchInfo includes the matchIndices |
40 // array of the last successful regexp match (an array of start/end index | 23 // array of the last successful regexp match (an array of start/end index |
41 // pairs for the match and all the captured substrings), the invariant is | 24 // pairs for the match and all the captured substrings), the invariant is |
42 // that there are at least two capture indices. The array also contains | 25 // that there are at least two capture indices. The array also contains |
43 // the subject string for the last successful match. | 26 // the subject string for the last successful match. |
44 // We use a JSObject rather than a JSArray so we don't have to manually update | 27 // We use a JSObject rather than a JSArray so we don't have to manually update |
(...skipping 20 matching lines...) Expand all Loading... |
65 // ES#sec-regexpinitialize | 48 // ES#sec-regexpinitialize |
66 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) | 49 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) |
67 function RegExpInitialize(object, pattern, flags) { | 50 function RegExpInitialize(object, pattern, flags) { |
68 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); | 51 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); |
69 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); | 52 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); |
70 %RegExpInitializeAndCompile(object, pattern, flags); | 53 %RegExpInitializeAndCompile(object, pattern, flags); |
71 return object; | 54 return object; |
72 } | 55 } |
73 | 56 |
74 | 57 |
75 function DoRegExpExec(regexp, string, index) { | |
76 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo); | |
77 } | |
78 | |
79 | |
80 // This is kind of performance sensitive, so we want to avoid unnecessary | 58 // This is kind of performance sensitive, so we want to avoid unnecessary |
81 // type checks on inputs. But we also don't want to inline it several times | 59 // type checks on inputs. But we also don't want to inline it several times |
82 // manually, so we use a macro :-) | 60 // manually, so we use a macro :-) |
83 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) | 61 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) |
84 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; | 62 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; |
85 var start = MATCHINFO[CAPTURE0]; | 63 var start = MATCHINFO[CAPTURE0]; |
86 var end = MATCHINFO[CAPTURE1]; | 64 var end = MATCHINFO[CAPTURE1]; |
87 // Calculate the substring of the first match before creating the result array | 65 // Calculate the substring of the first match before creating the result array |
88 // to avoid an unnecessary write barrier storing the first result. | 66 // to avoid an unnecessary write barrier storing the first result. |
89 var first = %_SubString(STRING, start, end); | 67 var first = %_SubString(STRING, start, end); |
90 var result = %_RegExpConstructResult(numResults, start, STRING); | 68 var result = %_RegExpConstructResult(numResults, start, STRING); |
91 result[0] = first; | 69 result[0] = first; |
92 if (numResults == 1) return result; | 70 if (numResults == 1) return result; |
93 var j = REGEXP_FIRST_CAPTURE + 2; | 71 var j = REGEXP_FIRST_CAPTURE + 2; |
94 for (var i = 1; i < numResults; i++) { | 72 for (var i = 1; i < numResults; i++) { |
95 start = MATCHINFO[j++]; | 73 start = MATCHINFO[j++]; |
96 if (start != -1) { | 74 if (start != -1) { |
97 end = MATCHINFO[j]; | 75 end = MATCHINFO[j]; |
98 result[i] = %_SubString(STRING, start, end); | 76 result[i] = %_SubString(STRING, start, end); |
99 } | 77 } |
100 j++; | 78 j++; |
101 } | 79 } |
102 return result; | 80 return result; |
103 endmacro | 81 endmacro |
104 | 82 |
105 | |
106 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S ) | |
107 // Also takes an optional exec method in case our caller | |
108 // has already fetched exec. | |
109 function RegExpSubclassExec(regexp, string, exec) { | |
110 if (IS_UNDEFINED(exec)) { | |
111 exec = regexp.exec; | |
112 } | |
113 if (IS_CALLABLE(exec)) { | |
114 var result = %_Call(exec, regexp, string); | |
115 if (!IS_RECEIVER(result) && !IS_NULL(result)) { | |
116 throw %make_type_error(kInvalidRegExpExecResult); | |
117 } | |
118 return result; | |
119 } | |
120 return %_Call(RegExpExecJS, regexp, string); | |
121 } | |
122 %SetForceInlineFlag(RegExpSubclassExec); | |
123 | |
124 | |
125 // Legacy implementation of RegExp.prototype[Symbol.replace] which | |
126 // doesn't properly call the underlying exec method. | |
127 | |
128 // TODO(lrn): This array will survive indefinitely if replace is never | |
129 // called again. However, it will be empty, since the contents are cleared | |
130 // in the finally block. | |
131 var reusableReplaceArray = new InternalArray(4); | |
132 | |
133 // Helper function for replacing regular expressions with the result of a | |
134 // function application in String.prototype.replace. | |
135 function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) { | |
136 var resultArray = reusableReplaceArray; | |
137 if (resultArray) { | |
138 reusableReplaceArray = null; | |
139 } else { | |
140 // Inside a nested replace (replace called from the replacement function | |
141 // of another replace) or we have failed to set the reusable array | |
142 // back due to an exception in a replacement function. Create a new | |
143 // array to use in the future, or until the original is written back. | |
144 resultArray = new InternalArray(16); | |
145 } | |
146 var res = %RegExpExecMultiple(regexp, | |
147 subject, | |
148 RegExpLastMatchInfo, | |
149 resultArray); | |
150 regexp.lastIndex = 0; | |
151 if (IS_NULL(res)) { | |
152 // No matches at all. | |
153 reusableReplaceArray = resultArray; | |
154 return subject; | |
155 } | |
156 var len = res.length; | |
157 if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) { | |
158 // If the number of captures is two then there are no explicit captures in | |
159 // the regexp, just the implicit capture that captures the whole match. In | |
160 // this case we can simplify quite a bit and end up with something faster. | |
161 // The builder will consist of some integers that indicate slices of the | |
162 // input string and some replacements that were returned from the replace | |
163 // function. | |
164 var match_start = 0; | |
165 for (var i = 0; i < len; i++) { | |
166 var elem = res[i]; | |
167 if (%_IsSmi(elem)) { | |
168 // Integers represent slices of the original string. | |
169 if (elem > 0) { | |
170 match_start = (elem >> 11) + (elem & 0x7ff); | |
171 } else { | |
172 match_start = res[++i] - elem; | |
173 } | |
174 } else { | |
175 var func_result = replace(elem, match_start, subject); | |
176 // Overwrite the i'th element in the results with the string we got | |
177 // back from the callback function. | |
178 res[i] = TO_STRING(func_result); | |
179 match_start += elem.length; | |
180 } | |
181 } | |
182 } else { | |
183 for (var i = 0; i < len; i++) { | |
184 var elem = res[i]; | |
185 if (!%_IsSmi(elem)) { | |
186 // elem must be an Array. | |
187 // Use the apply argument as backing for global RegExp properties. | |
188 var func_result = %reflect_apply(replace, UNDEFINED, elem); | |
189 // Overwrite the i'th element in the results with the string we got | |
190 // back from the callback function. | |
191 res[i] = TO_STRING(func_result); | |
192 } | |
193 } | |
194 } | |
195 var result = %StringBuilderConcat(res, len, subject); | |
196 resultArray.length = 0; | |
197 reusableReplaceArray = resultArray; | |
198 return result; | |
199 } | |
200 | |
201 | |
202 // Compute the string of a given regular expression capture. | |
203 function CaptureString(string, lastCaptureInfo, index) { | |
204 // Scale the index. | |
205 var scaled = index << 1; | |
206 // Compute start and end. | |
207 var start = lastCaptureInfo[CAPTURE(scaled)]; | |
208 // If start isn't valid, return undefined. | |
209 if (start < 0) return; | |
210 var end = lastCaptureInfo[CAPTURE(scaled + 1)]; | |
211 return %_SubString(string, start, end); | |
212 } | |
213 | |
214 | |
215 function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) { | |
216 var matchInfo = DoRegExpExec(regexp, subject, 0); | |
217 if (IS_NULL(matchInfo)) { | |
218 regexp.lastIndex = 0; | |
219 return subject; | |
220 } | |
221 var index = matchInfo[CAPTURE0]; | |
222 var result = %_SubString(subject, 0, index); | |
223 var endOfMatch = matchInfo[CAPTURE1]; | |
224 // Compute the parameter list consisting of the match, captures, index, | |
225 // and subject for the replace function invocation. | |
226 // The number of captures plus one for the match. | |
227 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; | |
228 var replacement; | |
229 if (m == 1) { | |
230 // No captures, only the match, which is always valid. | |
231 var s = %_SubString(subject, index, endOfMatch); | |
232 // Don't call directly to avoid exposing the built-in global object. | |
233 replacement = replace(s, index, subject); | |
234 } else { | |
235 var parameters = new InternalArray(m + 2); | |
236 for (var j = 0; j < m; j++) { | |
237 parameters[j] = CaptureString(subject, matchInfo, j); | |
238 } | |
239 parameters[j] = index; | |
240 parameters[j + 1] = subject; | |
241 | |
242 replacement = %reflect_apply(replace, UNDEFINED, parameters); | |
243 } | |
244 | |
245 result += replacement; // The add method converts to string if necessary. | |
246 // Can't use matchInfo any more from here, since the function could | |
247 // overwrite it. | |
248 return result + %_SubString(subject, endOfMatch, subject.length); | |
249 } | |
250 | |
251 // Wraps access to matchInfo's captures into a format understood by | |
252 // GetSubstitution. | |
253 function MatchInfoCaptureWrapper(matches, subject) { | |
254 this.length = NUMBER_OF_CAPTURES(matches) >> 1; | |
255 this.match = matches; | |
256 this.subject = subject; | |
257 } | |
258 | |
259 MatchInfoCaptureWrapper.prototype.at = function(ix) { | |
260 const match = this.match; | |
261 const start = match[CAPTURE(ix << 1)]; | |
262 if (start < 0) return UNDEFINED; | |
263 return %_SubString(this.subject, start, match[CAPTURE((ix << 1) + 1)]); | |
264 }; | |
265 %SetForceInlineFlag(MatchInfoCaptureWrapper.prototype.at); | |
266 | |
267 function ArrayCaptureWrapper(array) { | |
268 this.length = array.length; | |
269 this.array = array; | |
270 } | |
271 | |
272 ArrayCaptureWrapper.prototype.at = function(ix) { | |
273 return this.array[ix]; | |
274 }; | |
275 %SetForceInlineFlag(ArrayCaptureWrapper.prototype.at); | |
276 | |
277 function RegExpReplace(string, replace) { | |
278 if (!IS_REGEXP(this)) { | |
279 throw %make_type_error(kIncompatibleMethodReceiver, | |
280 "RegExp.prototype.@@replace", this); | |
281 } | |
282 var subject = TO_STRING(string); | |
283 var search = this; | |
284 | |
285 if (!IS_CALLABLE(replace)) { | |
286 replace = TO_STRING(replace); | |
287 | |
288 if (!REGEXP_GLOBAL(search)) { | |
289 // Non-global regexp search, string replace. | |
290 var match = DoRegExpExec(search, subject, 0); | |
291 if (match == null) { | |
292 search.lastIndex = 0 | |
293 return subject; | |
294 } | |
295 if (replace.length == 0) { | |
296 return %_SubString(subject, 0, match[CAPTURE0]) + | |
297 %_SubString(subject, match[CAPTURE1], subject.length) | |
298 } | |
299 const captures = new MatchInfoCaptureWrapper(match, subject); | |
300 const start = match[CAPTURE0]; | |
301 const end = match[CAPTURE1]; | |
302 | |
303 const prefix = %_SubString(subject, 0, start); | |
304 const matched = %_SubString(subject, start, end); | |
305 const suffix = %_SubString(subject, end, subject.length); | |
306 | |
307 return prefix + | |
308 GetSubstitution(matched, subject, start, captures, replace) + | |
309 suffix; | |
310 } | |
311 | |
312 // Global regexp search, string replace. | |
313 search.lastIndex = 0; | |
314 return %StringReplaceGlobalRegExpWithString( | |
315 subject, search, replace, RegExpLastMatchInfo); | |
316 } | |
317 | |
318 if (REGEXP_GLOBAL(search)) { | |
319 // Global regexp search, function replace. | |
320 return StringReplaceGlobalRegExpWithFunction(subject, search, replace); | |
321 } | |
322 // Non-global regexp search, function replace. | |
323 return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace); | |
324 } | |
325 | |
326 | |
327 // ES#sec-getsubstitution | 83 // ES#sec-getsubstitution |
328 // GetSubstitution(matched, str, position, captures, replacement) | 84 // GetSubstitution(matched, str, position, captures, replacement) |
329 // Expand the $-expressions in the string and return a new string with | 85 // Expand the $-expressions in the string and return a new string with |
330 // the result. | 86 // the result. |
331 function GetSubstitution(matched, string, position, captures, replacement) { | 87 function GetSubstitution(matched, string, position, captures, replacement) { |
332 var matchLength = matched.length; | 88 var matchLength = matched.length; |
333 var stringLength = string.length; | 89 var stringLength = string.length; |
334 var capturesLength = captures.length; | 90 var capturesLength = captures.length; |
335 var tailPos = position + matchLength; | 91 var tailPos = position + matchLength; |
336 var result = ""; | 92 var result = ""; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
402 } | 158 } |
403 | 159 |
404 // Append substring between the previous and the next $ character. | 160 // Append substring between the previous and the next $ character. |
405 if (next > pos) { | 161 if (next > pos) { |
406 result += %_SubString(replacement, pos, next); | 162 result += %_SubString(replacement, pos, next); |
407 } | 163 } |
408 } | 164 } |
409 return result; | 165 return result; |
410 } | 166 } |
411 | 167 |
412 | |
413 // ES#sec-advancestringindex | |
414 // AdvanceStringIndex ( S, index, unicode ) | |
415 function AdvanceStringIndex(string, index, unicode) { | |
416 var increment = 1; | |
417 if (unicode) { | |
418 var first = %_StringCharCodeAt(string, index); | |
419 if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) { | |
420 var second = %_StringCharCodeAt(string, index + 1); | |
421 if (second >= 0xDC00 && second <= 0xDFFF) { | |
422 increment = 2; | |
423 } | |
424 } | |
425 } | |
426 return increment; | |
427 } | |
428 | |
429 | |
430 function SetAdvancedStringIndex(regexp, string, unicode) { | |
431 var lastIndex = regexp.lastIndex; | |
432 regexp.lastIndex = lastIndex + | |
433 AdvanceStringIndex(string, lastIndex, unicode); | |
434 } | |
435 | |
436 | |
437 // ES#sec-regexp.prototype-@@replace | |
438 // RegExp.prototype [ @@replace ] ( string, replaceValue ) | |
439 function RegExpSubclassReplace(string, replace) { | |
440 if (!IS_RECEIVER(this)) { | |
441 throw %make_type_error(kIncompatibleMethodReceiver, | |
442 "RegExp.prototype.@@replace", this); | |
443 } | |
444 string = TO_STRING(string); | |
445 var length = string.length; | |
446 var functionalReplace = IS_CALLABLE(replace); | |
447 if (!functionalReplace) replace = TO_STRING(replace); | |
448 var global = TO_BOOLEAN(this.global); | |
449 if (global) { | |
450 var unicode = TO_BOOLEAN(this.unicode); | |
451 this.lastIndex = 0; | |
452 } | |
453 | |
454 // TODO(adamk): this fast path is wrong as we doesn't ensure that 'exec' | |
455 // is actually a data property on RegExp.prototype. | |
456 var exec; | |
457 if (IS_REGEXP(this)) { | |
458 exec = this.exec; | |
459 if (exec === RegExpExecJS) { | |
460 return %_Call(RegExpReplace, this, string, replace); | |
461 } | |
462 } | |
463 | |
464 var results = new InternalArray(); | |
465 var result, replacement; | |
466 while (true) { | |
467 result = RegExpSubclassExec(this, string, exec); | |
468 // Ensure exec will be read again on the next loop through. | |
469 exec = UNDEFINED; | |
470 if (IS_NULL(result)) { | |
471 break; | |
472 } else { | |
473 results.push(result); | |
474 if (!global) break; | |
475 var matchStr = TO_STRING(result[0]); | |
476 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode); | |
477 } | |
478 } | |
479 var accumulatedResult = ""; | |
480 var nextSourcePosition = 0; | |
481 for (var i = 0; i < results.length; i++) { | |
482 result = results[i]; | |
483 var capturesLength = MaxSimple(TO_LENGTH(result.length), 0); | |
484 var matched = TO_STRING(result[0]); | |
485 var matchedLength = matched.length; | |
486 var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0); | |
487 var captures = new InternalArray(); | |
488 for (var n = 0; n < capturesLength; n++) { | |
489 var capture = result[n]; | |
490 if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture); | |
491 captures[n] = capture; | |
492 } | |
493 if (functionalReplace) { | |
494 var parameters = new InternalArray(capturesLength + 2); | |
495 for (var j = 0; j < capturesLength; j++) { | |
496 parameters[j] = captures[j]; | |
497 } | |
498 parameters[j] = position; | |
499 parameters[j + 1] = string; | |
500 replacement = %reflect_apply(replace, UNDEFINED, parameters, 0, | |
501 parameters.length); | |
502 } else { | |
503 const capturesWrapper = new ArrayCaptureWrapper(captures); | |
504 replacement = GetSubstitution(matched, string, position, capturesWrapper, | |
505 replace); | |
506 } | |
507 if (position >= nextSourcePosition) { | |
508 accumulatedResult += | |
509 %_SubString(string, nextSourcePosition, position) + replacement; | |
510 nextSourcePosition = position + matchedLength; | |
511 } | |
512 } | |
513 if (nextSourcePosition >= length) return accumulatedResult; | |
514 return accumulatedResult + %_SubString(string, nextSourcePosition, length); | |
515 } | |
516 %FunctionRemovePrototype(RegExpSubclassReplace); | |
517 | |
518 | |
519 | |
520 // ------------------------------------------------------------------- | 168 // ------------------------------------------------------------------- |
521 | 169 |
522 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ | |
523 replaceSymbol, RegExpSubclassReplace, | |
524 ]); | |
525 | |
526 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); | 170 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); |
527 | 171 |
528 // ------------------------------------------------------------------- | 172 // ------------------------------------------------------------------- |
529 // Internal | 173 // Internal |
530 | 174 |
531 var InternalRegExpMatchInfo = { | 175 var InternalRegExpMatchInfo = { |
532 REGEXP_NUMBER_OF_CAPTURES: 2, | 176 REGEXP_NUMBER_OF_CAPTURES: 2, |
533 REGEXP_LAST_SUBJECT: "", | 177 REGEXP_LAST_SUBJECT: "", |
534 REGEXP_LAST_INPUT: UNDEFINED, | 178 REGEXP_LAST_INPUT: UNDEFINED, |
535 CAPTURE0: 0, | 179 CAPTURE0: 0, |
(...skipping 14 matching lines...) Expand all Loading... |
550 } | 194 } |
551 | 195 |
552 // ------------------------------------------------------------------- | 196 // ------------------------------------------------------------------- |
553 // Exports | 197 // Exports |
554 | 198 |
555 utils.Export(function(to) { | 199 utils.Export(function(to) { |
556 to.GetSubstitution = GetSubstitution; | 200 to.GetSubstitution = GetSubstitution; |
557 to.InternalRegExpMatch = InternalRegExpMatch; | 201 to.InternalRegExpMatch = InternalRegExpMatch; |
558 to.InternalRegExpReplace = InternalRegExpReplace; | 202 to.InternalRegExpReplace = InternalRegExpReplace; |
559 to.IsRegExp = IsRegExp; | 203 to.IsRegExp = IsRegExp; |
560 to.RegExpExec = DoRegExpExec; | |
561 to.RegExpInitialize = RegExpInitialize; | 204 to.RegExpInitialize = RegExpInitialize; |
562 to.RegExpLastMatchInfo = RegExpLastMatchInfo; | 205 to.RegExpLastMatchInfo = RegExpLastMatchInfo; |
563 }); | 206 }); |
564 | 207 |
565 }) | 208 }) |
OLD | NEW |