OLD | NEW |
1 // Copyright 2012 the V8 project authors. All rights reserved. | 1 // Copyright 2012 the V8 project authors. All rights reserved. |
2 // Use of this source code is governed by a BSD-style license that can be | 2 // Use of this source code is governed by a BSD-style license that can be |
3 // found in the LICENSE file. | 3 // found in the LICENSE file. |
4 | 4 |
5 (function(global, utils) { | 5 (function(global, utils) { |
6 | 6 |
7 'use strict'; | 7 'use strict'; |
8 | 8 |
9 %CheckIsBootstrapping(); | 9 %CheckIsBootstrapping(); |
10 | 10 |
11 // ------------------------------------------------------------------- | 11 // ------------------------------------------------------------------- |
12 // Imports | 12 // Imports |
13 | 13 |
14 var GlobalArray = global.Array; | |
15 var GlobalObject = global.Object; | |
16 var GlobalRegExp = global.RegExp; | 14 var GlobalRegExp = global.RegExp; |
17 var GlobalRegExpPrototype = GlobalRegExp.prototype; | 15 var GlobalRegExpPrototype = GlobalRegExp.prototype; |
18 var InternalArray = utils.InternalArray; | |
19 var MaxSimple; | |
20 var MinSimple; | |
21 var RegExpExecJS = GlobalRegExp.prototype.exec; | 16 var RegExpExecJS = GlobalRegExp.prototype.exec; |
22 var matchSymbol = utils.ImportNow("match_symbol"); | 17 var matchSymbol = utils.ImportNow("match_symbol"); |
23 var replaceSymbol = utils.ImportNow("replace_symbol"); | |
24 var searchSymbol = utils.ImportNow("search_symbol"); | |
25 var speciesSymbol = utils.ImportNow("species_symbol"); | |
26 var splitSymbol = utils.ImportNow("split_symbol"); | |
27 var SpeciesConstructor; | |
28 | |
29 utils.Import(function(from) { | |
30 MaxSimple = from.MaxSimple; | |
31 MinSimple = from.MinSimple; | |
32 SpeciesConstructor = from.SpeciesConstructor; | |
33 }); | |
34 | 18 |
35 // ------------------------------------------------------------------- | 19 // ------------------------------------------------------------------- |
36 | 20 |
37 // Property of the builtins object for recording the result of the last | 21 // Property of the builtins object for recording the result of the last |
38 // regexp match. The property RegExpLastMatchInfo includes the matchIndices | 22 // regexp match. The property RegExpLastMatchInfo includes the matchIndices |
39 // array of the last successful regexp match (an array of start/end index | 23 // array of the last successful regexp match (an array of start/end index |
40 // pairs for the match and all the captured substrings), the invariant is | 24 // pairs for the match and all the captured substrings), the invariant is |
41 // that there are at least two capture indices. The array also contains | 25 // that there are at least two capture indices. The array also contains |
42 // the subject string for the last successful match. | 26 // the subject string for the last successful match. |
43 // We use a JSObject rather than a JSArray so we don't have to manually update | 27 // We use a JSObject rather than a JSArray so we don't have to manually update |
(...skipping 20 matching lines...) Expand all Loading... |
64 // ES#sec-regexpinitialize | 48 // ES#sec-regexpinitialize |
65 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) | 49 // Runtime Semantics: RegExpInitialize ( obj, pattern, flags ) |
// Normalizes `pattern` and `flags` (undefined becomes the empty string, any
// other value is converted with TO_STRING), passes them together with `object`
// to %RegExpInitializeAndCompile, and returns `object`.
66 function RegExpInitialize(object, pattern, flags) { | 50 function RegExpInitialize(object, pattern, flags) { |
67 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); | 51 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); |
68 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); | 52 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); |
69 %RegExpInitializeAndCompile(object, pattern, flags); | 53 %RegExpInitializeAndCompile(object, pattern, flags); |
70 return object; | 54 return object; |
71 } | 55 } |
72 | 56 |
73 | 57 |
// Thin wrapper around the %_RegExpExec intrinsic: forwards `regexp`, `string`
// and `index` unchanged and always supplies the shared RegExpLastMatchInfo
// object. Callers in this file treat a null result as "no match" and read
// capture positions from a non-null result.
// NOTE(review): `index` is presumably the position at which matching starts;
// every caller in this file passes 0 -- confirm against the intrinsic.
74 function DoRegExpExec(regexp, string, index) { | |
75 return %_RegExpExec(regexp, string, index, RegExpLastMatchInfo); | |
76 } | |
77 | |
78 | |
79 // This is kind of performance sensitive, so we want to avoid unnecessary | 58 // This is kind of performance sensitive, so we want to avoid unnecessary |
80 // type checks on inputs. But we also don't want to inline it several times | 59 // type checks on inputs. But we also don't want to inline it several times |
81 // manually, so we use a macro :-) | 60 // manually, so we use a macro :-) |
// Expands to statements that build the result array for a match described by
// MATCHINFO on STRING and then `return` it, so it must be expanded inside a
// function body. result[0] is the whole match and result[i] is the substring
// of capture i; a capture whose start index is -1 is not written to the result.
82 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) | 61 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) |
// Captures are recorded as start/end index pairs, so halve the slot count.
83 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; | 62 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; |
84 var start = MATCHINFO[CAPTURE0]; | 63 var start = MATCHINFO[CAPTURE0]; |
85 var end = MATCHINFO[CAPTURE1]; | 64 var end = MATCHINFO[CAPTURE1]; |
86 // Calculate the substring of the first match before creating the result array | 65 // Calculate the substring of the first match before creating the result array |
87 // to avoid an unnecessary write barrier storing the first result. | 66 // to avoid an unnecessary write barrier storing the first result. |
88 var first = %_SubString(STRING, start, end); | 67 var first = %_SubString(STRING, start, end); |
89 var result = %_RegExpConstructResult(numResults, start, STRING); | 68 var result = %_RegExpConstructResult(numResults, start, STRING); |
90 result[0] = first; | 69 result[0] = first; |
// Only the implicit whole-match capture exists: nothing more to fill in.
91 if (numResults == 1) return result; | 70 if (numResults == 1) return result; |
// j walks the raw start/end slots of the explicit captures.
// NOTE(review): presumably REGEXP_FIRST_CAPTURE is the slot of the whole-match
// start, so "+ 2" skips that first pair -- confirm against the macro file.
92 var j = REGEXP_FIRST_CAPTURE + 2; | 71 var j = REGEXP_FIRST_CAPTURE + 2; |
93 for (var i = 1; i < numResults; i++) { | 72 for (var i = 1; i < numResults; i++) { |
94 start = MATCHINFO[j++]; | 73 start = MATCHINFO[j++]; |
95 if (start != -1) { | 74 if (start != -1) { |
96 end = MATCHINFO[j]; | 75 end = MATCHINFO[j]; |
97 result[i] = %_SubString(STRING, start, end); | 76 result[i] = %_SubString(STRING, start, end); |
98 } | 77 } |
// Step over the end slot whether or not the capture was written.
99 j++; | 78 j++; |
100 } | 79 } |
101 return result; | 80 return result; |
102 endmacro | 81 endmacro |
103 | 82 |
104 | |
105 // ES#sec-regexpexec Runtime Semantics: RegExpExec ( R, S ) | |
106 // Also takes an optional exec method in case our caller | |
107 // has already fetched exec. | |
// Returns whatever the exec call returns: an object or null when a callable
// `exec` is used (anything else throws a TypeError), or the result of the
// captured built-in RegExp.prototype.exec (RegExpExecJS) otherwise.
108 function RegExpSubclassExec(regexp, string, exec) { | |
// No pre-fetched exec was supplied: read it from the regexp now.
109 if (IS_UNDEFINED(exec)) { | |
110 exec = regexp.exec; | |
111 } | |
112 if (IS_CALLABLE(exec)) { | |
// Invoke the (possibly user-defined) exec with `regexp` as the receiver.
113 var result = %_Call(exec, regexp, string); | |
114 if (!IS_RECEIVER(result) && !IS_NULL(result)) { | |
115 throw %make_type_error(kInvalidRegExpExecResult); | |
116 } | |
117 return result; | |
118 } | |
// exec is not callable: fall back to the built-in exec captured at the top
// of this file.
119 return %_Call(RegExpExecJS, regexp, string); | |
120 } | |
// NOTE(review): presumably asks the compiler to always inline this helper --
// confirm the semantics of %SetForceInlineFlag.
121 %SetForceInlineFlag(RegExpSubclassExec); | |
122 | |
123 | |
124 // Legacy implementation of RegExp.prototype[Symbol.replace] which | |
125 // doesn't properly call the underlying exec method. | |
126 | |
127 // TODO(lrn): This array will survive indefinitely if replace is never | |
128 // called again. However, it will be empty, since the contents are cleared | |
129 // in the finally block. | |
130 var reusableReplaceArray = new InternalArray(4); | |
131 | |
132 // Helper function for replacing regular expressions with the result of a | |
133 // function application in String.prototype.replace. | |
// Runs %RegExpExecMultiple for `regexp` over `subject`, calls `replace` for
// every match entry in the returned list and stores the stringified callback
// result back into that list, then joins the list with %StringBuilderConcat.
// Returns `subject` itself when there is no match. regexp.lastIndex is reset
// to 0 in either case.
134 function StringReplaceGlobalRegExpWithFunction(subject, regexp, replace) { | |
// Take ownership of the shared scratch array; it is null while another call
// is using it.
135 var resultArray = reusableReplaceArray; | |
136 if (resultArray) { | |
137 reusableReplaceArray = null; | |
138 } else { | |
139 // Inside a nested replace (replace called from the replacement function | |
140 // of another replace) or we have failed to set the reusable array | |
141 // back due to an exception in a replacement function. Create a new | |
142 // array to use in the future, or until the original is written back. | |
143 resultArray = new InternalArray(16); | |
144 } | |
145 var res = %RegExpExecMultiple(regexp, | |
146 subject, | |
147 RegExpLastMatchInfo, | |
148 resultArray); | |
149 regexp.lastIndex = 0; | |
150 if (IS_NULL(res)) { | |
151 // No matches at all. | |
152 reusableReplaceArray = resultArray; | |
153 return subject; | |
154 } | |
155 var len = res.length; | |
156 if (NUMBER_OF_CAPTURES(RegExpLastMatchInfo) == 2) { | |
157 // If the number of captures is two then there are no explicit captures in | |
158 // the regexp, just the implicit capture that captures the whole match. In | |
159 // this case we can simplify quite a bit and end up with something faster. | |
160 // The builder will consist of some integers that indicate slices of the | |
161 // input string and some replacements that were returned from the replace | |
162 // function. | |
// match_start tracks the position in `subject` of the next match; it is
// passed to the callback as the match index.
163 var match_start = 0; | |
164 for (var i = 0; i < len; i++) { | |
165 var elem = res[i]; | |
166 if (%_IsSmi(elem)) { | |
167 // Integers represent slices of the original string. | |
// NOTE(review): a positive smi presumably packs the slice as a position in
// the high bits and a length in the low 11 bits; their sum is the end of
// the slice. Otherwise the slice is spread over two entries (this one and
// the next, which is consumed here) -- confirm against the runtime.
168 if (elem > 0) { | |
169 match_start = (elem >> 11) + (elem & 0x7ff); | |
170 } else { | |
171 match_start = res[++i] - elem; | |
172 } | |
173 } else { | |
// A non-smi entry is the matched string itself.
174 var func_result = replace(elem, match_start, subject); | |
175 // Overwrite the i'th element in the results with the string we got | |
176 // back from the callback function. | |
177 res[i] = TO_STRING(func_result); | |
178 match_start += elem.length; | |
179 } | |
180 } | |
181 } else { | |
182 for (var i = 0; i < len; i++) { | |
183 var elem = res[i]; | |
184 if (!%_IsSmi(elem)) { | |
185 // elem must be an Array. | |
186 // Use the apply argument as backing for global RegExp properties. | |
187 var func_result = %reflect_apply(replace, UNDEFINED, elem); | |
188 // Overwrite the i'th element in the results with the string we got | |
189 // back from the callback function. | |
190 res[i] = TO_STRING(func_result); | |
191 } | |
192 } | |
193 } | |
194 var result = %StringBuilderConcat(res, len, subject); | |
// Empty the scratch array before handing it back for reuse. This point is
// not reached if a callback threw, in which case the shared slot stays null.
195 resultArray.length = 0; | |
196 reusableReplaceArray = resultArray; | |
197 return result; | |
198 } | |
199 | |
200 | |
201 // Compute the string of a given regular expression capture. | |
// `string` is the subject, `lastCaptureInfo` the match info holding the
// capture positions, and `index` the capture number (0 is used by callers for
// the whole match). Returns the captured substring, or undefined when the
// recorded start position is negative.
202 function CaptureString(string, lastCaptureInfo, index) { | |
203 // Scale the index. | |
// Each capture occupies two consecutive slots: start, then end.
204 var scaled = index << 1; | |
205 // Compute start and end. | |
206 var start = lastCaptureInfo[CAPTURE(scaled)]; | |
207 // If start isn't valid, return undefined. | |
208 if (start < 0) return; | |
209 var end = lastCaptureInfo[CAPTURE(scaled + 1)]; | |
210 return %_SubString(string, start, end); | |
211 } | |
212 | |
213 | |
// Replaces the first match of `regexp` in `subject` with the result of calling
// `replace`. When there is no match, resets regexp.lastIndex to 0 and returns
// `subject` unchanged. Otherwise returns the text before the match, the
// callback result (string-concatenated), and the text after the match.
214 function StringReplaceNonGlobalRegExpWithFunction(subject, regexp, replace) { | |
215 var matchInfo = DoRegExpExec(regexp, subject, 0); | |
216 if (IS_NULL(matchInfo)) { | |
217 regexp.lastIndex = 0; | |
218 return subject; | |
219 } | |
// Read everything needed from matchInfo up front; the callback below may
// overwrite it.
220 var index = matchInfo[CAPTURE0]; | |
221 var result = %_SubString(subject, 0, index); | |
222 var endOfMatch = matchInfo[CAPTURE1]; | |
223 // Compute the parameter list consisting of the match, captures, index, | |
224 // and subject for the replace function invocation. | |
225 // The number of captures plus one for the match. | |
226 var m = NUMBER_OF_CAPTURES(matchInfo) >> 1; | |
227 var replacement; | |
228 if (m == 1) { | |
229 // No captures, only the match, which is always valid. | |
230 var s = %_SubString(subject, index, endOfMatch); | |
231 // Don't call directly to avoid exposing the built-in global object. | |
232 replacement = replace(s, index, subject); | |
233 } else { | |
// Slots 0..m-1 hold the match and captures; the two extra slots hold the
// match index and the subject.
234 var parameters = new InternalArray(m + 2); | |
235 for (var j = 0; j < m; j++) { | |
236 parameters[j] = CaptureString(subject, matchInfo, j); | |
237 } | |
// `j` is function-scoped and equals m here.
238 parameters[j] = index; | |
239 parameters[j + 1] = subject; | |
240 | |
241 replacement = %reflect_apply(replace, UNDEFINED, parameters); | |
242 } | |
243 | |
244 result += replacement; // The add method converts to string if necessary. | |
245 // Can't use matchInfo any more from here, since the function could | |
246 // overwrite it. | |
247 return result + %_SubString(subject, endOfMatch, subject.length); | |
248 } | |
249 | |
250 // Wraps access to matchInfo's captures into a format understood by | |
251 // GetSubstitution. | |
// Constructor. `matches` is a match-info object and `subject` the string it
// refers to. `length` is the number of start/end pairs recorded in `matches`
// (the whole match counts as one).
252 function MatchInfoCaptureWrapper(matches, subject) { | |
253 this.length = NUMBER_OF_CAPTURES(matches) >> 1; | |
254 this.match = matches; | |
255 this.subject = subject; | |
256 } | |
257 | |
// Returns the substring of capture `ix`, or UNDEFINED when its recorded start
// position is negative.
258 MatchInfoCaptureWrapper.prototype.at = function(ix) { | |
259 const match = this.match; | |
// Capture ix occupies slots 2*ix (start) and 2*ix + 1 (end).
260 const start = match[CAPTURE(ix << 1)]; | |
261 if (start < 0) return UNDEFINED; | |
262 return %_SubString(this.subject, start, match[CAPTURE((ix << 1) + 1)]); | |
263 }; | |
// NOTE(review): presumably requests that `at` always be inlined -- confirm the
// semantics of %SetForceInlineFlag.
264 %SetForceInlineFlag(MatchInfoCaptureWrapper.prototype.at); | |
265 | |
// Constructor. Wraps a plain array of captures behind the same `length` /
// `at(ix)` shape as MatchInfoCaptureWrapper, so either wrapper can be passed
// as the `captures` argument of GetSubstitution. `length` is copied once at
// construction time.
266 function ArrayCaptureWrapper(array) { | |
267 this.length = array.length; | |
268 this.array = array; | |
269 } | |
270 | |
// Returns the element stored at `ix` in the wrapped array, as is.
271 ArrayCaptureWrapper.prototype.at = function(ix) { | |
272 return this.array[ix]; | |
273 }; | |
// NOTE(review): presumably requests that `at` always be inlined -- confirm the
// semantics of %SetForceInlineFlag.
274 %SetForceInlineFlag(ArrayCaptureWrapper.prototype.at); | |
275 | |
276 function RegExpReplace(string, replace) { | |
277 if (!IS_REGEXP(this)) { | |
278 throw %make_type_error(kIncompatibleMethodReceiver, | |
279 "RegExp.prototype.@@replace", this); | |
280 } | |
281 var subject = TO_STRING(string); | |
282 var search = this; | |
283 | |
284 if (!IS_CALLABLE(replace)) { | |
285 replace = TO_STRING(replace); | |
286 | |
287 if (!REGEXP_GLOBAL(search)) { | |
288 // Non-global regexp search, string replace. | |
289 var match = DoRegExpExec(search, subject, 0); | |
290 if (match == null) { | |
291 search.lastIndex = 0 | |
292 return subject; | |
293 } | |
294 if (replace.length == 0) { | |
295 return %_SubString(subject, 0, match[CAPTURE0]) + | |
296 %_SubString(subject, match[CAPTURE1], subject.length) | |
297 } | |
298 const captures = new MatchInfoCaptureWrapper(match, subject); | |
299 const start = match[CAPTURE0]; | |
300 const end = match[CAPTURE1]; | |
301 | |
302 const prefix = %_SubString(subject, 0, start); | |
303 const matched = %_SubString(subject, start, end); | |
304 const suffix = %_SubString(subject, end, subject.length); | |
305 | |
306 return prefix + | |
307 GetSubstitution(matched, subject, start, captures, replace) + | |
308 suffix; | |
309 } | |
310 | |
311 // Global regexp search, string replace. | |
312 search.lastIndex = 0; | |
313 return %StringReplaceGlobalRegExpWithString( | |
314 subject, search, replace, RegExpLastMatchInfo); | |
315 } | |
316 | |
317 if (REGEXP_GLOBAL(search)) { | |
318 // Global regexp search, function replace. | |
319 return StringReplaceGlobalRegExpWithFunction(subject, search, replace); | |
320 } | |
321 // Non-global regexp search, function replace. | |
322 return StringReplaceNonGlobalRegExpWithFunction(subject, search, replace); | |
323 } | |
324 | |
325 | |
326 // ES#sec-getsubstitution | 83 // ES#sec-getsubstitution |
327 // GetSubstitution(matched, str, position, captures, replacement) | 84 // GetSubstitution(matched, str, position, captures, replacement) |
328 // Expand the $-expressions in the string and return a new string with | 85 // Expand the $-expressions in the string and return a new string with |
329 // the result. | 86 // the result. |
330 function GetSubstitution(matched, string, position, captures, replacement) { | 87 function GetSubstitution(matched, string, position, captures, replacement) { |
331 var matchLength = matched.length; | 88 var matchLength = matched.length; |
332 var stringLength = string.length; | 89 var stringLength = string.length; |
333 var capturesLength = captures.length; | 90 var capturesLength = captures.length; |
334 var tailPos = position + matchLength; | 91 var tailPos = position + matchLength; |
335 var result = ""; | 92 var result = ""; |
(...skipping 65 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
401 } | 158 } |
402 | 159 |
403 // Append substring between the previous and the next $ character. | 160 // Append substring between the previous and the next $ character. |
404 if (next > pos) { | 161 if (next > pos) { |
405 result += %_SubString(replacement, pos, next); | 162 result += %_SubString(replacement, pos, next); |
406 } | 163 } |
407 } | 164 } |
408 return result; | 165 return result; |
409 } | 166 } |
410 | 167 |
411 | |
412 // ES#sec-advancestringindex | |
413 // AdvanceStringIndex ( S, index, unicode ) | |
// Note: this returns the *increment* (1 or 2), not the advanced index; the
// caller adds it to `index`. The increment is 2 only when `unicode` is truthy
// and the code units at `index` and `index + 1` form a lead/trail surrogate
// pair; otherwise it is 1.
414 function AdvanceStringIndex(string, index, unicode) { | |
415 var increment = 1; | |
416 if (unicode) { | |
417 var first = %_StringCharCodeAt(string, index); | |
// Lead surrogate with at least one more code unit after it.
418 if (first >= 0xD800 && first <= 0xDBFF && string.length > index + 1) { | |
419 var second = %_StringCharCodeAt(string, index + 1); | |
// Followed by a trail surrogate: step over both code units.
420 if (second >= 0xDC00 && second <= 0xDFFF) { | |
421 increment = 2; | |
422 } | |
423 } | |
424 } | |
425 return increment; | |
426 } | |
427 | |
428 | |
// Reads regexp.lastIndex once and writes back that value plus the increment
// computed by AdvanceStringIndex for `string` at that position (1, or 2 for a
// surrogate pair when `unicode` is truthy). Returns nothing.
429 function SetAdvancedStringIndex(regexp, string, unicode) { | |
430 var lastIndex = regexp.lastIndex; | |
431 regexp.lastIndex = lastIndex + | |
432 AdvanceStringIndex(string, lastIndex, unicode); | |
433 } | |
434 | |
435 | |
436 // ES#sec-regexp.prototype-@@replace | |
437 // RegExp.prototype [ @@replace ] ( string, replaceValue ) | |
// `this` may be any object, not only a RegExp; a non-object receiver throws a
// TypeError. `string` is converted with TO_STRING. `replace` is used as a
// callback when callable and converted with TO_STRING otherwise. Returns the
// string with the collected matches replaced.
438 function RegExpSubclassReplace(string, replace) { | |
439 if (!IS_RECEIVER(this)) { | |
440 throw %make_type_error(kIncompatibleMethodReceiver, | |
441 "RegExp.prototype.@@replace", this); | |
442 } | |
443 string = TO_STRING(string); | |
444 var length = string.length; | |
445 var functionalReplace = IS_CALLABLE(replace); | |
446 if (!functionalReplace) replace = TO_STRING(replace); | |
447 var global = TO_BOOLEAN(this.global); | |
// `unicode` is assigned only for global regexps; it is read only on the
// global path of the collection loop below.
448 if (global) { | |
449 var unicode = TO_BOOLEAN(this.unicode); | |
450 this.lastIndex = 0; | |
451 } | |
452 | |
453 // TODO(adamk): this fast path is wrong as we don't ensure that 'exec' | |
454 // is actually a data property on RegExp.prototype. | |
455 var exec; | |
456 if (IS_REGEXP(this)) { | |
457 exec = this.exec; | |
// exec is the built-in captured at the top of this file: delegate to the
// legacy RegExpReplace implementation.
458 if (exec === RegExpExecJS) { | |
459 return %_Call(RegExpReplace, this, string, replace); | |
460 } | |
461 } | |
462 | |
// Phase 1: collect match results -- a single one for a non-global regexp,
// otherwise every result until exec reports null.
463 var results = new InternalArray(); | |
464 var result, replacement; | |
465 while (true) { | |
466 result = RegExpSubclassExec(this, string, exec); | |
467 // Ensure exec will be read again on the next loop through. | |
468 exec = UNDEFINED; | |
469 if (IS_NULL(result)) { | |
470 break; | |
471 } else { | |
472 results.push(result); | |
473 if (!global) break; | |
474 var matchStr = TO_STRING(result[0]); | |
// An empty match advances lastIndex explicitly (presumably so the next
// exec does not match at the same position again).
475 if (matchStr === "") SetAdvancedStringIndex(this, string, unicode); | |
476 } | |
477 } | |
// Phase 2: build the output from the collected results.
478 var accumulatedResult = ""; | |
479 var nextSourcePosition = 0; | |
480 for (var i = 0; i < results.length; i++) { | |
481 result = results[i]; | |
482 var capturesLength = MaxSimple(TO_LENGTH(result.length), 0); | |
483 var matched = TO_STRING(result[0]); | |
484 var matchedLength = matched.length; | |
// Clamp the reported index into [0, length] (assuming MaxSimple/MinSimple
// are plain max/min helpers).
485 var position = MaxSimple(MinSimple(TO_INTEGER(result.index), length), 0); | |
// Copy the entries of the result, stringifying all but undefined ones.
486 var captures = new InternalArray(); | |
487 for (var n = 0; n < capturesLength; n++) { | |
488 var capture = result[n]; | |
489 if (!IS_UNDEFINED(capture)) capture = TO_STRING(capture); | |
490 captures[n] = capture; | |
491 } | |
492 if (functionalReplace) { | |
// Callback arguments: the captures, then the position, then the string.
493 var parameters = new InternalArray(capturesLength + 2); | |
494 for (var j = 0; j < capturesLength; j++) { | |
495 parameters[j] = captures[j]; | |
496 } | |
497 parameters[j] = position; | |
498 parameters[j + 1] = string; | |
// NOTE(review): the other %reflect_apply calls in this file pass three
// arguments; the meaning of the extra (0, parameters.length) pair here is
// not visible in this file -- confirm.
499 replacement = %reflect_apply(replace, UNDEFINED, parameters, 0, | |
500 parameters.length); | |
501 } else { | |
502 const capturesWrapper = new ArrayCaptureWrapper(captures); | |
503 replacement = GetSubstitution(matched, string, position, capturesWrapper, | |
504 replace); | |
505 } | |
// A result that starts before the text already consumed is ignored: its
// replacement has been computed but is not appended.
506 if (position >= nextSourcePosition) { | |
507 accumulatedResult += | |
508 %_SubString(string, nextSourcePosition, position) + replacement; | |
509 nextSourcePosition = position + matchedLength; | |
510 } | |
511 } | |
// Append whatever remains of the input after the last replaced match.
512 if (nextSourcePosition >= length) return accumulatedResult; | |
513 return accumulatedResult + %_SubString(string, nextSourcePosition, length); | |
514 } | |
// NOTE(review): presumably strips the `prototype` property from this function
// -- confirm the semantics of %FunctionRemovePrototype.
515 %FunctionRemovePrototype(RegExpSubclassReplace); | |
516 | |
517 | |
518 | |
519 // ------------------------------------------------------------------- | 168 // ------------------------------------------------------------------- |
520 | 169 |
// Installs RegExpSubclassReplace on RegExp.prototype under the key
// replaceSymbol, with the DONT_ENUM attribute passed to utils.InstallFunctions.
521 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ | |
522 replaceSymbol, RegExpSubclassReplace, | |
523 ]); | |
524 | |
// Registers the shared RegExpLastMatchInfo object under the name
// "regexp_last_match_info".
// NOTE(review): presumably this makes it reachable from the native context --
// confirm the semantics of %InstallToContext.
525 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); | 170 %InstallToContext(["regexp_last_match_info", RegExpLastMatchInfo]); |
526 | 171 |
527 // ------------------------------------------------------------------- | 172 // ------------------------------------------------------------------- |
528 // Internal | 173 // Internal |
529 | 174 |
530 var InternalRegExpMatchInfo = { | 175 var InternalRegExpMatchInfo = { |
531 REGEXP_NUMBER_OF_CAPTURES: 2, | 176 REGEXP_NUMBER_OF_CAPTURES: 2, |
532 REGEXP_LAST_SUBJECT: "", | 177 REGEXP_LAST_SUBJECT: "", |
533 REGEXP_LAST_INPUT: UNDEFINED, | 178 REGEXP_LAST_INPUT: UNDEFINED, |
534 CAPTURE0: 0, | 179 CAPTURE0: 0, |
(...skipping 14 matching lines...) Expand all Loading... |
549 } | 194 } |
550 | 195 |
551 // ------------------------------------------------------------------- | 196 // ------------------------------------------------------------------- |
552 // Exports | 197 // Exports |
553 | 198 |
// Attaches this file's shared helpers to the `to` object supplied by
// utils.Export.
// NOTE(review): presumably other natives pick these up through utils.Import --
// confirm.
554 utils.Export(function(to) { | 199 utils.Export(function(to) { |
555 to.GetSubstitution = GetSubstitution; | 200 to.GetSubstitution = GetSubstitution; |
556 to.InternalRegExpMatch = InternalRegExpMatch; | 201 to.InternalRegExpMatch = InternalRegExpMatch; |
557 to.InternalRegExpReplace = InternalRegExpReplace; | 202 to.InternalRegExpReplace = InternalRegExpReplace; |
558 to.IsRegExp = IsRegExp; | 203 to.IsRegExp = IsRegExp; |
// DoRegExpExec is exported under the name RegExpExec.
559 to.RegExpExec = DoRegExpExec; | |
560 to.RegExpInitialize = RegExpInitialize; | 204 to.RegExpInitialize = RegExpInitialize; |
561 to.RegExpLastMatchInfo = RegExpLastMatchInfo; | 205 to.RegExpLastMatchInfo = RegExpLastMatchInfo; |
562 }); | 206 }); |
563 | 207 |
564 }) | 208 }) |
OLD | NEW |