OLD | NEW |
| (Empty) |
1 // Copyright 2012 the V8 project authors. All rights reserved. | |
2 // Use of this source code is governed by a BSD-style license that can be | |
3 // found in the LICENSE file. | |
4 | |
// Override for the information reported by the static RegExp match
// properties. It is declared outside the bootstrapping closure below; the
// closure initializes it to null and resets it to null after every successful
// match it performs.
var $regexpLastMatchInfoOverride;
6 | |
7 (function(global, utils) { | |
8 | |
9 %CheckIsBootstrapping(); | |
10 | |
11 // ------------------------------------------------------------------- | |
12 // Imports | |
13 | |
14 var FLAG_harmony_regexps; | |
15 var FLAG_harmony_unicode_regexps; | |
16 var GlobalRegExp = global.RegExp; | |
17 var InternalPackedArray = utils.InternalPackedArray; | |
18 | |
19 utils.ImportFromExperimental(function(from) { | |
20 FLAG_harmony_regexps = from.FLAG_harmony_regexps; | |
21 FLAG_harmony_unicode_regexps = from.FLAG_harmony_unicode_regexps; | |
22 }); | |
23 | |
24 // ------------------------------------------------------------------- | |
25 | |
// Property of the builtins object for recording the result of the last
// regexp match. The property RegExpLastMatchInfo includes the matchIndices
// array of the last successful regexp match (an array of start/end index
// pairs for the match and all the captured substrings), the invariant is
// that there are at least two capture indices. The array also contains
// the subject string for the last successful match.
//
// Initial layout: the number of capture slots, the last subject, the last
// input, and then the start/end pair of the whole match.
var RegExpLastMatchInfo = new InternalPackedArray(
  2,                 // REGEXP_NUMBER_OF_CAPTURES
  "",                // Last subject.
  UNDEFINED,         // Last input - settable with RegExpSetInput.
  0,                 // REGEXP_FIRST_CAPTURE + 0
  0                  // REGEXP_FIRST_CAPTURE + 1
);
39 | |
// Override last match info with an array of actual substrings.
// Used internally by replace regexp with function.
// The array has the format of an "apply" argument for a replacement
// function. The value null means that no override is active; every
// successful match performed in this file resets it to null.
$regexpLastMatchInfoOverride = null;
45 | |
46 // ------------------------------------------------------------------- | |
47 | |
48 // A recursive descent parser for Patterns according to the grammar of | |
49 // ECMA-262 15.10.1, with deviations noted below. | |
50 function DoConstructRegExp(object, pattern, flags) { | |
51 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. | |
52 if (IS_REGEXP(pattern)) { | |
53 if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags); | |
54 flags = (pattern.global ? 'g' : '') | |
55 + (pattern.ignoreCase ? 'i' : '') | |
56 + (pattern.multiline ? 'm' : ''); | |
57 if (FLAG_harmony_unicode_regexps) | |
58 flags += (pattern.unicode ? 'u' : ''); | |
59 if (FLAG_harmony_regexps) | |
60 flags += (pattern.sticky ? 'y' : ''); | |
61 pattern = pattern.source; | |
62 } | |
63 | |
64 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); | |
65 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); | |
66 | |
67 %RegExpInitializeAndCompile(object, pattern, flags); | |
68 } | |
69 | |
70 | |
71 function RegExpConstructor(pattern, flags) { | |
72 if (%_IsConstructCall()) { | |
73 DoConstructRegExp(this, pattern, flags); | |
74 } else { | |
75 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. | |
76 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { | |
77 return pattern; | |
78 } | |
79 return new GlobalRegExp(pattern, flags); | |
80 } | |
81 } | |
82 | |
83 // Deprecated RegExp.prototype.compile method. We behave like the constructor | |
84 // were called again. In SpiderMonkey, this method returns the regexp object. | |
85 // In JSC, it returns undefined. For compatibility with JSC, we match their | |
86 // behavior. | |
87 function RegExpCompileJS(pattern, flags) { | |
88 // Both JSC and SpiderMonkey treat a missing pattern argument as the | |
89 // empty subject string, and an actual undefined value passed as the | |
90 // pattern as the string 'undefined'. Note that JSC is inconsistent | |
91 // here, treating undefined values differently in | |
92 // RegExp.prototype.compile and in the constructor, where they are | |
93 // the empty string. For compatibility with JSC, we match their | |
94 // behavior. | |
95 if (this == GlobalRegExp.prototype) { | |
96 // We don't allow recompiling RegExp.prototype. | |
97 throw MakeTypeError(kIncompatibleMethodReceiver, | |
98 'RegExp.prototype.compile', this); | |
99 } | |
100 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { | |
101 DoConstructRegExp(this, 'undefined', flags); | |
102 } else { | |
103 DoConstructRegExp(this, pattern, flags); | |
104 } | |
105 } | |
106 | |
107 | |
108 function DoRegExpExec(regexp, string, index) { | |
109 var result = %_RegExpExec(regexp, string, index, RegExpLastMatchInfo); | |
110 if (result !== null) $regexpLastMatchInfoOverride = null; | |
111 return result; | |
112 } | |
113 | |
114 | |
115 // This is kind of performance sensitive, so we want to avoid unnecessary | |
116 // type checks on inputs. But we also don't want to inline it several times | |
117 // manually, so we use a macro :-) | |
118 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) | |
119 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; | |
120 var start = MATCHINFO[CAPTURE0]; | |
121 var end = MATCHINFO[CAPTURE1]; | |
122 // Calculate the substring of the first match before creating the result array | |
123 // to avoid an unnecessary write barrier storing the first result. | |
124 var first = %_SubString(STRING, start, end); | |
125 var result = %_RegExpConstructResult(numResults, start, STRING); | |
126 result[0] = first; | |
127 if (numResults == 1) return result; | |
128 var j = REGEXP_FIRST_CAPTURE + 2; | |
129 for (var i = 1; i < numResults; i++) { | |
130 start = MATCHINFO[j++]; | |
131 if (start != -1) { | |
132 end = MATCHINFO[j]; | |
133 result[i] = %_SubString(STRING, start, end); | |
134 } | |
135 j++; | |
136 } | |
137 return result; | |
138 endmacro | |
139 | |
140 | |
141 function RegExpExecNoTests(regexp, string, start) { | |
142 // Must be called with RegExp, string and positive integer as arguments. | |
143 var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo); | |
144 if (matchInfo !== null) { | |
145 $regexpLastMatchInfoOverride = null; | |
146 // ES6 21.2.5.2.2 step 18. | |
147 if (FLAG_harmony_regexps && regexp.sticky) { | |
148 regexp.lastIndex = matchInfo[CAPTURE1]; | |
149 } | |
150 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); | |
151 } | |
152 regexp.lastIndex = 0; | |
153 return null; | |
154 } | |
155 | |
156 | |
157 function RegExpExecJS(string) { | |
158 if (!IS_REGEXP(this)) { | |
159 throw MakeTypeError(kIncompatibleMethodReceiver, | |
160 'RegExp.prototype.exec', this); | |
161 } | |
162 | |
163 string = TO_STRING(string); | |
164 var lastIndex = this.lastIndex; | |
165 | |
166 // Conversion is required by the ES5 specification (RegExp.prototype.exec | |
167 // algorithm, step 5) even if the value is discarded for non-global RegExps. | |
168 var i = TO_INTEGER(lastIndex); | |
169 | |
170 var updateLastIndex = this.global || (FLAG_harmony_regexps && this.sticky); | |
171 if (updateLastIndex) { | |
172 if (i < 0 || i > string.length) { | |
173 this.lastIndex = 0; | |
174 return null; | |
175 } | |
176 } else { | |
177 i = 0; | |
178 } | |
179 | |
180 // matchIndices is either null or the RegExpLastMatchInfo array. | |
181 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); | |
182 | |
183 if (IS_NULL(matchIndices)) { | |
184 this.lastIndex = 0; | |
185 return null; | |
186 } | |
187 | |
188 // Successful match. | |
189 $regexpLastMatchInfoOverride = null; | |
190 if (updateLastIndex) { | |
191 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; | |
192 } | |
193 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); | |
194 } | |
195 | |
196 | |
// One-element cache for the simplified test regexp: regexp_key is the regexp
// most recently handed to TrimRegExp and regexp_val is the trimmed regexp that
// was built for it.
var regexp_key;
var regexp_val;
200 | |
// Section 15.10.6.3 doesn't actually make sense, but the intention seems to be
// that test is defined in terms of RegExp.prototype.exec. However, it probably
// means the original value of RegExp.prototype.exec, which is what everybody
// else implements.
205 function RegExpTest(string) { | |
206 if (!IS_REGEXP(this)) { | |
207 throw MakeTypeError(kIncompatibleMethodReceiver, | |
208 'RegExp.prototype.test', this); | |
209 } | |
210 string = TO_STRING(string); | |
211 | |
212 var lastIndex = this.lastIndex; | |
213 | |
214 // Conversion is required by the ES5 specification (RegExp.prototype.exec | |
215 // algorithm, step 5) even if the value is discarded for non-global RegExps. | |
216 var i = TO_INTEGER(lastIndex); | |
217 | |
218 if (this.global || (FLAG_harmony_regexps && this.sticky)) { | |
219 if (i < 0 || i > string.length) { | |
220 this.lastIndex = 0; | |
221 return false; | |
222 } | |
223 // matchIndices is either null or the RegExpLastMatchInfo array. | |
224 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); | |
225 if (IS_NULL(matchIndices)) { | |
226 this.lastIndex = 0; | |
227 return false; | |
228 } | |
229 $regexpLastMatchInfoOverride = null; | |
230 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; | |
231 return true; | |
232 } else { | |
233 // Non-global, non-sticky regexp. | |
234 // Remove irrelevant preceeding '.*' in a test regexp. The expression | |
235 // checks whether this.source starts with '.*' and that the third char is | |
236 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560 | |
237 var regexp = this; | |
238 if (regexp.source.length >= 3 && | |
239 %_StringCharCodeAt(regexp.source, 0) == 46 && // '.' | |
240 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' | |
241 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' | |
242 regexp = TrimRegExp(regexp); | |
243 } | |
244 // matchIndices is either null or the RegExpLastMatchInfo array. | |
245 var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo); | |
246 if (IS_NULL(matchIndices)) { | |
247 this.lastIndex = 0; | |
248 return false; | |
249 } | |
250 $regexpLastMatchInfoOverride = null; | |
251 return true; | |
252 } | |
253 } | |
254 | |
255 function TrimRegExp(regexp) { | |
256 if (!%_ObjectEquals(regexp_key, regexp)) { | |
257 regexp_key = regexp; | |
258 regexp_val = | |
259 new GlobalRegExp(%_SubString(regexp.source, 2, regexp.source.length), | |
260 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" | |
261 : regexp.multiline ? "m" : "")); | |
262 } | |
263 return regexp_val; | |
264 } | |
265 | |
266 | |
// Implementation of RegExp.prototype.toString: returns the source wrapped in
// slashes, followed by one letter per set flag in the order g, i, m, u, y
// (u and y only when the corresponding harmony flag is enabled).
// Throws a TypeError when the receiver is not a RegExp.
function RegExpToString() {
  if (!IS_REGEXP(this)) {
    throw MakeTypeError(kIncompatibleMethodReceiver,
                        'RegExp.prototype.toString', this);
  }

  var text = '/' + this.source + '/';
  var flagLetters = this.global ? 'g' : '';
  if (this.ignoreCase) flagLetters += 'i';
  if (this.multiline) flagLetters += 'm';
  if (FLAG_harmony_unicode_regexps && this.unicode) flagLetters += 'u';
  if (FLAG_harmony_regexps && this.sticky) flagLetters += 'y';
  return text + flagLetters;
}
280 | |
281 | |
282 // Getters for the static properties lastMatch, lastParen, leftContext, and | |
283 // rightContext of the RegExp constructor. The properties are computed based | |
284 // on the captures array of the last successful match and the subject string | |
285 // of the last successful match. | |
286 function RegExpGetLastMatch() { | |
287 if ($regexpLastMatchInfoOverride !== null) { | |
288 return OVERRIDE_MATCH($regexpLastMatchInfoOverride); | |
289 } | |
290 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); | |
291 return %_SubString(regExpSubject, | |
292 RegExpLastMatchInfo[CAPTURE0], | |
293 RegExpLastMatchInfo[CAPTURE1]); | |
294 } | |
295 | |
296 | |
297 function RegExpGetLastParen() { | |
298 if ($regexpLastMatchInfoOverride) { | |
299 var override = $regexpLastMatchInfoOverride; | |
300 if (override.length <= 3) return ''; | |
301 return override[override.length - 3]; | |
302 } | |
303 var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo); | |
304 if (length <= 2) return ''; // There were no captures. | |
305 // We match the SpiderMonkey behavior: return the substring defined by the | |
306 // last pair (after the first pair) of elements of the capture array even if | |
307 // it is empty. | |
308 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); | |
309 var start = RegExpLastMatchInfo[CAPTURE(length - 2)]; | |
310 var end = RegExpLastMatchInfo[CAPTURE(length - 1)]; | |
311 if (start != -1 && end != -1) { | |
312 return %_SubString(regExpSubject, start, end); | |
313 } | |
314 return ""; | |
315 } | |
316 | |
317 | |
318 function RegExpGetLeftContext() { | |
319 var start_index; | |
320 var subject; | |
321 if (!$regexpLastMatchInfoOverride) { | |
322 start_index = RegExpLastMatchInfo[CAPTURE0]; | |
323 subject = LAST_SUBJECT(RegExpLastMatchInfo); | |
324 } else { | |
325 var override = $regexpLastMatchInfoOverride; | |
326 start_index = OVERRIDE_POS(override); | |
327 subject = OVERRIDE_SUBJECT(override); | |
328 } | |
329 return %_SubString(subject, 0, start_index); | |
330 } | |
331 | |
332 | |
333 function RegExpGetRightContext() { | |
334 var start_index; | |
335 var subject; | |
336 if (!$regexpLastMatchInfoOverride) { | |
337 start_index = RegExpLastMatchInfo[CAPTURE1]; | |
338 subject = LAST_SUBJECT(RegExpLastMatchInfo); | |
339 } else { | |
340 var override = $regexpLastMatchInfoOverride; | |
341 subject = OVERRIDE_SUBJECT(override); | |
342 var match = OVERRIDE_MATCH(override); | |
343 start_index = OVERRIDE_POS(override) + match.length; | |
344 } | |
345 return %_SubString(subject, start_index, subject.length); | |
346 } | |
347 | |
348 | |
349 // The properties $1..$9 are the first nine capturing substrings of the last | |
350 // successful match, or ''. The function RegExpMakeCaptureGetter will be | |
351 // called with indices from 1 to 9. | |
352 function RegExpMakeCaptureGetter(n) { | |
353 return function foo() { | |
354 if ($regexpLastMatchInfoOverride) { | |
355 if (n < $regexpLastMatchInfoOverride.length - 2) { | |
356 return OVERRIDE_CAPTURE($regexpLastMatchInfoOverride, n); | |
357 } | |
358 return ''; | |
359 } | |
360 var index = n * 2; | |
361 if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return ''; | |
362 var matchStart = RegExpLastMatchInfo[CAPTURE(index)]; | |
363 var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)]; | |
364 if (matchStart == -1 || matchEnd == -1) return ''; | |
365 return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd); | |
366 }; | |
367 } | |
368 | |
369 // ------------------------------------------------------------------- | |
370 | |
// Wire the implementations above into the global RegExp function: set its
// instance class name, add a non-enumerable 'constructor' property to the
// prototype and install RegExpConstructor as the code of the function.
%FunctionSetInstanceClassName(GlobalRegExp, 'RegExp');
%AddNamedProperty(
    GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM);
%SetCode(GlobalRegExp, RegExpConstructor);

// Install the prototype methods as non-enumerable properties.
utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [
  "exec", RegExpExecJS,
  "test", RegExpTest,
  "toString", RegExpToString,
  "compile", RegExpCompileJS
]);

// The length of compile is 1 in SpiderMonkey.
%FunctionSetLength(GlobalRegExp.prototype.compile, 1);
385 | |
386 // The properties `input` and `$_` are aliases for each other. When this | |
387 // value is set the value it is set to is coerced to a string. | |
388 // Getter and setter for the input. | |
389 var RegExpGetInput = function() { | |
390 var regExpInput = LAST_INPUT(RegExpLastMatchInfo); | |
391 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; | |
392 }; | |
393 var RegExpSetInput = function(string) { | |
394 LAST_INPUT(RegExpLastMatchInfo) = TO_STRING(string); | |
395 }; | |
396 | |
397 %OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22); | |
398 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'input', RegExpGetInput, | |
399 RegExpSetInput, DONT_DELETE); | |
400 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$_', RegExpGetInput, | |
401 RegExpSetInput, DONT_ENUM | DONT_DELETE); | |
402 | |
403 // The properties multiline and $* are aliases for each other. When this | |
404 // value is set in SpiderMonkey, the value it is set to is coerced to a | |
405 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey | |
406 // the value of the expression 'RegExp.multiline = null' (for instance) is the | |
407 // boolean false (i.e., the value after coercion), while in V8 it is the value | |
408 // null (i.e., the value before coercion). | |
409 | |
410 // Getter and setter for multiline. | |
411 var multiline = false; | |
412 var RegExpGetMultiline = function() { return multiline; }; | |
413 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; | |
414 | |
415 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'multiline', RegExpGetMultiline, | |
416 RegExpSetMultiline, DONT_DELETE); | |
417 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$*', RegExpGetMultiline, | |
418 RegExpSetMultiline, | |
419 DONT_ENUM | DONT_DELETE); | |
420 | |
421 | |
// Setter shared by all match-derived static properties: assignments to them
// are accepted and ignored.
var NoOpSetter = function(ignored) {};


// Static properties set by a successful match. Each long name is installed
// as an enumerable accessor and its '$' alias as a non-enumerable one; none of
// them can be deleted.
%DefineAccessorPropertyUnchecked(GlobalRegExp, 'lastMatch', RegExpGetLastMatch,
                                 NoOpSetter, DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, '$&', RegExpGetLastMatch,
                                 NoOpSetter, DONT_ENUM | DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, 'lastParen', RegExpGetLastParen,
                                 NoOpSetter, DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, '$+', RegExpGetLastParen,
                                 NoOpSetter, DONT_ENUM | DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, 'leftContext',
                                 RegExpGetLeftContext, NoOpSetter,
                                 DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, '$`', RegExpGetLeftContext,
                                 NoOpSetter, DONT_ENUM | DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, 'rightContext',
                                 RegExpGetRightContext, NoOpSetter,
                                 DONT_DELETE);
%DefineAccessorPropertyUnchecked(GlobalRegExp, "$'", RegExpGetRightContext,
                                 NoOpSetter, DONT_ENUM | DONT_DELETE);

// Install $1 through $9, each backed by its own capture getter.
for (var i = 1; i < 10; ++i) {
  %DefineAccessorPropertyUnchecked(GlobalRegExp, '$' + i,
                                   RegExpMakeCaptureGetter(i), NoOpSetter,
                                   DONT_DELETE);
}
%ToFastProperties(GlobalRegExp);
451 | |
452 // ------------------------------------------------------------------- | |
453 // Exports | |
454 | |
// Hand selected internals to other parts of the library through utils.Export.
// Note that the exported name RegExpExec refers to DoRegExpExec, the raw
// matcher, and not to the RegExp.prototype.exec implementation.
utils.Export(function(to) {
  to.RegExpExec = DoRegExpExec;
  to.RegExpExecNoTests = RegExpExecNoTests;
  to.RegExpLastMatchInfo = RegExpLastMatchInfo;
  to.RegExpTest = RegExpTest;
});
461 | |
462 }) | |
OLD | NEW |