OLD | NEW |
| (Empty) |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | |
2 // Redistribution and use in source and binary forms, with or without | |
3 // modification, are permitted provided that the following conditions are | |
4 // met: | |
5 // | |
6 // * Redistributions of source code must retain the above copyright | |
7 // notice, this list of conditions and the following disclaimer. | |
8 // * Redistributions in binary form must reproduce the above | |
9 // copyright notice, this list of conditions and the following | |
10 // disclaimer in the documentation and/or other materials provided | |
11 // with the distribution. | |
12 // * Neither the name of Google Inc. nor the names of its | |
13 // contributors may be used to endorse or promote products derived | |
14 // from this software without specific prior written permission. | |
15 // | |
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS | |
17 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT | |
18 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR | |
19 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT | |
20 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, | |
21 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT | |
22 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, | |
23 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
24 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | |
25 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
26 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
27 | |
28 // Expect $Object = global.Object; | |
29 // Expect $Array = global.Array; | |
30 | |
31 const $RegExp = global.RegExp; | |
32 | |
33 // A recursive descent parser for Patterns according to the grammar of | |
34 // ECMA-262 15.10.1, with deviations noted below. | |
35 function DoConstructRegExp(object, pattern, flags, isConstructorCall) { | |
36 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. | |
37 if (IS_REGEXP(pattern)) { | |
38 if (!IS_UNDEFINED(flags)) { | |
39 throw MakeTypeError('regexp_flags', []); | |
40 } | |
41 flags = (pattern.global ? 'g' : '') | |
42 + (pattern.ignoreCase ? 'i' : '') | |
43 + (pattern.multiline ? 'm' : ''); | |
44 pattern = pattern.source; | |
45 } | |
46 | |
47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); | |
48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); | |
49 | |
50 var global = false; | |
51 var ignoreCase = false; | |
52 var multiline = false; | |
53 | |
54 for (var i = 0; i < flags.length; i++) { | |
55 var c = StringCharAt.call(flags, i); | |
56 switch (c) { | |
57 case 'g': | |
58 // Allow duplicate flags to be consistent with JSC and others. | |
59 global = true; | |
60 break; | |
61 case 'i': | |
62 ignoreCase = true; | |
63 break; | |
64 case 'm': | |
65 multiline = true; | |
66 break; | |
67 default: | |
68 // Ignore flags that have no meaning to be consistent with | |
69 // JSC. | |
70 break; | |
71 } | |
72 } | |
73 | |
74 if (isConstructorCall) { | |
75 // ECMA-262, section 15.10.7.1. | |
76 %SetProperty(object, 'source', pattern, | |
77 DONT_DELETE | READ_ONLY | DONT_ENUM); | |
78 | |
79 // ECMA-262, section 15.10.7.2. | |
80 %SetProperty(object, 'global', global, DONT_DELETE | READ_ONLY | DONT_ENUM); | |
81 | |
82 // ECMA-262, section 15.10.7.3. | |
83 %SetProperty(object, 'ignoreCase', ignoreCase, | |
84 DONT_DELETE | READ_ONLY | DONT_ENUM); | |
85 | |
86 // ECMA-262, section 15.10.7.4. | |
87 %SetProperty(object, 'multiline', multiline, | |
88 DONT_DELETE | READ_ONLY | DONT_ENUM); | |
89 | |
90 // ECMA-262, section 15.10.7.5. | |
91 %SetProperty(object, 'lastIndex', 0, DONT_DELETE | DONT_ENUM); | |
92 } else { // RegExp is being recompiled via RegExp.prototype.compile. | |
93 %IgnoreAttributesAndSetProperty(object, 'source', pattern); | |
94 %IgnoreAttributesAndSetProperty(object, 'global', global); | |
95 %IgnoreAttributesAndSetProperty(object, 'ignoreCase', ignoreCase); | |
96 %IgnoreAttributesAndSetProperty(object, 'multiline', multiline); | |
97 %IgnoreAttributesAndSetProperty(object, 'lastIndex', 0); | |
98 regExpCache.type = 'none'; | |
99 } | |
100 | |
101 // Call internal function to compile the pattern. | |
102 %RegExpCompile(object, pattern, flags); | |
103 } | |
104 | |
105 | |
106 function RegExpConstructor(pattern, flags) { | |
107 if (%_IsConstructCall()) { | |
108 DoConstructRegExp(this, pattern, flags, true); | |
109 } else { | |
110 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. | |
111 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { | |
112 return pattern; | |
113 } | |
114 return new $RegExp(pattern, flags); | |
115 } | |
116 } | |
117 | |
118 | |
119 // Deprecated RegExp.prototype.compile method. We behave as if the constructor | |
120 // were called again. In SpiderMonkey, this method returns the regexp object. | |
121 // In JSC, it returns undefined. For compatibility with JSC, we match their | |
122 // behavior. | |
123 function CompileRegExp(pattern, flags) { | |
124 // Both JSC and SpiderMonkey treat a missing pattern argument as the | |
125 // empty subject string, and an actual undefined value passed as the | |
126 // pattern as the string 'undefined'. Note that JSC is inconsistent | |
127 // here, treating undefined values differently in | |
128 // RegExp.prototype.compile and in the constructor, where they are | |
129 // the empty string. For compatibility with JSC, we match their | |
130 // behavior. | |
131 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { | |
132 DoConstructRegExp(this, 'undefined', flags, false); | |
133 } else { | |
134 DoConstructRegExp(this, pattern, flags, false); | |
135 } | |
136 } | |
137 | |
138 | |
139 function DoRegExpExec(regexp, string, index) { | |
140 return %_RegExpExec(regexp, string, index, lastMatchInfo); | |
141 } | |
142 | |
143 | |
// Record describing the most recent cached regexp operation.  A type of
// 'none' marks the record as holding nothing; every other field starts out
// as the placeholder 0.
function RegExpCache() {
  this.type = 'none';
  // Assigned in this order so the properties are created in the same
  // sequence as the field list reads.
  var zeroedFields = ['regExp', 'subject', 'replaceString', 'lastIndex',
                      'answer'];
  for (var k = 0; k < zeroedFields.length; k++) {
    this[zeroedFields[k]] = 0;
  }
}


// The single cache record shared by the regexp functions in this file.
var regExpCache = new RegExpCache();
155 | |
156 | |
// Returns a shallow copy of a match result: a new array with the same
// elements plus the 'index' and 'input' properties of |array|.
function CloneRegexpAnswer(array) {
  var count = array.length;
  var copy = new $Array(count);
  var k = 0;
  while (k < count) {
    copy[k] = array[k];
    k++;
  }
  copy.index = array.index;
  copy.input = array.input;
  return copy;
}
167 | |
168 | |
169 function RegExpExec(string) { | |
170 if (!IS_REGEXP(this)) { | |
171 throw MakeTypeError('incompatible_method_receiver', | |
172 ['RegExp.prototype.exec', this]); | |
173 } | |
174 | |
175 var cache = regExpCache; | |
176 | |
177 if (%_ObjectEquals(cache.type, 'exec') && | |
178 %_ObjectEquals(cache.lastIndex, this.lastIndex) && | |
179 %_ObjectEquals(cache.regExp, this) && | |
180 %_ObjectEquals(cache.subject, string)) { | |
181 var last = cache.answer; | |
182 if (last == null) { | |
183 return last; | |
184 } else { | |
185 return CloneRegexpAnswer(last); | |
186 } | |
187 } | |
188 | |
189 if (%_ArgumentsLength() == 0) { | |
190 var regExpInput = LAST_INPUT(lastMatchInfo); | |
191 if (IS_UNDEFINED(regExpInput)) { | |
192 throw MakeError('no_input_to_regexp', [this]); | |
193 } | |
194 string = regExpInput; | |
195 } | |
196 var s; | |
197 if (IS_STRING(string)) { | |
198 s = string; | |
199 } else { | |
200 s = ToString(string); | |
201 } | |
202 var lastIndex = this.lastIndex; | |
203 | |
204 var i = this.global ? TO_INTEGER(lastIndex) : 0; | |
205 | |
206 if (i < 0 || i > s.length) { | |
207 this.lastIndex = 0; | |
208 return null; | |
209 } | |
210 | |
211 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); | |
212 // matchIndices is either null or the lastMatchInfo array. | |
213 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); | |
214 | |
215 if (matchIndices == null) { | |
216 if (this.global) this.lastIndex = 0; | |
217 cache.lastIndex = lastIndex; | |
218 cache.regExp = this; | |
219 cache.subject = s; | |
220 cache.answer = matchIndices; // Null. | |
221 cache.type = 'exec'; | |
222 return matchIndices; // No match. | |
223 } | |
224 | |
225 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; | |
226 var result; | |
227 if (numResults === 1) { | |
228 var matchStart = lastMatchInfo[CAPTURE(0)]; | |
229 var matchEnd = lastMatchInfo[CAPTURE(1)]; | |
230 result = [SubString(s, matchStart, matchEnd)]; | |
231 } else { | |
232 result = new $Array(numResults); | |
233 for (var i = 0; i < numResults; i++) { | |
234 var matchStart = lastMatchInfo[CAPTURE(i << 1)]; | |
235 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; | |
236 if (matchStart != -1 && matchEnd != -1) { | |
237 result[i] = SubString(s, matchStart, matchEnd); | |
238 } else { | |
239 // Make sure the element is present. Avoid reading the undefined | |
240 // property from the global object since this may change. | |
241 result[i] = void 0; | |
242 } | |
243 } | |
244 } | |
245 | |
246 result.index = lastMatchInfo[CAPTURE0]; | |
247 result.input = s; | |
248 if (this.global) { | |
249 this.lastIndex = lastMatchInfo[CAPTURE1]; | |
250 return result; | |
251 } else { | |
252 cache.regExp = this; | |
253 cache.subject = s; | |
254 cache.lastIndex = lastIndex; | |
255 cache.answer = result; | |
256 cache.type = 'exec'; | |
257 return CloneRegexpAnswer(result); | |
258 } | |
259 } | |
260 | |
261 | |
262 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be | |
263 // that test is defined in terms of RegExp.prototype.exec. However, it probably | |
264 // means the original value of RegExp.prototype.exec, which is what everybody | |
265 // else implements. | |
266 function RegExpTest(string) { | |
267 if (!IS_REGEXP(this)) { | |
268 throw MakeTypeError('incompatible_method_receiver', | |
269 ['RegExp.prototype.test', this]); | |
270 } | |
271 if (%_ArgumentsLength() == 0) { | |
272 var regExpInput = LAST_INPUT(lastMatchInfo); | |
273 if (IS_UNDEFINED(regExpInput)) { | |
274 throw MakeError('no_input_to_regexp', [this]); | |
275 } | |
276 string = regExpInput; | |
277 } | |
278 var s; | |
279 if (IS_STRING(string)) { | |
280 s = string; | |
281 } else { | |
282 s = ToString(string); | |
283 } | |
284 | |
285 var lastIndex = this.lastIndex; | |
286 | |
287 var cache = regExpCache; | |
288 | |
289 if (%_ObjectEquals(cache.type, 'test') && | |
290 %_ObjectEquals(cache.regExp, this) && | |
291 %_ObjectEquals(cache.subject, string) && | |
292 %_ObjectEquals(cache.lastIndex, lastIndex)) { | |
293 return cache.answer; | |
294 } | |
295 | |
296 var length = s.length; | |
297 var i = this.global ? TO_INTEGER(lastIndex) : 0; | |
298 | |
299 cache.type = 'test'; | |
300 cache.regExp = this; | |
301 cache.subject = s; | |
302 cache.lastIndex = i; | |
303 | |
304 if (i < 0 || i > s.length) { | |
305 this.lastIndex = 0; | |
306 cache.answer = false; | |
307 return false; | |
308 } | |
309 | |
310 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); | |
311 // matchIndices is either null or the lastMatchInfo array. | |
312 var matchIndices = %_RegExpExec(this, s, i, lastMatchInfo); | |
313 | |
314 if (matchIndices == null) { | |
315 if (this.global) this.lastIndex = 0; | |
316 cache.answer = false; | |
317 return false; | |
318 } | |
319 | |
320 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; | |
321 cache.answer = true; | |
322 return true; | |
323 } | |
324 | |
325 | |
// RegExp.prototype.toString: renders the receiver as /source/flags, with
// the flags in the order g, i, m.
function RegExpToString() {
  // An empty source is printed as (?:) so that the output is /(?:)/.
  // http://bugzilla.mozilla.org/show_bug.cgi?id=225550
  // ecma_2/RegExp/properties-001.js.
  var body = '(?:)';
  if (this.source) {
    body = this.source;
  }
  var text = '/' + body + '/';
  if (this.global) {
    text = text + 'g';
  }
  if (this.ignoreCase) {
    text = text + 'i';
  }
  if (this.multiline) {
    text = text + 'm';
  }
  return text;
}
340 | |
341 | |
342 // Getters for the static properties lastMatch, lastParen, leftContext, and | |
343 // rightContext of the RegExp constructor. The properties are computed based | |
344 // on the captures array of the last successful match and the subject string | |
345 // of the last successful match. | |
function RegExpGetLastMatch() {
  // The first capture pair delimits the whole match within the last subject.
  var subject = LAST_SUBJECT(lastMatchInfo);
  var from = lastMatchInfo[CAPTURE0];
  var to = lastMatchInfo[CAPTURE1];
  return SubString(subject, from, to);
}
352 | |
353 | |
function RegExpGetLastParen() {
  var captureCount = NUMBER_OF_CAPTURES(lastMatchInfo);
  // Only the whole-match pair is present: there were no captures.
  if (captureCount <= 2) return '';
  // Like SpiderMonkey, use the last pair of the capture array (after the
  // first pair) and return the substring it defines even if it is empty.
  var from = lastMatchInfo[CAPTURE(captureCount - 2)];
  var to = lastMatchInfo[CAPTURE(captureCount - 1)];
  // A -1 on either side means there is no substring to return.
  if (from == -1 || to == -1) return "";
  return SubString(LAST_SUBJECT(lastMatchInfo), from, to);
}
368 | |
369 | |
function RegExpGetLeftContext() {
  // Everything in the last subject that precedes the start of the match.
  var subject = LAST_SUBJECT(lastMatchInfo);
  var matchStart = lastMatchInfo[CAPTURE0];
  return SubString(subject, 0, matchStart);
}
375 | |
376 | |
function RegExpGetRightContext() {
  // Everything in the last subject that follows the end of the match.
  var lastSubject = LAST_SUBJECT(lastMatchInfo);
  var matchEnd = lastMatchInfo[CAPTURE1];
  var subjectEnd = lastSubject.length;
  return SubString(lastSubject, matchEnd, subjectEnd);
}
383 | |
384 | |
385 // The properties $1..$9 are the first nine capturing substrings of the last | |
386 // successful match, or ''. The function RegExpMakeCaptureGetter will be | |
387 // called with indices from 1 to 9. | |
function RegExpMakeCaptureGetter(n) {
  // The returned getter yields the n-th captured substring of the last
  // match, or '' when that capture is absent.
  return function() {
    // Capture n occupies the pair of slots starting at n * 2.
    var startSlot = n * 2;
    if (startSlot >= NUMBER_OF_CAPTURES(lastMatchInfo)) return '';
    var from = lastMatchInfo[CAPTURE(startSlot)];
    var to = lastMatchInfo[CAPTURE(startSlot + 1)];
    if (from != -1 && to != -1) {
      return SubString(LAST_SUBJECT(lastMatchInfo), from, to);
    }
    return '';
  };
}
398 | |
399 | |
400 // Property of the builtins object for recording the result of the last | |
401 // regexp match. The property lastMatchInfo includes the matchIndices | |
402 // array of the last successful regexp match (an array of start/end index | |
403 // pairs for the match and all the captured substrings), the invariant is | |
404 // that there are at least two capture indices. The array also contains | |
405 // the subject string for the last successful match. | |
var lastMatchInfo = [
  // REGEXP_NUMBER_OF_CAPTURES: how many capture indices follow below.
  2,
  // Last subject.
  "",
  // Last input - settable with RegExpSetInput.
  void 0,
  // REGEXP_FIRST_CAPTURE + 0 and REGEXP_FIRST_CAPTURE + 1.
  0,
  0
];
413 | |
414 // ------------------------------------------------------------------- | |
415 | |
416 function SetupRegExp() { | |
417 %FunctionSetInstanceClassName($RegExp, 'RegExp'); | |
418 %FunctionSetPrototype($RegExp, new $Object()); | |
419 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); | |
420 %SetCode($RegExp, RegExpConstructor); | |
421 | |
422 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( | |
423 "exec", RegExpExec, | |
424 "test", RegExpTest, | |
425 "toString", RegExpToString, | |
426 "compile", CompileRegExp | |
427 )); | |
428 | |
429 // The length of compile is 1 in SpiderMonkey. | |
430 %FunctionSetLength($RegExp.prototype.compile, 1); | |
431 | |
432 // The properties input, $input, and $_ are aliases for each other. When this | |
433 // value is set the value it is set to is coerced to a string. | |
434 // Getter and setter for the input. | |
435 function RegExpGetInput() { | |
436 var regExpInput = LAST_INPUT(lastMatchInfo); | |
437 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; | |
438 } | |
439 function RegExpSetInput(string) { | |
440 regExpCache.type = 'none'; | |
441 LAST_INPUT(lastMatchInfo) = ToString(string); | |
442 }; | |
443 | |
444 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); | |
445 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); | |
446 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE
); | |
447 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE
); | |
448 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DE
LETE); | |
449 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DE
LETE); | |
450 | |
451 // The properties multiline and $* are aliases for each other. When this | |
452 // value is set in SpiderMonkey, the value it is set to is coerced to a | |
453 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey | |
454 // the value of the expression 'RegExp.multiline = null' (for instance) is the | |
455 // boolean false (ie, the value after coercion), while in V8 it is the value | |
456 // null (ie, the value before coercion). | |
457 | |
458 // Getter and setter for multiline. | |
459 var multiline = false; | |
460 function RegExpGetMultiline() { return multiline; }; | |
461 function RegExpSetMultiline(flag) { multiline = flag ? true : false; }; | |
462 | |
463 %DefineAccessor($RegExp, 'multiline', GETTER, RegExpGetMultiline, DONT_DELETE)
; | |
464 %DefineAccessor($RegExp, 'multiline', SETTER, RegExpSetMultiline, DONT_DELETE)
; | |
465 %DefineAccessor($RegExp, '$*', GETTER, RegExpGetMultiline, DONT_ENUM | DONT_DE
LETE); | |
466 %DefineAccessor($RegExp, '$*', SETTER, RegExpSetMultiline, DONT_ENUM | DONT_DE
LETE); | |
467 | |
468 | |
469 function NoOpSetter(ignored) {} | |
470 | |
471 | |
472 // Static properties set by a successful match. | |
473 %DefineAccessor($RegExp, 'lastMatch', GETTER, RegExpGetLastMatch, DONT_DELETE)
; | |
474 %DefineAccessor($RegExp, 'lastMatch', SETTER, NoOpSetter, DONT_DELETE); | |
475 %DefineAccessor($RegExp, '$&', GETTER, RegExpGetLastMatch, DONT_ENUM | DONT_DE
LETE); | |
476 %DefineAccessor($RegExp, '$&', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); | |
477 %DefineAccessor($RegExp, 'lastParen', GETTER, RegExpGetLastParen, DONT_DELETE)
; | |
478 %DefineAccessor($RegExp, 'lastParen', SETTER, NoOpSetter, DONT_DELETE); | |
479 %DefineAccessor($RegExp, '$+', GETTER, RegExpGetLastParen, DONT_ENUM | DONT_DE
LETE); | |
480 %DefineAccessor($RegExp, '$+', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); | |
481 %DefineAccessor($RegExp, 'leftContext', GETTER, RegExpGetLeftContext, DONT_DEL
ETE); | |
482 %DefineAccessor($RegExp, 'leftContext', SETTER, NoOpSetter, DONT_DELETE); | |
483 %DefineAccessor($RegExp, '$`', GETTER, RegExpGetLeftContext, DONT_ENUM | DONT_
DELETE); | |
484 %DefineAccessor($RegExp, '$`', SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); | |
485 %DefineAccessor($RegExp, 'rightContext', GETTER, RegExpGetRightContext, DONT_D
ELETE); | |
486 %DefineAccessor($RegExp, 'rightContext', SETTER, NoOpSetter, DONT_DELETE); | |
487 %DefineAccessor($RegExp, "$'", GETTER, RegExpGetRightContext, DONT_ENUM | DONT
_DELETE); | |
488 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); | |
489 | |
490 for (var i = 1; i < 10; ++i) { | |
491 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_D
ELETE); | |
492 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); | |
493 } | |
494 } | |
495 | |
496 | |
497 SetupRegExp(); | |
OLD | NEW |