OLD | NEW |
1 // Copyright 2006-2009 the V8 project authors. All rights reserved. | 1 // Copyright 2006-2008 the V8 project authors. All rights reserved. |
2 // Redistribution and use in source and binary forms, with or without | 2 // Redistribution and use in source and binary forms, with or without |
3 // modification, are permitted provided that the following conditions are | 3 // modification, are permitted provided that the following conditions are |
4 // met: | 4 // met: |
5 // | 5 // |
6 // * Redistributions of source code must retain the above copyright | 6 // * Redistributions of source code must retain the above copyright |
7 // notice, this list of conditions and the following disclaimer. | 7 // notice, this list of conditions and the following disclaimer. |
8 // * Redistributions in binary form must reproduce the above | 8 // * Redistributions in binary form must reproduce the above |
9 // copyright notice, this list of conditions and the following | 9 // copyright notice, this list of conditions and the following |
10 // disclaimer in the documentation and/or other materials provided | 10 // disclaimer in the documentation and/or other materials provided |
11 // with the distribution. | 11 // with the distribution. |
(...skipping 33 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
45 } | 45 } |
46 | 46 |
47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); | 47 pattern = IS_UNDEFINED(pattern) ? '' : ToString(pattern); |
48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); | 48 flags = IS_UNDEFINED(flags) ? '' : ToString(flags); |
49 | 49 |
50 var global = false; | 50 var global = false; |
51 var ignoreCase = false; | 51 var ignoreCase = false; |
52 var multiline = false; | 52 var multiline = false; |
53 | 53 |
54 for (var i = 0; i < flags.length; i++) { | 54 for (var i = 0; i < flags.length; i++) { |
55 var c = StringCharAt.call(flags, i); | 55 var c = flags.charAt(i); |
56 switch (c) { | 56 switch (c) { |
57 case 'g': | 57 case 'g': |
58 // Allow duplicate flags to be consistent with JSC and others. | 58 // Allow duplicate flags to be consistent with JSC and others. |
59 global = true; | 59 global = true; |
60 break; | 60 break; |
61 case 'i': | 61 case 'i': |
62 ignoreCase = true; | 62 ignoreCase = true; |
63 break; | 63 break; |
64 case 'm': | 64 case 'm': |
65 multiline = true; | 65 multiline = true; |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { | 110 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { |
111 return pattern; | 111 return pattern; |
112 } | 112 } |
113 return new $RegExp(pattern, flags); | 113 return new $RegExp(pattern, flags); |
114 } | 114 } |
115 } | 115 } |
116 | 116 |
117 | 117 |
118 // Deprecated RegExp.prototype.compile method. We behave as if the constructor | 118 // Deprecated RegExp.prototype.compile method. We behave as if the constructor |
119 // were called again. In SpiderMonkey, this method returns the regexp object. | 119 // were called again. In SpiderMonkey, this method returns the regexp object. |
120 // In JSC, it returns undefined. For compatibility with JSC, we match their | 120 // In KJS, it returns undefined. For compatibility with KJS, we match their |
121 // behavior. | 121 // behavior. |
122 function CompileRegExp(pattern, flags) { | 122 function CompileRegExp(pattern, flags) { |
123 // Both JSC and SpiderMonkey treat a missing pattern argument as the | 123 // Both KJS and SpiderMonkey treat a missing pattern argument as the |
124 // empty subject string, and an actual undefined value passed as the | 124 // empty subject string, and an actual undefined value passed as the |
125 // pattern as the string 'undefined'. Note that JSC is inconsistent | 125 // pattern as the string 'undefined'. Note that KJS is inconsistent |
126 // here, treating undefined values differently in | 126 // here, treating undefined values differently in |
127 // RegExp.prototype.compile and in the constructor, where they are | 127 // RegExp.prototype.compile and in the constructor, where they are |
128 // the empty string. For compatibility with JSC, we match their | 128 // the empty string. For compatibility with KJS, we match their |
129 // behavior. | 129 // behavior. |
130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { | 130 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { |
131 DoConstructRegExp(this, 'undefined', flags, false); | 131 DoConstructRegExp(this, 'undefined', flags, false); |
132 } else { | 132 } else { |
133 DoConstructRegExp(this, pattern, flags, false); | 133 DoConstructRegExp(this, pattern, flags, false); |
134 } | 134 } |
135 } | 135 } |
136 | 136 |
137 | 137 |
| 138 // DoRegExpExec and DoRegExpExecGlobal are wrappers around the runtime |
| 139 // %RegExpExec and %RegExpExecGlobal functions that ensure that the static |
| 140 // properties of the RegExp constructor are set. |
138 function DoRegExpExec(regexp, string, index) { | 141 function DoRegExpExec(regexp, string, index) { |
139 return %RegExpExec(regexp, string, index, lastMatchInfo); | 142 var matchIndices = %RegExpExec(regexp, string, index); |
| 143 if (!IS_NULL(matchIndices)) { |
| 144 regExpCaptures = matchIndices; |
| 145 regExpSubject = regExpInput = string; |
| 146 } |
| 147 return matchIndices; |
140 } | 148 } |
141 | 149 |
142 | 150 |
143 function DoRegExpExecGlobal(regexp, string) { | 151 function DoRegExpExecGlobal(regexp, string) { |
144 // Returns an array of arrays of substring indices. | 152 // Here, matchIndices is an array of arrays of substring indices. |
145 return %RegExpExecGlobal(regexp, string, lastMatchInfo); | 153 var matchIndices = %RegExpExecGlobal(regexp, string); |
| 154 if (matchIndices.length != 0) { |
| 155 regExpCaptures = matchIndices[matchIndices.length - 1]; |
| 156 regExpSubject = regExpInput = string; |
| 157 } |
| 158 return matchIndices; |
146 } | 159 } |
147 | 160 |
148 | 161 |
149 function RegExpExec(string) { | 162 function RegExpExec(string) { |
150 if (%_ArgumentsLength() == 0) { | 163 if (%_ArgumentsLength() == 0) { |
151 var regExpInput = LAST_INPUT(lastMatchInfo); | |
152 if (IS_UNDEFINED(regExpInput)) { | 164 if (IS_UNDEFINED(regExpInput)) { |
153 throw MakeError('no_input_to_regexp', [this]); | 165 throw MakeError('no_input_to_regexp', [this]); |
154 } | 166 } |
155 string = regExpInput; | 167 string = regExpInput; |
156 } | 168 } |
157 var s = ToString(string); | 169 var s = ToString(string); |
158 var length = s.length; | 170 var length = s.length; |
159 var lastIndex = this.lastIndex; | 171 var lastIndex = this.lastIndex; |
160 var i = this.global ? TO_INTEGER(lastIndex) : 0; | 172 var i = this.global ? TO_INTEGER(lastIndex) : 0; |
161 | 173 |
162 if (i < 0 || i > s.length) { | 174 if (i < 0 || i > s.length) { |
163 this.lastIndex = 0; | 175 this.lastIndex = 0; |
164 return null; | 176 return null; |
165 } | 177 } |
166 | 178 |
167 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); | 179 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); |
168 // matchIndices is either null or the lastMatchInfo array. | 180 // matchIndices is an array of integers with length of captures*2, |
169 var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); | 181 // each pair of integers specifies the start and the end index |
| 182 // in the string. |
| 183 var matchIndices = DoRegExpExec(this, s, i); |
170 | 184 |
171 if (matchIndices == null) { | 185 if (matchIndices == null) { |
172 if (this.global) this.lastIndex = 0; | 186 if (this.global) this.lastIndex = 0; |
173 return matchIndices; // no match | 187 return matchIndices; // no match |
174 } | 188 } |
175 | 189 |
176 var numResults = NUMBER_OF_CAPTURES(lastMatchInfo) >> 1; | 190 var numResults = matchIndices.length >> 1; |
177 var result = new $Array(numResults); | 191 var result = new $Array(numResults); |
178 for (var i = 0; i < numResults; i++) { | 192 for (var i = 0; i < numResults; i++) { |
179 var matchStart = lastMatchInfo[CAPTURE(i << 1)]; | 193 var matchStart = matchIndices[2*i]; |
180 var matchEnd = lastMatchInfo[CAPTURE((i << 1) + 1)]; | 194 var matchEnd = matchIndices[2*i + 1]; |
181 if (matchStart != -1 && matchEnd != -1) { | 195 if (matchStart != -1 && matchEnd != -1) { |
182 result[i] = SubString(s, matchStart, matchEnd); | 196 result[i] = s.slice(matchStart, matchEnd); |
183 } else { | 197 } else { |
184 // Make sure the element is present. Avoid reading the undefined | 198 // Make sure the element is present. Avoid reading the undefined |
185 // property from the global object since this may change. | 199 // property from the global object since this may change. |
186 result[i] = void 0; | 200 result[i] = void 0; |
187 } | 201 } |
188 } | 202 } |
189 | 203 |
190 if (this.global) | 204 if (this.global) |
191 this.lastIndex = lastMatchInfo[CAPTURE1]; | 205 this.lastIndex = matchIndices[1]; |
192 result.index = lastMatchInfo[CAPTURE0]; | 206 result.index = matchIndices[0]; |
193 result.input = s; | 207 result.input = s; |
194 return result; | 208 return result; |
195 } | 209 } |
196 | 210 |
197 | 211 |
198 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be | |
199 // that test is defined in terms of RegExp.prototype.exec even if the method is | |
200 // called on a non-RegExp object. However, it probably means the original | |
201 // value of RegExp.prototype.exec, which is what everybody else implements. | |
202 function RegExpTest(string) { | 212 function RegExpTest(string) { |
203 if (%_ArgumentsLength() == 0) { | 213 var result = (%_ArgumentsLength() == 0) ? this.exec() : this.exec(string); |
204 var regExpInput = LAST_INPUT(lastMatchInfo); | 214 return result != null; |
205 if (IS_UNDEFINED(regExpInput)) { | |
206 throw MakeError('no_input_to_regexp', [this]); | |
207 } | |
208 string = regExpInput; | |
209 } | |
210 var s = ToString(string); | |
211 var length = s.length; | |
212 var lastIndex = this.lastIndex; | |
213 var i = this.global ? TO_INTEGER(lastIndex) : 0; | |
214 | |
215 if (i < 0 || i > s.length) { | |
216 this.lastIndex = 0; | |
217 return false; | |
218 } | |
219 | |
220 %_Log('regexp', 'regexp-exec,%0r,%1S,%2i', [this, s, lastIndex]); | |
221 // matchIndices is either null or the lastMatchInfo array. | |
222 var matchIndices = %RegExpExec(this, s, i, lastMatchInfo); | |
223 | |
224 if (matchIndices == null) { | |
225 if (this.global) this.lastIndex = 0; | |
226 return false; | |
227 } | |
228 | |
229 if (this.global) this.lastIndex = lastMatchInfo[CAPTURE1]; | |
230 return true; | |
231 } | 215 } |
232 | 216 |
233 | 217 |
234 function RegExpToString() { | 218 function RegExpToString() { |
235 // If this.source is an empty string, output /(?:)/. | 219 // If this.source is an empty string, output /(?:)/. |
236 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 | 220 // http://bugzilla.mozilla.org/show_bug.cgi?id=225550 |
237 // ecma_2/RegExp/properties-001.js. | 221 // ecma_2/RegExp/properties-001.js. |
238 var src = this.source ? this.source : '(?:)'; | 222 var src = this.source ? this.source : '(?:)'; |
239 var result = '/' + src + '/'; | 223 var result = '/' + src + '/'; |
240 if (this.global) | 224 if (this.global) |
241 result += 'g'; | 225 result += 'g'; |
242 if (this.ignoreCase) | 226 if (this.ignoreCase) |
243 result += 'i'; | 227 result += 'i'; |
244 if (this.multiline) | 228 if (this.multiline) |
245 result += 'm'; | 229 result += 'm'; |
246 return result; | 230 return result; |
247 } | 231 } |
248 | 232 |
249 | 233 |
250 // Getters for the static properties lastMatch, lastParen, leftContext, and | 234 // Getters for the static properties lastMatch, lastParen, leftContext, and |
251 // rightContext of the RegExp constructor. The properties are computed based | 235 // rightContext of the RegExp constructor. The properties are computed based |
252 // on the captures array of the last successful match and the subject string | 236 // on the captures array of the last successful match and the subject string |
253 // of the last successful match. | 237 // of the last successful match. |
254 function RegExpGetLastMatch() { | 238 function RegExpGetLastMatch() { |
255 var regExpSubject = LAST_SUBJECT(lastMatchInfo); | 239 return regExpSubject.slice(regExpCaptures[0], regExpCaptures[1]); |
256 return SubString(regExpSubject, | |
257 lastMatchInfo[CAPTURE0], | |
258 lastMatchInfo[CAPTURE1]); | |
259 } | 240 } |
260 | 241 |
261 | 242 |
262 function RegExpGetLastParen() { | 243 function RegExpGetLastParen() { |
263 var length = NUMBER_OF_CAPTURES(lastMatchInfo); | 244 var length = regExpCaptures.length; |
264 if (length <= 2) return ''; // There were no captures. | 245 if (length <= 2) return ''; // There were no captures. |
265 // We match the SpiderMonkey behavior: return the substring defined by the | 246 // We match the SpiderMonkey behavior: return the substring defined by the |
266 // last pair (after the first pair) of elements of the capture array even if | 247 // last pair (after the first pair) of elements of the capture array even if |
267 // it is empty. | 248 // it is empty. |
268 var regExpSubject = LAST_SUBJECT(lastMatchInfo); | 249 return regExpSubject.slice(regExpCaptures[length - 2], |
269 return SubString(regExpSubject, | 250 regExpCaptures[length - 1]); |
270 lastMatchInfo[CAPTURE(length - 2)], | |
271 lastMatchInfo[CAPTURE(length - 1)]); | |
272 } | 251 } |
273 | 252 |
274 | 253 |
275 function RegExpGetLeftContext() { | 254 function RegExpGetLeftContext() { |
276 return SubString(LAST_SUBJECT(lastMatchInfo), | 255 return regExpSubject.slice(0, regExpCaptures[0]); |
277 0, | |
278 lastMatchInfo[CAPTURE0]); | |
279 } | 256 } |
280 | 257 |
281 | 258 |
282 function RegExpGetRightContext() { | 259 function RegExpGetRightContext() { |
283 var subject = LAST_SUBJECT(lastMatchInfo); | 260 return regExpSubject.slice(regExpCaptures[1], regExpSubject.length); |
284 return SubString(subject, | |
285 lastMatchInfo[CAPTURE1], | |
286 subject.length); | |
287 } | 261 } |
288 | 262 |
289 | 263 |
290 // The properties $1..$9 are the first nine capturing substrings of the last | 264 // The properties $1..$9 are the first nine capturing substrings of the last |
291 // successful match, or ''. The function RegExpMakeCaptureGetter will be | 265 // successful match, or ''. The function RegExpMakeCaptureGetter will be |
292 // called with indices from 1 to 9. | 266 // called with an index greater than or equal to 1 but it actually works for |
| 267 // any non-negative index. |
293 function RegExpMakeCaptureGetter(n) { | 268 function RegExpMakeCaptureGetter(n) { |
294 return function() { | 269 return function() { |
295 var index = n * 2; | 270 var index = n * 2; |
296 if (index >= NUMBER_OF_CAPTURES(lastMatchInfo)) return ''; | 271 if (index >= regExpCaptures.length) return ''; |
297 var matchStart = lastMatchInfo[CAPTURE(index)]; | 272 var matchStart = regExpCaptures[index]; |
298 var matchEnd = lastMatchInfo[CAPTURE(index + 1)]; | 273 var matchEnd = regExpCaptures[index + 1]; |
299 if (matchStart == -1 || matchEnd == -1) return ''; | 274 if (matchStart == -1 || matchEnd == -1) return ''; |
300 return SubString(LAST_SUBJECT(lastMatchInfo), matchStart, matchEnd); | 275 return regExpSubject.slice(matchStart, matchEnd); |
301 }; | 276 }; |
302 } | 277 } |
303 | 278 |
304 | 279 |
305 // Property of the builtins object for recording the result of the last | 280 // Properties of the builtins object for recording the result of the last |
306 // regexp match. The property lastMatchInfo includes the matchIndices | 281 // regexp match. The property regExpCaptures is the matchIndices array of the |
307 // array of the last successful regexp match (an array of start/end index | 282 // last successful regexp match (an array of start/end index pairs for the |
308 // pairs for the match and all the captured substrings), the invariant is | 283 // match and all the captured substrings), the invariant is that there are at |
309 // that there are at least two capture indices. The array also contains | 284 // least two elements. The property regExpSubject is the subject string for |
310 // the subject string for the last successful match. | 285 // the last successful match. |
311 var lastMatchInfo = [ | 286 var regExpCaptures = [0, 0]; |
312 2, // REGEXP_NUMBER_OF_CAPTURES | 287 var regExpSubject = ''; |
313 0, // REGEXP_FIRST_CAPTURE + 0 | 288 var regExpInput; |
314 0, // REGEXP_FIRST_CAPTURE + 1 | |
315 "", // Last subject. | |
316 void 0, // Last input - settable with RegExpSetInput. | |
317 ]; | |
318 | 289 |
319 // ------------------------------------------------------------------- | 290 // ------------------------------------------------------------------- |
320 | 291 |
321 function SetupRegExp() { | 292 function SetupRegExp() { |
322 %FunctionSetInstanceClassName($RegExp, 'RegExp'); | 293 %FunctionSetInstanceClassName($RegExp, 'RegExp'); |
323 %FunctionSetPrototype($RegExp, new $Object()); | 294 %FunctionSetPrototype($RegExp, new $Object()); |
324 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); | 295 %SetProperty($RegExp.prototype, 'constructor', $RegExp, DONT_ENUM); |
325 %SetCode($RegExp, RegExpConstructor); | 296 %SetCode($RegExp, RegExpConstructor); |
326 | 297 |
327 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( | 298 InstallFunctions($RegExp.prototype, DONT_ENUM, $Array( |
328 "exec", RegExpExec, | 299 "exec", RegExpExec, |
329 "test", RegExpTest, | 300 "test", RegExpTest, |
330 "toString", RegExpToString, | 301 "toString", RegExpToString, |
331 "compile", CompileRegExp | 302 "compile", CompileRegExp |
332 )); | 303 )); |
333 | 304 |
334 // The spec says nothing about the length of exec and test, but | 305 // The spec says nothing about the length of exec and test, but |
335 // SpiderMonkey and JSC have length equal to 0. | 306 // SpiderMonkey and KJS have length equal to 0. |
336 %FunctionSetLength($RegExp.prototype.exec, 0); | 307 %FunctionSetLength($RegExp.prototype.exec, 0); |
337 %FunctionSetLength($RegExp.prototype.test, 0); | 308 %FunctionSetLength($RegExp.prototype.test, 0); |
338 // The length of compile is 1 in SpiderMonkey. | 309 // The length of compile is 1 in SpiderMonkey. |
339 %FunctionSetLength($RegExp.prototype.compile, 1); | 310 %FunctionSetLength($RegExp.prototype.compile, 1); |
340 | 311 |
341 // The properties input, $input, and $_ are aliases for each other. When this | 312 // The properties input, $input, and $_ are aliases for each other. When this |
342 // value is set the value it is set to is coerced to a string. | 313 // value is set the value it is set to is coerced to a string. |
343 // Getter and setter for the input. | 314 // Getter and setter for the input. |
344 function RegExpGetInput() { | 315 function RegExpGetInput() { |
345 var regExpInput = LAST_INPUT(lastMatchInfo); | |
346 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; | 316 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; |
347 } | 317 } |
348 function RegExpSetInput(string) { | 318 function RegExpSetInput(string) { regExpInput = ToString(string); } |
349 lastMatchInfo[lastMatchInfo[REGEXP_NUMBER_OF_CAPTURES] + 2] = | |
350 ToString(string); | |
351 }; | |
352 | 319 |
353 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); | 320 %DefineAccessor($RegExp, 'input', GETTER, RegExpGetInput, DONT_DELETE); |
354 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); | 321 %DefineAccessor($RegExp, 'input', SETTER, RegExpSetInput, DONT_DELETE); |
355 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE
); | 322 %DefineAccessor($RegExp, '$_', GETTER, RegExpGetInput, DONT_ENUM | DONT_DELETE
); |
356 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE
); | 323 %DefineAccessor($RegExp, '$_', SETTER, RegExpSetInput, DONT_ENUM | DONT_DELETE
); |
357 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DE
LETE); | 324 %DefineAccessor($RegExp, '$input', GETTER, RegExpGetInput, DONT_ENUM | DONT_DE
LETE); |
358 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DE
LETE); | 325 %DefineAccessor($RegExp, '$input', SETTER, RegExpSetInput, DONT_ENUM | DONT_DE
LETE); |
359 | 326 |
360 // The properties multiline and $* are aliases for each other. When this | 327 // The properties multiline and $* are aliases for each other. When this |
361 // value is set in SpiderMonkey, the value it is set to is coerced to a | 328 // value is set in SpiderMonkey, the value it is set to is coerced to a |
(...skipping 35 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
397 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); | 364 %DefineAccessor($RegExp, "$'", SETTER, NoOpSetter, DONT_ENUM | DONT_DELETE); |
398 | 365 |
399 for (var i = 1; i < 10; ++i) { | 366 for (var i = 1; i < 10; ++i) { |
400 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_D
ELETE); | 367 %DefineAccessor($RegExp, '$' + i, GETTER, RegExpMakeCaptureGetter(i), DONT_D
ELETE); |
401 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); | 368 %DefineAccessor($RegExp, '$' + i, SETTER, NoOpSetter, DONT_DELETE); |
402 } | 369 } |
403 } | 370 } |
404 | 371 |
405 | 372 |
406 SetupRegExp(); | 373 SetupRegExp(); |
OLD | NEW |