| OLD | NEW |
| (Empty) |
| 1 // Copyright 2012 the V8 project authors. All rights reserved. | |
| 2 // Use of this source code is governed by a BSD-style license that can be | |
| 3 // found in the LICENSE file. | |
| 4 | |
| 5 var $regexpLastMatchInfoOverride; | |
| 6 | |
| 7 (function(global, utils) { | |
| 8 | |
| 9 %CheckIsBootstrapping(); | |
| 10 | |
| 11 // ------------------------------------------------------------------- | |
| 12 // Imports | |
| 13 | |
| 14 var FLAG_harmony_regexps; | |
| 15 var FLAG_harmony_unicode_regexps; | |
| 16 var GlobalRegExp = global.RegExp; | |
| 17 var InternalPackedArray = utils.InternalPackedArray; | |
| 18 | |
| 19 utils.ImportFromExperimental(function(from) { | |
| 20 FLAG_harmony_regexps = from.FLAG_harmony_regexps; | |
| 21 FLAG_harmony_unicode_regexps = from.FLAG_harmony_unicode_regexps; | |
| 22 }); | |
| 23 | |
| 24 // ------------------------------------------------------------------- | |
| 25 | |
| 26 // Property of the builtins object for recording the result of the last | |
| 27 // regexp match. The property RegExpLastMatchInfo includes the matchIndices | |
| 28 // array of the last successful regexp match (an array of start/end index | |
| 29 // pairs for the match and all the captured substrings); the invariant is | |
| 30 // that there are at least two capture indices. The array also contains | |
| 31 // the subject string for the last successful match. | |
| 32 var RegExpLastMatchInfo = new InternalPackedArray( | |
| 33 2, // REGEXP_NUMBER_OF_CAPTURES | |
| 34 "", // Last subject. | |
| 35 UNDEFINED, // Last input - settable with RegExpSetInput. | |
| 36 0, // REGEXP_FIRST_CAPTURE + 0 | |
| 37 0 // REGEXP_FIRST_CAPTURE + 1 | |
| 38 ); | |
| 39 | |
| 40 // Override last match info with an array of actual substrings. | |
| 41 // Used internally by replace regexp with function. | |
| 42 // The array has the format of an "apply" argument for a replacement | |
| 43 // function. | |
| 44 $regexpLastMatchInfoOverride = null; | |
| 45 | |
| 46 // ------------------------------------------------------------------- | |
| 47 | |
| 48 // Shared by the RegExp constructor and RegExp.prototype.compile: converts | |
| 49 // the pattern and flags to strings and (re)initializes the given object. | |
| 50 function DoConstructRegExp(object, pattern, flags) { | |
| 51 // RegExp : Called as constructor; see ECMA-262, section 15.10.4. | |
| 52 if (IS_REGEXP(pattern)) { | |
| 53 if (!IS_UNDEFINED(flags)) throw MakeTypeError(kRegExpFlags); | |
| 54 flags = (pattern.global ? 'g' : '') | |
| 55 + (pattern.ignoreCase ? 'i' : '') | |
| 56 + (pattern.multiline ? 'm' : ''); | |
| 57 if (FLAG_harmony_unicode_regexps) | |
| 58 flags += (pattern.unicode ? 'u' : ''); | |
| 59 if (FLAG_harmony_regexps) | |
| 60 flags += (pattern.sticky ? 'y' : ''); | |
| 61 pattern = pattern.source; | |
| 62 } | |
| 63 | |
| 64 pattern = IS_UNDEFINED(pattern) ? '' : TO_STRING(pattern); | |
| 65 flags = IS_UNDEFINED(flags) ? '' : TO_STRING(flags); | |
| 66 | |
| 67 %RegExpInitializeAndCompile(object, pattern, flags); | |
| 68 } | |
| 69 | |
| 70 | |
| 71 function RegExpConstructor(pattern, flags) { | |
| 72 if (%_IsConstructCall()) { | |
| 73 DoConstructRegExp(this, pattern, flags); | |
| 74 } else { | |
| 75 // RegExp : Called as function; see ECMA-262, section 15.10.3.1. | |
| 76 if (IS_REGEXP(pattern) && IS_UNDEFINED(flags)) { | |
| 77 return pattern; | |
| 78 } | |
| 79 return new GlobalRegExp(pattern, flags); | |
| 80 } | |
| 81 } | |
| 82 | |
| 83 // Deprecated RegExp.prototype.compile method. We behave as if the constructor | |
| 84 // were called again. In SpiderMonkey, this method returns the regexp object. | |
| 85 // In JSC, it returns undefined. For compatibility with JSC, we match their | |
| 86 // behavior. | |
| 87 function RegExpCompileJS(pattern, flags) { | |
| 88 // Both JSC and SpiderMonkey treat a missing pattern argument as the | |
| 89 // empty pattern string, and an actual undefined value passed as the | |
| 90 // pattern as the string 'undefined'. Note that JSC is inconsistent | |
| 91 // here, treating undefined values differently in | |
| 92 // RegExp.prototype.compile and in the constructor, where they are | |
| 93 // the empty string. For compatibility with JSC, we match their | |
| 94 // behavior. | |
| 95 if (this == GlobalRegExp.prototype) { | |
| 96 // We don't allow recompiling RegExp.prototype. | |
| 97 throw MakeTypeError(kIncompatibleMethodReceiver, | |
| 98 'RegExp.prototype.compile', this); | |
| 99 } | |
| 100 if (IS_UNDEFINED(pattern) && %_ArgumentsLength() != 0) { | |
| 101 DoConstructRegExp(this, 'undefined', flags); | |
| 102 } else { | |
| 103 DoConstructRegExp(this, pattern, flags); | |
| 104 } | |
| 105 } | |
| 106 | |
| 107 | |
| 108 function DoRegExpExec(regexp, string, index) { | |
| 109 var result = %_RegExpExec(regexp, string, index, RegExpLastMatchInfo); | |
| 110 if (result !== null) $regexpLastMatchInfoOverride = null; | |
| 111 return result; | |
| 112 } | |
| 113 | |
| 114 | |
| 115 // This is kind of performance sensitive, so we want to avoid unnecessary | |
| 116 // type checks on inputs. But we also don't want to inline it several times | |
| 117 // manually, so we use a macro :-) | |
| 118 macro RETURN_NEW_RESULT_FROM_MATCH_INFO(MATCHINFO, STRING) | |
| 119 var numResults = NUMBER_OF_CAPTURES(MATCHINFO) >> 1; | |
| 120 var start = MATCHINFO[CAPTURE0]; | |
| 121 var end = MATCHINFO[CAPTURE1]; | |
| 122 // Calculate the substring of the first match before creating the result array | |
| 123 // to avoid an unnecessary write barrier storing the first result. | |
| 124 var first = %_SubString(STRING, start, end); | |
| 125 var result = %_RegExpConstructResult(numResults, start, STRING); | |
| 126 result[0] = first; | |
| 127 if (numResults == 1) return result; | |
| 128 var j = REGEXP_FIRST_CAPTURE + 2; | |
| 129 for (var i = 1; i < numResults; i++) { | |
| 130 start = MATCHINFO[j++]; | |
| 131 if (start != -1) { | |
| 132 end = MATCHINFO[j]; | |
| 133 result[i] = %_SubString(STRING, start, end); | |
| 134 } | |
| 135 j++; | |
| 136 } | |
| 137 return result; | |
| 138 endmacro | |
| 139 | |
| 140 | |
| 141 function RegExpExecNoTests(regexp, string, start) { | |
| 142 // Must be called with RegExp, string and positive integer as arguments. | |
| 143 var matchInfo = %_RegExpExec(regexp, string, start, RegExpLastMatchInfo); | |
| 144 if (matchInfo !== null) { | |
| 145 $regexpLastMatchInfoOverride = null; | |
| 146 // ES6 21.2.5.2.2 step 18. | |
| 147 if (FLAG_harmony_regexps && regexp.sticky) { | |
| 148 regexp.lastIndex = matchInfo[CAPTURE1]; | |
| 149 } | |
| 150 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchInfo, string); | |
| 151 } | |
| 152 regexp.lastIndex = 0; | |
| 153 return null; | |
| 154 } | |
| 155 | |
| 156 | |
| 157 function RegExpExecJS(string) { | |
| 158 if (!IS_REGEXP(this)) { | |
| 159 throw MakeTypeError(kIncompatibleMethodReceiver, | |
| 160 'RegExp.prototype.exec', this); | |
| 161 } | |
| 162 | |
| 163 string = TO_STRING(string); | |
| 164 var lastIndex = this.lastIndex; | |
| 165 | |
| 166 // Conversion is required by the ES5 specification (RegExp.prototype.exec | |
| 167 // algorithm, step 5) even if the value is discarded for non-global RegExps. | |
| 168 var i = TO_INTEGER(lastIndex); | |
| 169 | |
| 170 var updateLastIndex = this.global || (FLAG_harmony_regexps && this.sticky); | |
| 171 if (updateLastIndex) { | |
| 172 if (i < 0 || i > string.length) { | |
| 173 this.lastIndex = 0; | |
| 174 return null; | |
| 175 } | |
| 176 } else { | |
| 177 i = 0; | |
| 178 } | |
| 179 | |
| 180 // matchIndices is either null or the RegExpLastMatchInfo array. | |
| 181 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); | |
| 182 | |
| 183 if (IS_NULL(matchIndices)) { | |
| 184 this.lastIndex = 0; | |
| 185 return null; | |
| 186 } | |
| 187 | |
| 188 // Successful match. | |
| 189 $regexpLastMatchInfoOverride = null; | |
| 190 if (updateLastIndex) { | |
| 191 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; | |
| 192 } | |
| 193 RETURN_NEW_RESULT_FROM_MATCH_INFO(matchIndices, string); | |
| 194 } | |
| 195 | |
| 196 | |
| 197 // One-element cache for the simplified test regexp. | |
| 198 var regexp_key; | |
| 199 var regexp_val; | |
| 200 | |
| 201 // Section 15.10.6.3 doesn't actually make sense, but the intention seems to be | |
| 202 // that test is defined in terms of RegExp.prototype.exec. However, it probably | |
| 203 // means the original value of RegExp.prototype.exec, which is what everybody | |
| 204 // else implements. | |
| 205 function RegExpTest(string) { | |
| 206 if (!IS_REGEXP(this)) { | |
| 207 throw MakeTypeError(kIncompatibleMethodReceiver, | |
| 208 'RegExp.prototype.test', this); | |
| 209 } | |
| 210 string = TO_STRING(string); | |
| 211 | |
| 212 var lastIndex = this.lastIndex; | |
| 213 | |
| 214 // Conversion is required by the ES5 specification (RegExp.prototype.exec | |
| 215 // algorithm, step 5) even if the value is discarded for non-global RegExps. | |
| 216 var i = TO_INTEGER(lastIndex); | |
| 217 | |
| 218 if (this.global || (FLAG_harmony_regexps && this.sticky)) { | |
| 219 if (i < 0 || i > string.length) { | |
| 220 this.lastIndex = 0; | |
| 221 return false; | |
| 222 } | |
| 223 // matchIndices is either null or the RegExpLastMatchInfo array. | |
| 224 var matchIndices = %_RegExpExec(this, string, i, RegExpLastMatchInfo); | |
| 225 if (IS_NULL(matchIndices)) { | |
| 226 this.lastIndex = 0; | |
| 227 return false; | |
| 228 } | |
| 229 $regexpLastMatchInfoOverride = null; | |
| 230 this.lastIndex = RegExpLastMatchInfo[CAPTURE1]; | |
| 231 return true; | |
| 232 } else { | |
| 233 // Non-global, non-sticky regexp. | |
| 234 // Remove irrelevant preceding '.*' in a test regexp. The expression | |
| 235 // checks whether this.source starts with '.*' and that the third char is | |
| 236 // not a '?'. But see https://code.google.com/p/v8/issues/detail?id=3560 | |
| 237 var regexp = this; | |
| 238 if (regexp.source.length >= 3 && | |
| 239 %_StringCharCodeAt(regexp.source, 0) == 46 && // '.' | |
| 240 %_StringCharCodeAt(regexp.source, 1) == 42 && // '*' | |
| 241 %_StringCharCodeAt(regexp.source, 2) != 63) { // '?' | |
| 242 regexp = TrimRegExp(regexp); | |
| 243 } | |
| 244 // matchIndices is either null or the RegExpLastMatchInfo array. | |
| 245 var matchIndices = %_RegExpExec(regexp, string, 0, RegExpLastMatchInfo); | |
| 246 if (IS_NULL(matchIndices)) { | |
| 247 this.lastIndex = 0; | |
| 248 return false; | |
| 249 } | |
| 250 $regexpLastMatchInfoOverride = null; | |
| 251 return true; | |
| 252 } | |
| 253 } | |
| 254 | |
| 255 function TrimRegExp(regexp) { | |
| 256 if (!%_ObjectEquals(regexp_key, regexp)) { | |
| 257 regexp_key = regexp; | |
| 258 regexp_val = | |
| 259 new GlobalRegExp(%_SubString(regexp.source, 2, regexp.source.length), | |
| 260 (regexp.ignoreCase ? regexp.multiline ? "im" : "i" | |
| 261 : regexp.multiline ? "m" : "")); | |
| 262 } | |
| 263 return regexp_val; | |
| 264 } | |
| 265 | |
| 266 | |
| 267 function RegExpToString() { | |
| 268 if (!IS_REGEXP(this)) { | |
| 269 throw MakeTypeError(kIncompatibleMethodReceiver, | |
| 270 'RegExp.prototype.toString', this); | |
| 271 } | |
| 272 var result = '/' + this.source + '/'; | |
| 273 if (this.global) result += 'g'; | |
| 274 if (this.ignoreCase) result += 'i'; | |
| 275 if (this.multiline) result += 'm'; | |
| 276 if (FLAG_harmony_unicode_regexps && this.unicode) result += 'u'; | |
| 277 if (FLAG_harmony_regexps && this.sticky) result += 'y'; | |
| 278 return result; | |
| 279 } | |
| 280 | |
| 281 | |
| 282 // Getters for the static properties lastMatch, lastParen, leftContext, and | |
| 283 // rightContext of the RegExp constructor. The properties are computed based | |
| 284 // on the captures array of the last successful match and the subject string | |
| 285 // of the last successful match. | |
| 286 function RegExpGetLastMatch() { | |
| 287 if ($regexpLastMatchInfoOverride !== null) { | |
| 288 return OVERRIDE_MATCH($regexpLastMatchInfoOverride); | |
| 289 } | |
| 290 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); | |
| 291 return %_SubString(regExpSubject, | |
| 292 RegExpLastMatchInfo[CAPTURE0], | |
| 293 RegExpLastMatchInfo[CAPTURE1]); | |
| 294 } | |
| 295 | |
| 296 | |
| 297 function RegExpGetLastParen() { | |
| 298 if ($regexpLastMatchInfoOverride) { | |
| 299 var override = $regexpLastMatchInfoOverride; | |
| 300 if (override.length <= 3) return ''; | |
| 301 return override[override.length - 3]; | |
| 302 } | |
| 303 var length = NUMBER_OF_CAPTURES(RegExpLastMatchInfo); | |
| 304 if (length <= 2) return ''; // There were no captures. | |
| 305 // We match the SpiderMonkey behavior: return the substring defined by the | |
| 306 // last pair (after the first pair) of elements of the capture array even if | |
| 307 // it is empty. | |
| 308 var regExpSubject = LAST_SUBJECT(RegExpLastMatchInfo); | |
| 309 var start = RegExpLastMatchInfo[CAPTURE(length - 2)]; | |
| 310 var end = RegExpLastMatchInfo[CAPTURE(length - 1)]; | |
| 311 if (start != -1 && end != -1) { | |
| 312 return %_SubString(regExpSubject, start, end); | |
| 313 } | |
| 314 return ""; | |
| 315 } | |
| 316 | |
| 317 | |
| 318 function RegExpGetLeftContext() { | |
| 319 var start_index; | |
| 320 var subject; | |
| 321 if (!$regexpLastMatchInfoOverride) { | |
| 322 start_index = RegExpLastMatchInfo[CAPTURE0]; | |
| 323 subject = LAST_SUBJECT(RegExpLastMatchInfo); | |
| 324 } else { | |
| 325 var override = $regexpLastMatchInfoOverride; | |
| 326 start_index = OVERRIDE_POS(override); | |
| 327 subject = OVERRIDE_SUBJECT(override); | |
| 328 } | |
| 329 return %_SubString(subject, 0, start_index); | |
| 330 } | |
| 331 | |
| 332 | |
| 333 function RegExpGetRightContext() { | |
| 334 var start_index; | |
| 335 var subject; | |
| 336 if (!$regexpLastMatchInfoOverride) { | |
| 337 start_index = RegExpLastMatchInfo[CAPTURE1]; | |
| 338 subject = LAST_SUBJECT(RegExpLastMatchInfo); | |
| 339 } else { | |
| 340 var override = $regexpLastMatchInfoOverride; | |
| 341 subject = OVERRIDE_SUBJECT(override); | |
| 342 var match = OVERRIDE_MATCH(override); | |
| 343 start_index = OVERRIDE_POS(override) + match.length; | |
| 344 } | |
| 345 return %_SubString(subject, start_index, subject.length); | |
| 346 } | |
| 347 | |
| 348 | |
| 349 // The properties $1..$9 are the first nine capturing substrings of the last | |
| 350 // successful match, or ''. The function RegExpMakeCaptureGetter will be | |
| 351 // called with indices from 1 to 9. | |
| 352 function RegExpMakeCaptureGetter(n) { | |
| 353 return function foo() { | |
| 354 if ($regexpLastMatchInfoOverride) { | |
| 355 if (n < $regexpLastMatchInfoOverride.length - 2) { | |
| 356 return OVERRIDE_CAPTURE($regexpLastMatchInfoOverride, n); | |
| 357 } | |
| 358 return ''; | |
| 359 } | |
| 360 var index = n * 2; | |
| 361 if (index >= NUMBER_OF_CAPTURES(RegExpLastMatchInfo)) return ''; | |
| 362 var matchStart = RegExpLastMatchInfo[CAPTURE(index)]; | |
| 363 var matchEnd = RegExpLastMatchInfo[CAPTURE(index + 1)]; | |
| 364 if (matchStart == -1 || matchEnd == -1) return ''; | |
| 365 return %_SubString(LAST_SUBJECT(RegExpLastMatchInfo), matchStart, matchEnd); | |
| 366 }; | |
| 367 } | |
| 368 | |
| 369 // ------------------------------------------------------------------- | |
| 370 | |
| 371 %FunctionSetInstanceClassName(GlobalRegExp, 'RegExp'); | |
| 372 %AddNamedProperty( | |
| 373 GlobalRegExp.prototype, 'constructor', GlobalRegExp, DONT_ENUM); | |
| 374 %SetCode(GlobalRegExp, RegExpConstructor); | |
| 375 | |
| 376 utils.InstallFunctions(GlobalRegExp.prototype, DONT_ENUM, [ | |
| 377 "exec", RegExpExecJS, | |
| 378 "test", RegExpTest, | |
| 379 "toString", RegExpToString, | |
| 380 "compile", RegExpCompileJS | |
| 381 ]); | |
| 382 | |
| 383 // The length of compile is 1 in SpiderMonkey. | |
| 384 %FunctionSetLength(GlobalRegExp.prototype.compile, 1); | |
| 385 | |
| 386 // The properties `input` and `$_` are aliases for each other. When either | |
| 387 // is set, the assigned value is coerced to a string. | |
| 388 // Getter and setter for the input. | |
| 389 var RegExpGetInput = function() { | |
| 390 var regExpInput = LAST_INPUT(RegExpLastMatchInfo); | |
| 391 return IS_UNDEFINED(regExpInput) ? "" : regExpInput; | |
| 392 }; | |
| 393 var RegExpSetInput = function(string) { | |
| 394 LAST_INPUT(RegExpLastMatchInfo) = TO_STRING(string); | |
| 395 }; | |
| 396 | |
| 397 %OptimizeObjectForAddingMultipleProperties(GlobalRegExp, 22); | |
| 398 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'input', RegExpGetInput, | |
| 399 RegExpSetInput, DONT_DELETE); | |
| 400 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$_', RegExpGetInput, | |
| 401 RegExpSetInput, DONT_ENUM | DONT_DELETE); | |
| 402 | |
| 403 // The properties multiline and $* are aliases for each other. When this | |
| 404 // value is set in SpiderMonkey, the value it is set to is coerced to a | |
| 405 // boolean. We mimic that behavior with a slight difference: in SpiderMonkey | |
| 406 // the value of the expression 'RegExp.multiline = null' (for instance) is the | |
| 407 // boolean false (i.e., the value after coercion), while in V8 it is the value | |
| 408 // null (i.e., the value before coercion). | |
| 409 | |
| 410 // Getter and setter for multiline. | |
| 411 var multiline = false; | |
| 412 var RegExpGetMultiline = function() { return multiline; }; | |
| 413 var RegExpSetMultiline = function(flag) { multiline = flag ? true : false; }; | |
| 414 | |
| 415 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'multiline', RegExpGetMultiline, | |
| 416 RegExpSetMultiline, DONT_DELETE); | |
| 417 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$*', RegExpGetMultiline, | |
| 418 RegExpSetMultiline, | |
| 419 DONT_ENUM | DONT_DELETE); | |
| 420 | |
| 421 | |
| 422 var NoOpSetter = function(ignored) {}; | |
| 423 | |
| 424 | |
| 425 // Static properties set by a successful match. | |
| 426 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'lastMatch', RegExpGetLastMatch, | |
| 427 NoOpSetter, DONT_DELETE); | |
| 428 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$&', RegExpGetLastMatch, | |
| 429 NoOpSetter, DONT_ENUM | DONT_DELETE); | |
| 430 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'lastParen', RegExpGetLastParen, | |
| 431 NoOpSetter, DONT_DELETE); | |
| 432 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$+', RegExpGetLastParen, | |
| 433 NoOpSetter, DONT_ENUM | DONT_DELETE); | |
| 434 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'leftContext', | |
| 435 RegExpGetLeftContext, NoOpSetter, | |
| 436 DONT_DELETE); | |
| 437 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$`', RegExpGetLeftContext, | |
| 438 NoOpSetter, DONT_ENUM | DONT_DELETE); | |
| 439 %DefineAccessorPropertyUnchecked(GlobalRegExp, 'rightContext', | |
| 440 RegExpGetRightContext, NoOpSetter, | |
| 441 DONT_DELETE); | |
| 442 %DefineAccessorPropertyUnchecked(GlobalRegExp, "$'", RegExpGetRightContext, | |
| 443 NoOpSetter, DONT_ENUM | DONT_DELETE); | |
| 444 | |
| 445 for (var i = 1; i < 10; ++i) { | |
| 446 %DefineAccessorPropertyUnchecked(GlobalRegExp, '$' + i, | |
| 447 RegExpMakeCaptureGetter(i), NoOpSetter, | |
| 448 DONT_DELETE); | |
| 449 } | |
| 450 %ToFastProperties(GlobalRegExp); | |
| 451 | |
| 452 // ------------------------------------------------------------------- | |
| 453 // Exports | |
| 454 | |
| 455 utils.Export(function(to) { | |
| 456 to.RegExpExec = DoRegExpExec; | |
| 457 to.RegExpExecNoTests = RegExpExecNoTests; | |
| 458 to.RegExpLastMatchInfo = RegExpLastMatchInfo; | |
| 459 to.RegExpTest = RegExpTest; | |
| 460 }); | |
| 461 | |
| 462 }) | |
| OLD | NEW |