| OLD | NEW |
| (Empty) |
| 1 /* This is JavaScriptCore's variant of the PCRE library. While this library | |
| 2 started out as a copy of PCRE, many of the features of PCRE have been | |
| 3 removed. This library now supports only the regular expression features | |
| 4 required by the JavaScript language specification, and has only the functions | |
| 5 needed by JavaScriptCore and the rest of WebKit. | |
| 6 | |
| 7 Originally written by Philip Hazel | |
| 8 Copyright (c) 1997-2006 University of Cambridge | |
| 9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved. | |
| 10 Copyright (C) 2007 Eric Seidel <eric@webkit.org> | |
| 11 | |
| 12 ----------------------------------------------------------------------------- | |
| 13 Redistribution and use in source and binary forms, with or without | |
| 14 modification, are permitted provided that the following conditions are met: | |
| 15 | |
| 16 * Redistributions of source code must retain the above copyright notice, | |
| 17 this list of conditions and the following disclaimer. | |
| 18 | |
| 19 * Redistributions in binary form must reproduce the above copyright | |
| 20 notice, this list of conditions and the following disclaimer in the | |
| 21 documentation and/or other materials provided with the distribution. | |
| 22 | |
| 23 * Neither the name of the University of Cambridge nor the names of its | |
| 24 contributors may be used to endorse or promote products derived from | |
| 25 this software without specific prior written permission. | |
| 26 | |
| 27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
| 28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
| 29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE | |
| 30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE | |
| 31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
| 32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF | |
| 33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS | |
| 34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN | |
| 35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) | |
| 36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
| 37 POSSIBILITY OF SUCH DAMAGE. | |
| 38 ----------------------------------------------------------------------------- | |
| 39 */ | |
| 40 | |
| 41 /* This module contains jsRegExpExecute(), the externally visible function | |
| 42 that does pattern matching using an NFA algorithm, following the rules from | |
| 43 the JavaScript specification. There are also some supporting functions. */ | |
| 44 | |
| 45 #include "config.h" | |
| 46 | |
| 47 #include "pcre_internal.h" | |
| 48 | |
| 49 #include "ASCIICType.h" | |
| 50 | |
| 51 #include <ctype.h> | |
| 52 #include <limits.h> | |
| 53 #include <string.h> /* for memcpy */ | |
| 54 | |
| 55 #ifdef __GNUC__ | |
| 56 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION | |
| 57 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 58 #endif | |
| 59 | |
| 60 /* Avoid warnings on Windows. */ | |
| 61 #undef min | |
| 62 #undef max | |
| 63 | |
| 64 namespace dart { namespace jscre { | |
| 65 | |
| 66 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION | |
| 67 typedef int ReturnLocation; | |
| 68 #else | |
| 69 typedef void* ReturnLocation; | |
| 70 #endif | |
| 71 | |
| 72 /* Structure for building a chain of data for holding the values of | |
| 73 the subject pointer at the start of each bracket, used to detect when | |
| 74 an empty string has been matched by a bracket to break infinite loops. */ | |
| 75 struct BracketChainNode { | |
| 76 BracketChainNode* previousBracket; | |
| 77 const UChar* bracketStart; | |
| 78 }; | |
| 79 | |
| 80 struct MatchFrame { | |
| 81 ReturnLocation returnLocation; | |
| 82 struct MatchFrame* previousFrame; | |
| 83 | |
| 84 /* Function arguments that may change */ | |
| 85 struct { | |
| 86 const UChar* subjectPtr; | |
| 87 const unsigned char* instructionPtr; | |
| 88 int offsetTop; | |
| 89 BracketChainNode* bracketChain; | |
| 90 } args; | |
| 91 | |
| 92 | |
| 93 /* PCRE uses "fake" recursion built off of gotos, thus | |
| 94 stack-based local variables are not safe to use. Instead we have to | |
| 95 store local variables on the current MatchFrame. */ | |
| 96 struct { | |
| 97 const unsigned char* data; | |
| 98 const unsigned char* startOfRepeatingBracket; | |
| 99 const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stas
h away a subjectPtr here for later compare | |
| 100 const unsigned char* instructionPtrAtStartOfOnce; | |
| 101 | |
| 102 int repeatOthercase; | |
| 103 | |
| 104 int ctype; | |
| 105 int fc; | |
| 106 int fi; | |
| 107 int length; | |
| 108 int max; | |
| 109 int number; | |
| 110 int offset; | |
| 111 int saveOffset1; | |
| 112 int saveOffset2; | |
| 113 int saveOffset3; | |
| 114 | |
| 115 BracketChainNode bracketChainNode; | |
| 116 } locals; | |
| 117 }; | |
| 118 | |
| 119 /* Structure for passing "static" information around between the functions | |
| 120 doing traditional NFA matching, so that they are thread-safe. */ | |
| 121 | |
| 122 struct MatchData { | |
| 123 int* offsetVector; /* Offset vector */ | |
| 124 int offsetEnd; /* One past the end */ | |
| 125 int offsetMax; /* The maximum usable for return data */ | |
| 126 bool offsetOverflow; /* Set if too many extractions */ | |
| 127 const UChar* startSubject; /* Start of the subject string */ | |
| 128 const UChar* endSubject; /* End of the subject string */ | |
| 129 const UChar* endMatchPtr; /* Subject position at end match */ | |
| 130 int endOffsetTop; /* Highwater mark at end of match */ | |
| 131 bool multiline; | |
| 132 bool ignoreCase; | |
| 133 }; | |
| 134 | |
| 135 /* The maximum remaining length of subject we are prepared to search for a | |
| 136 req_byte match. */ | |
| 137 | |
| 138 #define REQ_BYTE_MAX 1000 | |
| 139 | |
| 140 /* The below limit restricts the number of "recursive" match calls in order to | |
| 141 avoid spending exponential time on complex regular expressions. */ | |
| 142 | |
| 143 static const unsigned matchLimit = 100000; | |
| 144 | |
| 145 #ifdef DEBUG | |
| 146 /************************************************* | |
| 147 * Debugging function to print chars * | |
| 148 *************************************************/ | |
| 149 | |
| 150 /* Print a sequence of chars in printable format, stopping at the end of the | |
| 151 subject if the requested. | |
| 152 | |
| 153 Arguments: | |
| 154 p points to characters | |
| 155 length number to print | |
| 156 isSubject true if printing from within md.startSubject | |
| 157 md pointer to matching data block, if isSubject is true | |
| 158 */ | |
| 159 | |
| 160 static void pchars(const UChar* p, int length, bool isSubject, const MatchData&
md) | |
| 161 { | |
| 162 if (isSubject && length > md.endSubject - p) | |
| 163 length = md.endSubject - p; | |
| 164 while (length-- > 0) { | |
| 165 int c; | |
| 166 if (isprint(c = *(p++))) | |
| 167 printf("%c", c); | |
| 168 else if (c < 256) | |
| 169 printf("\\x%02x", c); | |
| 170 else | |
| 171 printf("\\x{%x}", c); | |
| 172 } | |
| 173 } | |
| 174 #endif | |
| 175 | |
| 176 /************************************************* | |
| 177 * Match a back-reference * | |
| 178 *************************************************/ | |
| 179 | |
| 180 /* If a back reference hasn't been set, the length that is passed is greater | |
| 181 than the number of characters left in the string, so the match fails. | |
| 182 | |
| 183 Arguments: | |
| 184 offset index into the offset vector | |
| 185 subjectPtr points into the subject | |
| 186 length length to be matched | |
| 187 md points to match data block | |
| 188 | |
| 189 Returns: true if matched | |
| 190 */ | |
| 191 | |
| 192 static bool matchRef(int offset, const UChar* subjectPtr, int length, const Matc
hData& md) | |
| 193 { | |
| 194 const UChar* p = md.startSubject + md.offsetVector[offset]; | |
| 195 | |
| 196 #ifdef DEBUG | |
| 197 if (subjectPtr >= md.endSubject) | |
| 198 printf("matching subject <null>"); | |
| 199 else { | |
| 200 printf("matching subject "); | |
| 201 pchars(subjectPtr, length, true, md); | |
| 202 } | |
| 203 printf(" against backref "); | |
| 204 pchars(p, length, false, md); | |
| 205 printf("\n"); | |
| 206 #endif | |
| 207 | |
| 208 /* Always fail if not enough characters left */ | |
| 209 | |
| 210 if (length > md.endSubject - subjectPtr) | |
| 211 return false; | |
| 212 | |
| 213 /* Separate the caselesss case for speed */ | |
| 214 | |
| 215 if (md.ignoreCase) { | |
| 216 while (length-- > 0) { | |
| 217 UChar c = *p++; | |
| 218 int othercase = kjs_pcre_ucp_othercase(c); | |
| 219 UChar d = *subjectPtr++; | |
| 220 if (c != d && othercase != d) | |
| 221 return false; | |
| 222 } | |
| 223 } | |
| 224 else { | |
| 225 while (length-- > 0) | |
| 226 if (*p++ != *subjectPtr++) | |
| 227 return false; | |
| 228 } | |
| 229 | |
| 230 return true; | |
| 231 } | |
| 232 | |
| 233 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION | |
| 234 | |
| 235 /* Use numbered labels and switch statement at the bottom of the match function.
*/ | |
| 236 | |
| 237 #define RMATCH_WHERE(num) num | |
| 238 #define RRETURN_LABEL RRETURN_SWITCH | |
| 239 | |
| 240 #else | |
| 241 | |
| 242 /* Use GCC's computed goto extension. */ | |
| 243 | |
| 244 /* For one test case this is more than 40% faster than the switch statement. | |
| 245 We could avoid the use of the num argument entirely by using local labels, | |
| 246 but using it for the GCC case as well as the non-GCC case allows us to share | |
| 247 a bit more code and notice if we use conflicting numbers.*/ | |
| 248 | |
| 249 #define RMATCH_WHERE(num) &&RRETURN_##num | |
| 250 #define RRETURN_LABEL *stack.currentFrame->returnLocation | |
| 251 | |
| 252 #endif | |
| 253 | |
| 254 #define RECURSIVE_MATCH_COMMON(num) \ | |
| 255 goto RECURSE;\ | |
| 256 RRETURN_##num: \ | |
| 257 stack.popCurrentFrame(); | |
| 258 | |
| 259 #define RECURSIVE_MATCH(num, ra, rb) \ | |
| 260 do { \ | |
| 261 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \ | |
| 262 RECURSIVE_MATCH_COMMON(num) \ | |
| 263 } while (0) | |
| 264 | |
| 265 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \ | |
| 266 do { \ | |
| 267 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \ | |
| 268 startNewGroup(stack.currentFrame); \ | |
| 269 RECURSIVE_MATCH_COMMON(num) \ | |
| 270 } while (0) | |
| 271 | |
| 272 #define RRETURN goto RRETURN_LABEL | |
| 273 | |
| 274 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0) | |
| 275 | |
| 276 /************************************************* | |
| 277 * Match from current position * | |
| 278 *************************************************/ | |
| 279 | |
| 280 /* On entry instructionPtr points to the first opcode, and subjectPtr to the fir
st character | |
| 281 in the subject string, while substringStart holds the value of subjectPtr at the
start of the | |
| 282 last bracketed group - used for breaking infinite loops matching zero-length | |
| 283 strings. This function is called recursively in many circumstances. Whenever it | |
| 284 returns a negative (error) response, the outer match() call must also return the | |
| 285 same response. | |
| 286 | |
| 287 Arguments: | |
| 288 subjectPtr pointer in subject | |
| 289 instructionPtr position in code | |
| 290 offsetTop current top pointer | |
| 291 md pointer to "static" info for the match | |
| 292 | |
| 293 Returns: 1 if matched ) these values are >= 0 | |
| 294 0 if failed to match ) | |
| 295 a negative error value if aborted by an error condition | |
| 296 (e.g. stopped by repeated call or recursion limit) | |
| 297 */ | |
| 298 | |
| 299 static const unsigned FRAMES_ON_STACK = 16; | |
| 300 | |
| 301 struct MatchStack { | |
| 302 MatchStack() | |
| 303 : framesEnd(frames + FRAMES_ON_STACK) | |
| 304 , currentFrame(frames) | |
| 305 , size(1) // match() creates accesses the first frame w/o calling pushNe
wFrame | |
| 306 { | |
| 307 ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK); | |
| 308 } | |
| 309 | |
| 310 MatchFrame frames[FRAMES_ON_STACK]; | |
| 311 MatchFrame* framesEnd; | |
| 312 MatchFrame* currentFrame; | |
| 313 unsigned size; | |
| 314 | |
| 315 inline bool canUseStackBufferForNextFrame() | |
| 316 { | |
| 317 return size < FRAMES_ON_STACK; | |
| 318 } | |
| 319 | |
| 320 inline MatchFrame* allocateNextFrame() | |
| 321 { | |
| 322 if (canUseStackBufferForNextFrame()) | |
| 323 return currentFrame + 1; | |
| 324 return new MatchFrame; | |
| 325 } | |
| 326 | |
| 327 inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNo
de* bracketChain, ReturnLocation returnLocation) | |
| 328 { | |
| 329 MatchFrame* newframe = allocateNextFrame(); | |
| 330 newframe->previousFrame = currentFrame; | |
| 331 | |
| 332 newframe->args.subjectPtr = currentFrame->args.subjectPtr; | |
| 333 newframe->args.offsetTop = currentFrame->args.offsetTop; | |
| 334 newframe->args.instructionPtr = instructionPtr; | |
| 335 newframe->args.bracketChain = bracketChain; | |
| 336 newframe->returnLocation = returnLocation; | |
| 337 size++; | |
| 338 | |
| 339 currentFrame = newframe; | |
| 340 } | |
| 341 | |
| 342 inline void popCurrentFrame() | |
| 343 { | |
| 344 MatchFrame* oldFrame = currentFrame; | |
| 345 currentFrame = currentFrame->previousFrame; | |
| 346 if (size > FRAMES_ON_STACK) | |
| 347 delete oldFrame; | |
| 348 size--; | |
| 349 } | |
| 350 | |
| 351 void popAllFrames() | |
| 352 { | |
| 353 while (size) | |
| 354 popCurrentFrame(); | |
| 355 } | |
| 356 }; | |
| 357 | |
| 358 static int matchError(int errorCode, MatchStack& stack) | |
| 359 { | |
| 360 stack.popAllFrames(); | |
| 361 return errorCode; | |
| 362 } | |
| 363 | |
| 364 /* Get the next UTF-8 character, not advancing the pointer, incrementing length | |
| 365 if there are extra bytes. This is called when we know we are in UTF-8 mode. */ | |
| 366 | |
| 367 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* su
bjectPtr, int& len) | |
| 368 { | |
| 369 c = *subjectPtr; | |
| 370 if ((c & 0xc0) == 0xc0) { | |
| 371 int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional byte
s */ | |
| 372 int gcss = 6 * gcaa; | |
| 373 c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss; | |
| 374 for (int gcii = 1; gcii <= gcaa; gcii++) { | |
| 375 gcss -= 6; | |
| 376 c |= (subjectPtr[gcii] & 0x3f) << gcss; | |
| 377 } | |
| 378 len += gcaa; | |
| 379 } | |
| 380 } | |
| 381 | |
| 382 static inline void startNewGroup(MatchFrame* currentFrame) | |
| 383 { | |
| 384 /* At the start of a bracketed group, add the current subject pointer to the | |
| 385 stack of such pointers, to be re-instated at the end of the group when we h
it | |
| 386 the closing ket. When match() is called in other circumstances, we don't ad
d to | |
| 387 this stack. */ | |
| 388 | |
| 389 currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.b
racketChain; | |
| 390 currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subj
ectPtr; | |
| 391 currentFrame->args.bracketChain = ¤tFrame->locals.bracketChainNode; | |
| 392 } | |
| 393 | |
| 394 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for
"greedy" to be less confusing | |
| 395 static inline void repeatInformationFromInstructionOffset(short instructionOffse
t, bool& minimize, int& minimumRepeats, int& maximumRepeats) | |
| 396 { | |
| 397 // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_
NOTSTAR | |
| 398 static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0
}; | |
| 399 static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX,
INT_MAX, INT_MAX, 1, 1 }; | |
| 400 | |
| 401 ASSERT(instructionOffset >= 0); | |
| 402 ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR)); | |
| 403 | |
| 404 minimize = (instructionOffset & 1); // this assumes ordering: Instruction, M
inimizeInstruction, Instruction2, MinimizeInstruction2 | |
| 405 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset]; | |
| 406 maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset]; | |
| 407 } | |
| 408 | |
| 409 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, i
nt offsetTop, MatchData& md) | |
| 410 { | |
| 411 bool isMatch = false; | |
| 412 int min; | |
| 413 bool minimize = false; /* Initialization not really needed, but some compile
rs think so. */ | |
| 414 unsigned matchCount = 0; | |
| 415 | |
| 416 MatchStack stack; | |
| 417 | |
| 418 /* The opcode jump table. */ | |
| 419 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 420 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode, | |
| 421 static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY)
}; | |
| 422 #undef EMIT_JUMP_TABLE_ENTRY | |
| 423 #endif | |
| 424 | |
| 425 /* One-time setup of the opcode jump table. */ | |
| 426 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 427 for (int i = 255; !opcodeJumpTable[i]; i--) | |
| 428 opcodeJumpTable[i] = &&CAPTURING_BRACKET; | |
| 429 #endif | |
| 430 | |
| 431 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION | |
| 432 // Shark shows this as a hot line | |
| 433 // Using a static const here makes this line disappear, but makes later acce
ss hotter (not sure why) | |
| 434 stack.currentFrame->returnLocation = &&RETURN; | |
| 435 #else | |
| 436 stack.currentFrame->returnLocation = 0; | |
| 437 #endif | |
| 438 stack.currentFrame->args.subjectPtr = subjectPtr; | |
| 439 stack.currentFrame->args.instructionPtr = instructionPtr; | |
| 440 stack.currentFrame->args.offsetTop = offsetTop; | |
| 441 stack.currentFrame->args.bracketChain = 0; | |
| 442 startNewGroup(stack.currentFrame); | |
| 443 | |
| 444 /* This is where control jumps back to to effect "recursion" */ | |
| 445 | |
| 446 RECURSE: | |
| 447 if (++matchCount > matchLimit) | |
| 448 return matchError(JSRegExpErrorHitLimit, stack); | |
| 449 | |
| 450 /* Now start processing the operations. */ | |
| 451 | |
| 452 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 453 while (true) | |
| 454 #endif | |
| 455 { | |
| 456 | |
| 457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 458 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode | |
| 459 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionP
tr] | |
| 460 #else | |
| 461 #define BEGIN_OPCODE(opcode) case OP_##opcode | |
| 462 #define NEXT_OPCODE continue | |
| 463 #endif | |
| 464 | |
| 465 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 466 NEXT_OPCODE; | |
| 467 #else | |
| 468 switch (*stack.currentFrame->args.instructionPtr) | |
| 469 #endif | |
| 470 { | |
| 471 /* Non-capturing bracket: optimized */ | |
| 472 | |
| 473 BEGIN_OPCODE(BRA): | |
| 474 NON_CAPTURING_BRACKET: | |
| 475 DPRINTF(("start bracket 0\n")); | |
| 476 do { | |
| 477 RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->arg
s.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); | |
| 478 if (isMatch) | |
| 479 RRETURN; | |
| 480 stack.currentFrame->args.instructionPtr += getLinkValue(stac
k.currentFrame->args.instructionPtr + 1); | |
| 481 } while (*stack.currentFrame->args.instructionPtr == OP_ALT); | |
| 482 DPRINTF(("bracket 0 failed\n")); | |
| 483 RRETURN; | |
| 484 | |
| 485 /* Skip over large extraction number data if encountered. */ | |
| 486 | |
| 487 BEGIN_OPCODE(BRANUMBER): | |
| 488 stack.currentFrame->args.instructionPtr += 3; | |
| 489 NEXT_OPCODE; | |
| 490 | |
| 491 /* End of the pattern. */ | |
| 492 | |
| 493 BEGIN_OPCODE(END): | |
| 494 md.endMatchPtr = stack.currentFrame->args.subjectPtr; /
* Record where we ended */ | |
| 495 md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and h
ow many extracts were taken */ | |
| 496 isMatch = true; | |
| 497 RRETURN; | |
| 498 | |
| 499 /* Assertion brackets. Check the alternative branches in turn - the | |
| 500 matching won't pass the KET for an assertion. If any one branch mat
ches, | |
| 501 the assertion is true. Lookbehind assertions have an OP_REVERSE ite
m at the | |
| 502 start of each branch to move the current point backwards, so the co
de at | |
| 503 this level is identical to the lookahead case. */ | |
| 504 | |
| 505 BEGIN_OPCODE(ASSERT): | |
| 506 do { | |
| 507 RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->arg
s.instructionPtr + 1 + LINK_SIZE, NULL); | |
| 508 if (isMatch) | |
| 509 break; | |
| 510 stack.currentFrame->args.instructionPtr += getLinkValue(stac
k.currentFrame->args.instructionPtr + 1); | |
| 511 } while (*stack.currentFrame->args.instructionPtr == OP_ALT); | |
| 512 if (*stack.currentFrame->args.instructionPtr == OP_KET) | |
| 513 RRETURN_NO_MATCH; | |
| 514 | |
| 515 /* Continue from after the assertion, updating the offsets high
water | |
| 516 mark, since extracts may have been taken during the assertion.
*/ | |
| 517 | |
| 518 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); | |
| 519 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; | |
| 520 stack.currentFrame->args.offsetTop = md.endOffsetTop; | |
| 521 NEXT_OPCODE; | |
| 522 | |
| 523 /* Negative assertion: all branches must fail to match */ | |
| 524 | |
| 525 BEGIN_OPCODE(ASSERT_NOT): | |
| 526 do { | |
| 527 RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->arg
s.instructionPtr + 1 + LINK_SIZE, NULL); | |
| 528 if (isMatch) | |
| 529 RRETURN_NO_MATCH; | |
| 530 stack.currentFrame->args.instructionPtr += getLinkValue(stac
k.currentFrame->args.instructionPtr + 1); | |
| 531 } while (*stack.currentFrame->args.instructionPtr == OP_ALT); | |
| 532 | |
| 533 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; | |
| 534 NEXT_OPCODE; | |
| 535 | |
| 536 /* An alternation is the end of a branch; scan along to find the end
of the | |
| 537 bracketed group and go to there. */ | |
| 538 | |
| 539 BEGIN_OPCODE(ALT): | |
| 540 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr); | |
| 541 NEXT_OPCODE; | |
| 542 | |
| 543 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicat
ing | |
| 544 that it may occur zero times. It may repeat infinitely, or not at a
ll - | |
| 545 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upp
er | |
| 546 repeat limits are compiled as a number of copies, with the optional
ones | |
| 547 preceded by BRAZERO or BRAMINZERO. */ | |
| 548 | |
| 549 BEGIN_OPCODE(BRAZERO): { | |
| 550 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre
ntFrame->args.instructionPtr + 1; | |
| 551 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals
.startOfRepeatingBracket, stack.currentFrame->args.bracketChain); | |
| 552 if (isMatch) | |
| 553 RRETURN; | |
| 554 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin
gBracket); | |
| 555 stack.currentFrame->args.instructionPtr = stack.currentFrame->lo
cals.startOfRepeatingBracket + 1 + LINK_SIZE; | |
| 556 NEXT_OPCODE; | |
| 557 } | |
| 558 | |
| 559 BEGIN_OPCODE(BRAMINZERO): { | |
| 560 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre
ntFrame->args.instructionPtr + 1; | |
| 561 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin
gBracket); | |
| 562 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals
.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain)
; | |
| 563 if (isMatch) | |
| 564 RRETURN; | |
| 565 stack.currentFrame->args.instructionPtr++; | |
| 566 NEXT_OPCODE; | |
| 567 } | |
| 568 | |
| 569 /* End of a group, repeated or non-repeating. If we are at the end o
f | |
| 570 an assertion "group", stop matching and return 1, but record the | |
| 571 current high water mark for use by positive assertions. Do this als
o | |
| 572 for the "once" (not-backup up) groups. */ | |
| 573 | |
| 574 BEGIN_OPCODE(KET): | |
| 575 BEGIN_OPCODE(KETRMIN): | |
| 576 BEGIN_OPCODE(KETRMAX): | |
| 577 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.c
urrentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instruc
tionPtr + 1); | |
| 578 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stac
k.currentFrame->args.bracketChain->bracketStart; | |
| 579 | |
| 580 /* Back up the stack of bracket start pointers. */ | |
| 581 | |
| 582 stack.currentFrame->args.bracketChain = stack.currentFrame->args
.bracketChain->previousBracket; | |
| 583 | |
| 584 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == O
P_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT
_NOT) { | |
| 585 md.endOffsetTop = stack.currentFrame->args.offsetTop; | |
| 586 isMatch = true; | |
| 587 RRETURN; | |
| 588 } | |
| 589 | |
| 590 /* In all other cases except a conditional group we have to chec
k the | |
| 591 group number back at the start and if necessary complete handli
ng an | |
| 592 extraction by setting the offsets and bumping the high water ma
rk. */ | |
| 593 | |
| 594 stack.currentFrame->locals.number = *stack.currentFrame->locals.
instructionPtrAtStartOfOnce - OP_BRA; | |
| 595 | |
| 596 /* For extended extraction brackets (large number), we have to f
ish out | |
| 597 the number from a dummy opcode at the start. */ | |
| 598 | |
| 599 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) | |
| 600 stack.currentFrame->locals.number = get2ByteValue(stack.curr
entFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE); | |
| 601 stack.currentFrame->locals.offset = stack.currentFrame->locals.n
umber << 1; | |
| 602 | |
| 603 #ifdef DEBUG | |
| 604 printf("end bracket %d", stack.currentFrame->locals.number); | |
| 605 printf("\n"); | |
| 606 #endif | |
| 607 | |
| 608 /* Test for a numbered group. This includes groups called as a r
esult | |
| 609 of recursion. Note that whole-pattern recursion is coded as a r
ecurse | |
| 610 into group 0, so it won't be picked up here. Instead, we catch
it when | |
| 611 the OP_END is reached. */ | |
| 612 | |
| 613 if (stack.currentFrame->locals.number > 0) { | |
| 614 if (stack.currentFrame->locals.offset >= md.offsetMax) | |
| 615 md.offsetOverflow = true; | |
| 616 else { | |
| 617 md.offsetVector[stack.currentFrame->locals.offset] = | |
| 618 md.offsetVector[md.offsetEnd - stack.currentFrame->local
s.number]; | |
| 619 md.offsetVector[stack.currentFrame->locals.offset+1] = s
tack.currentFrame->args.subjectPtr - md.startSubject; | |
| 620 if (stack.currentFrame->args.offsetTop <= stack.currentF
rame->locals.offset) | |
| 621 stack.currentFrame->args.offsetTop = stack.currentFr
ame->locals.offset + 2; | |
| 622 } | |
| 623 } | |
| 624 | |
| 625 /* For a non-repeating ket, just continue at this level. This al
so | |
| 626 happens for a repeating ket if no characters were matched in th
e group. | |
| 627 This is the forcible breaking of infinite loops as implemented
in Perl | |
| 628 5.005. If there is an options reset, it will get obeyed in the
normal | |
| 629 course of events. */ | |
| 630 | |
| 631 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.
currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfI
nstruction) { | |
| 632 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE; | |
| 633 NEXT_OPCODE; | |
| 634 } | |
| 635 | |
| 636 /* The repeating kets try the rest of the pattern or restart fro
m the | |
| 637 preceding bracket, in the appropriate order. */ | |
| 638 | |
| 639 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) { | |
| 640 RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr
+ 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); | |
| 641 if (isMatch) | |
| 642 RRETURN; | |
| 643 RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->lo
cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain); | |
| 644 if (isMatch) | |
| 645 RRETURN; | |
| 646 } else { /* OP_KETRMAX */ | |
| 647 RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->lo
cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain); | |
| 648 if (isMatch) | |
| 649 RRETURN; | |
| 650 RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr
+ 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); | |
| 651 if (isMatch) | |
| 652 RRETURN; | |
| 653 } | |
| 654 RRETURN; | |
| 655 | |
| 656 /* Start of subject. */ | |
| 657 | |
| 658 BEGIN_OPCODE(CIRC): | |
| 659 if (stack.currentFrame->args.subjectPtr != md.startSubject) | |
| 660 RRETURN_NO_MATCH; | |
| 661 stack.currentFrame->args.instructionPtr++; | |
| 662 NEXT_OPCODE; | |
| 663 | |
| 664 /* After internal newline if multiline. */ | |
| 665 | |
| 666 BEGIN_OPCODE(BOL): | |
| 667 if (stack.currentFrame->args.subjectPtr != md.startSubject && !i
sNewline(stack.currentFrame->args.subjectPtr[-1])) | |
| 668 RRETURN_NO_MATCH; | |
| 669 stack.currentFrame->args.instructionPtr++; | |
| 670 NEXT_OPCODE; | |
| 671 | |
| 672 /* End of subject. */ | |
| 673 | |
| 674 BEGIN_OPCODE(DOLL): | |
| 675 if (stack.currentFrame->args.subjectPtr < md.endSubject) | |
| 676 RRETURN_NO_MATCH; | |
| 677 stack.currentFrame->args.instructionPtr++; | |
| 678 NEXT_OPCODE; | |
| 679 | |
| 680 /* Before internal newline if multiline. */ | |
| 681 | |
| 682 BEGIN_OPCODE(EOL): | |
| 683 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNe
wline(*stack.currentFrame->args.subjectPtr)) | |
| 684 RRETURN_NO_MATCH; | |
| 685 stack.currentFrame->args.instructionPtr++; | |
| 686 NEXT_OPCODE; | |
| 687 | |
| 688 /* Word boundary assertions */ | |
| 689 | |
| 690 BEGIN_OPCODE(NOT_WORD_BOUNDARY): | |
| 691 BEGIN_OPCODE(WORD_BOUNDARY): { | |
| 692 bool currentCharIsWordChar = false; | |
| 693 bool previousCharIsWordChar = false; | |
| 694 | |
| 695 if (stack.currentFrame->args.subjectPtr > md.startSubject) | |
| 696 previousCharIsWordChar = isWordChar(stack.currentFrame->args
.subjectPtr[-1]); | |
| 697 if (stack.currentFrame->args.subjectPtr < md.endSubject) | |
| 698 currentCharIsWordChar = isWordChar(*stack.currentFrame->args
.subjectPtr); | |
| 699 | |
| 700 /* Now see if the situation is what we want */ | |
| 701 bool wordBoundaryDesired = (*stack.currentFrame->args.instructio
nPtr++ == OP_WORD_BOUNDARY); | |
| 702 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharI
sWordChar : currentCharIsWordChar != previousCharIsWordChar) | |
| 703 RRETURN_NO_MATCH; | |
| 704 NEXT_OPCODE; | |
| 705 } | |
| 706 | |
| 707 /* Match a single character type; inline for speed */ | |
| 708 | |
| 709 BEGIN_OPCODE(NOT_NEWLINE): | |
| 710 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 711 RRETURN_NO_MATCH; | |
| 712 if (isNewline(*stack.currentFrame->args.subjectPtr++)) | |
| 713 RRETURN_NO_MATCH; | |
| 714 stack.currentFrame->args.instructionPtr++; | |
| 715 NEXT_OPCODE; | |
| 716 | |
| 717 BEGIN_OPCODE(NOT_DIGIT): | |
| 718 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 719 RRETURN_NO_MATCH; | |
| 720 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++)) | |
| 721 RRETURN_NO_MATCH; | |
| 722 stack.currentFrame->args.instructionPtr++; | |
| 723 NEXT_OPCODE; | |
| 724 | |
| 725 BEGIN_OPCODE(DIGIT): | |
| 726 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 727 RRETURN_NO_MATCH; | |
| 728 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++)) | |
| 729 RRETURN_NO_MATCH; | |
| 730 stack.currentFrame->args.instructionPtr++; | |
| 731 NEXT_OPCODE; | |
| 732 | |
| 733 BEGIN_OPCODE(NOT_WHITESPACE): | |
| 734 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 735 RRETURN_NO_MATCH; | |
| 736 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++)) | |
| 737 RRETURN_NO_MATCH; | |
| 738 stack.currentFrame->args.instructionPtr++; | |
| 739 NEXT_OPCODE; | |
| 740 | |
| 741 BEGIN_OPCODE(WHITESPACE): | |
| 742 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 743 RRETURN_NO_MATCH; | |
| 744 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++)) | |
| 745 RRETURN_NO_MATCH; | |
| 746 stack.currentFrame->args.instructionPtr++; | |
| 747 NEXT_OPCODE; | |
| 748 | |
| 749 BEGIN_OPCODE(NOT_WORDCHAR): | |
| 750 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 751 RRETURN_NO_MATCH; | |
| 752 if (isWordChar(*stack.currentFrame->args.subjectPtr++)) | |
| 753 RRETURN_NO_MATCH; | |
| 754 stack.currentFrame->args.instructionPtr++; | |
| 755 NEXT_OPCODE; | |
| 756 | |
| 757 BEGIN_OPCODE(WORDCHAR): | |
| 758 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 759 RRETURN_NO_MATCH; | |
| 760 if (!isWordChar(*stack.currentFrame->args.subjectPtr++)) | |
| 761 RRETURN_NO_MATCH; | |
| 762 stack.currentFrame->args.instructionPtr++; | |
| 763 NEXT_OPCODE; | |
| 764 | |
| 765 /* Match a back reference, possibly repeatedly. Look past the end of
the | |
| 766 item to see if there is repeat information following. The code is s
imilar | |
| 767 to that for character classes, but repeated for efficiency. Then ob
ey | |
| 768 similar code to character type repeats - written out again for spee
d. | |
| 769 However, if the referenced string is the empty string, always treat | |
| 770 it as matched, any number of times (otherwise there could be infini
te | |
| 771 loops). */ | |
| 772 | |
| 773 BEGIN_OPCODE(REF): | |
| 774 stack.currentFrame->locals.offset = get2ByteValue(stack.currentF
rame->args.instructionPtr + 1) << 1; /* Doubled ref number */ | |
| 775 stack.currentFrame->args.instructionPtr += 3;
/* Advance past item */ | |
| 776 | |
| 777 /* If the reference is unset, set the length to be longer than t
he amount | |
| 778 of subject left; this ensures that every attempt at a match fai
ls. We | |
| 779 can't just fail here, because of the possibility of quantifiers
with zero | |
| 780 minima. */ | |
| 781 | |
| 782 if (stack.currentFrame->locals.offset >= stack.currentFrame->arg
s.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0) | |
| 783 stack.currentFrame->locals.length = 0; | |
| 784 else | |
| 785 stack.currentFrame->locals.length = md.offsetVector[stack.cu
rrentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset
]; | |
| 786 | |
| 787 /* Set up for repetition, or handle the non-repeated case */ | |
| 788 | |
| 789 switch (*stack.currentFrame->args.instructionPtr) { | |
| 790 case OP_CRSTAR: | |
| 791 case OP_CRMINSTAR: | |
| 792 case OP_CRPLUS: | |
| 793 case OP_CRMINPLUS: | |
| 794 case OP_CRQUERY: | |
| 795 case OP_CRMINQUERY: | |
| 796 repeatInformationFromInstructionOffset(*stack.currentFra
me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals
.max); | |
| 797 break; | |
| 798 | |
| 799 case OP_CRRANGE: | |
| 800 case OP_CRMINRANGE: | |
| 801 minimize = (*stack.currentFrame->args.instructionPtr ==
OP_CRMINRANGE); | |
| 802 min = get2ByteValue(stack.currentFrame->args.instruction
Ptr + 1); | |
| 803 stack.currentFrame->locals.max = get2ByteValue(stack.cur
rentFrame->args.instructionPtr + 3); | |
| 804 if (stack.currentFrame->locals.max == 0) | |
| 805 stack.currentFrame->locals.max = INT_MAX; | |
| 806 stack.currentFrame->args.instructionPtr += 5; | |
| 807 break; | |
| 808 | |
| 809 default: /* No repeat follows */ | |
| 810 if (!matchRef(stack.currentFrame->locals.offset, stack.c
urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) | |
| 811 RRETURN_NO_MATCH; | |
| 812 stack.currentFrame->args.subjectPtr += stack.currentFram
e->locals.length; | |
| 813 NEXT_OPCODE; | |
| 814 } | |
| 815 | |
| 816 /* If the length of the reference is zero, just continue with th
e | |
| 817 main loop. */ | |
| 818 | |
| 819 if (stack.currentFrame->locals.length == 0) | |
| 820 NEXT_OPCODE; | |
| 821 | |
| 822 /* First, ensure the minimum number of matches are present. */ | |
| 823 | |
| 824 for (int i = 1; i <= min; i++) { | |
| 825 if (!matchRef(stack.currentFrame->locals.offset, stack.curre
ntFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) | |
| 826 RRETURN_NO_MATCH; | |
| 827 stack.currentFrame->args.subjectPtr += stack.currentFrame->l
ocals.length; | |
| 828 } | |
| 829 | |
| 830 /* If min = max, continue at the same level without recursion. | |
| 831 They are not both allowed to be zero. */ | |
| 832 | |
| 833 if (min == stack.currentFrame->locals.max) | |
| 834 NEXT_OPCODE; | |
| 835 | |
| 836 /* If minimizing, keep trying and advancing the pointer */ | |
| 837 | |
| 838 if (minimize) { | |
| 839 for (stack.currentFrame->locals.fi = min;; stack.currentFram
e->locals.fi++) { | |
| 840 RECURSIVE_MATCH(20, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 841 if (isMatch) | |
| 842 RRETURN; | |
| 843 if (stack.currentFrame->locals.fi >= stack.currentFrame-
>locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->
args.subjectPtr, stack.currentFrame->locals.length, md)) | |
| 844 RRETURN; | |
| 845 stack.currentFrame->args.subjectPtr += stack.currentFram
e->locals.length; | |
| 846 } | |
| 847 /* Control never reaches here */ | |
| 848 } | |
| 849 | |
| 850 /* If maximizing, find the longest string and work backwards */ | |
| 851 | |
| 852 else { | |
| 853 stack.currentFrame->locals.subjectPtrAtStartOfInstruction =
stack.currentFrame->args.subjectPtr; | |
| 854 for (int i = min; i < stack.currentFrame->locals.max; i++) { | |
| 855 if (!matchRef(stack.currentFrame->locals.offset, stack.c
urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md)) | |
| 856 break; | |
| 857 stack.currentFrame->args.subjectPtr += stack.currentFram
e->locals.length; | |
| 858 } | |
| 859 while (stack.currentFrame->args.subjectPtr >= stack.currentF
rame->locals.subjectPtrAtStartOfInstruction) { | |
| 860 RECURSIVE_MATCH(21, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 861 if (isMatch) | |
| 862 RRETURN; | |
| 863 stack.currentFrame->args.subjectPtr -= stack.currentFram
e->locals.length; | |
| 864 } | |
| 865 RRETURN_NO_MATCH; | |
| 866 } | |
| 867 /* Control never reaches here */ | |
| 868 | |
| 869 /* Match a bit-mapped character class, possibly repeatedly. This op
code is | |
| 870 used when all the characters in the class have values in the range
0-255, | |
| 871 and either the matching is caseful, or the characters are in the ra
nge | |
| 872 0-127 when UTF-8 processing is enabled. The only difference between | |
| 873 OP_CLASS and OP_NCLASS occurs when a data character outside the ran
ge is | |
| 874 encountered. | |
| 875 | |
| 876 First, look past the end of the item to see if there is repeat info
rmation | |
| 877 following. Then obey similar code to character type repeats - writt
en out | |
| 878 again for speed. */ | |
| 879 | |
| 880 BEGIN_OPCODE(NCLASS): | |
| 881 BEGIN_OPCODE(CLASS): | |
| 882 stack.currentFrame->locals.data = stack.currentFrame->args.instr
uctionPtr + 1; /* Save for matching */ | |
| 883 stack.currentFrame->args.instructionPtr += 33;
/* Advance past the item */ | |
| 884 | |
| 885 switch (*stack.currentFrame->args.instructionPtr) { | |
| 886 case OP_CRSTAR: | |
| 887 case OP_CRMINSTAR: | |
| 888 case OP_CRPLUS: | |
| 889 case OP_CRMINPLUS: | |
| 890 case OP_CRQUERY: | |
| 891 case OP_CRMINQUERY: | |
| 892 repeatInformationFromInstructionOffset(*stack.currentFra
me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals
.max); | |
| 893 break; | |
| 894 | |
| 895 case OP_CRRANGE: | |
| 896 case OP_CRMINRANGE: | |
| 897 minimize = (*stack.currentFrame->args.instructionPtr ==
OP_CRMINRANGE); | |
| 898 min = get2ByteValue(stack.currentFrame->args.instruction
Ptr + 1); | |
| 899 stack.currentFrame->locals.max = get2ByteValue(stack.cur
rentFrame->args.instructionPtr + 3); | |
| 900 if (stack.currentFrame->locals.max == 0) | |
| 901 stack.currentFrame->locals.max = INT_MAX; | |
| 902 stack.currentFrame->args.instructionPtr += 5; | |
| 903 break; | |
| 904 | |
| 905 default: /* No repeat follows */ | |
| 906 min = stack.currentFrame->locals.max = 1; | |
| 907 break; | |
| 908 } | |
| 909 | |
| 910 /* First, ensure the minimum number of matches are present. */ | |
| 911 | |
| 912 for (int i = 1; i <= min; i++) { | |
| 913 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 914 RRETURN_NO_MATCH; | |
| 915 int c = *stack.currentFrame->args.subjectPtr++; | |
| 916 if (c > 255) { | |
| 917 if (stack.currentFrame->locals.data[-1] == OP_CLASS) | |
| 918 RRETURN_NO_MATCH; | |
| 919 } else { | |
| 920 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c
& 7)))) | |
| 921 RRETURN_NO_MATCH; | |
| 922 } | |
| 923 } | |
| 924 | |
| 925 /* If max == min we can continue with the main loop without the | |
| 926 need to recurse. */ | |
| 927 | |
| 928 if (min == stack.currentFrame->locals.max) | |
| 929 NEXT_OPCODE; | |
| 930 | |
| 931 /* If minimizing, keep testing the rest of the expression and ad
vancing | |
| 932 the pointer while it matches the class. */ | |
| 933 if (minimize) { | |
| 934 for (stack.currentFrame->locals.fi = min;; stack.currentFram
e->locals.fi++) { | |
| 935 RECURSIVE_MATCH(22, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 936 if (isMatch) | |
| 937 RRETURN; | |
| 938 if (stack.currentFrame->locals.fi >= stack.currentFrame-
>locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 939 RRETURN; | |
| 940 int c = *stack.currentFrame->args.subjectPtr++; | |
| 941 if (c > 255) { | |
| 942 if (stack.currentFrame->locals.data[-1] == OP_CLASS) | |
| 943 RRETURN; | |
| 944 } else { | |
| 945 if ((stack.currentFrame->locals.data[c/8] & (1 << (c
&7))) == 0) | |
| 946 RRETURN; | |
| 947 } | |
| 948 } | |
| 949 /* Control never reaches here */ | |
| 950 } | |
| 951 /* If maximizing, find the longest possible run, then work backw
ards. */ | |
| 952 else { | |
| 953 stack.currentFrame->locals.subjectPtrAtStartOfInstruction =
stack.currentFrame->args.subjectPtr; | |
| 954 | |
| 955 for (int i = min; i < stack.currentFrame->locals.max; i++) { | |
| 956 if (stack.currentFrame->args.subjectPtr >= md.endSubject
) | |
| 957 break; | |
| 958 int c = *stack.currentFrame->args.subjectPtr; | |
| 959 if (c > 255) { | |
| 960 if (stack.currentFrame->locals.data[-1] == OP_CLASS) | |
| 961 break; | |
| 962 } else { | |
| 963 if (!(stack.currentFrame->locals.data[c / 8] & (1 <<
(c & 7)))) | |
| 964 break; | |
| 965 } | |
| 966 ++stack.currentFrame->args.subjectPtr; | |
| 967 } | |
| 968 for (;;) { | |
| 969 RECURSIVE_MATCH(24, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 970 if (isMatch) | |
| 971 RRETURN; | |
| 972 if (stack.currentFrame->args.subjectPtr-- == stack.curre
ntFrame->locals.subjectPtrAtStartOfInstruction) | |
| 973 break; /* Stop if tried at original pos */ | |
| 974 } | |
| 975 | |
| 976 RRETURN; | |
| 977 } | |
| 978 /* Control never reaches here */ | |
| 979 | |
| 980 /* Match an extended character class. */ | |
| 981 | |
| 982 BEGIN_OPCODE(XCLASS): | |
| 983 stack.currentFrame->locals.data = stack.currentFrame->args.instr
uctionPtr + 1 + LINK_SIZE; /* Save for matching */ | |
| 984 stack.currentFrame->args.instructionPtr += getLinkValue(stack.cu
rrentFrame->args.instructionPtr + 1); /* Advance past the i
tem */ | |
| 985 | |
| 986 switch (*stack.currentFrame->args.instructionPtr) { | |
| 987 case OP_CRSTAR: | |
| 988 case OP_CRMINSTAR: | |
| 989 case OP_CRPLUS: | |
| 990 case OP_CRMINPLUS: | |
| 991 case OP_CRQUERY: | |
| 992 case OP_CRMINQUERY: | |
| 993 repeatInformationFromInstructionOffset(*stack.currentFra
me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals
.max); | |
| 994 break; | |
| 995 | |
| 996 case OP_CRRANGE: | |
| 997 case OP_CRMINRANGE: | |
| 998 minimize = (*stack.currentFrame->args.instructionPtr ==
OP_CRMINRANGE); | |
| 999 min = get2ByteValue(stack.currentFrame->args.instruction
Ptr + 1); | |
| 1000 stack.currentFrame->locals.max = get2ByteValue(stack.cur
rentFrame->args.instructionPtr + 3); | |
| 1001 if (stack.currentFrame->locals.max == 0) | |
| 1002 stack.currentFrame->locals.max = INT_MAX; | |
| 1003 stack.currentFrame->args.instructionPtr += 5; | |
| 1004 break; | |
| 1005 | |
| 1006 default: /* No repeat follows */ | |
| 1007 min = stack.currentFrame->locals.max = 1; | |
| 1008 } | |
| 1009 | |
| 1010 /* First, ensure the minimum number of matches are present. */ | |
| 1011 | |
| 1012 for (int i = 1; i <= min; i++) { | |
| 1013 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1014 RRETURN_NO_MATCH; | |
| 1015 int c = *stack.currentFrame->args.subjectPtr++; | |
| 1016 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data)) | |
| 1017 RRETURN_NO_MATCH; | |
| 1018 } | |
| 1019 | |
| 1020 /* If max == min we can continue with the main loop without the | |
| 1021 need to recurse. */ | |
| 1022 | |
| 1023 if (min == stack.currentFrame->locals.max) | |
| 1024 NEXT_OPCODE; | |
| 1025 | |
| 1026 /* If minimizing, keep testing the rest of the expression and ad
vancing | |
| 1027 the pointer while it matches the class. */ | |
| 1028 | |
| 1029 if (minimize) { | |
| 1030 for (stack.currentFrame->locals.fi = min;; stack.currentFram
e->locals.fi++) { | |
| 1031 RECURSIVE_MATCH(26, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 1032 if (isMatch) | |
| 1033 RRETURN; | |
| 1034 if (stack.currentFrame->locals.fi >= stack.currentFrame-
>locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1035 RRETURN; | |
| 1036 int c = *stack.currentFrame->args.subjectPtr++; | |
| 1037 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data)
) | |
| 1038 RRETURN; | |
| 1039 } | |
| 1040 /* Control never reaches here */ | |
| 1041 } | |
| 1042 | |
| 1043 /* If maximizing, find the longest possible run, then work backw
ards. */ | |
| 1044 | |
| 1045 else { | |
| 1046 stack.currentFrame->locals.subjectPtrAtStartOfInstruction =
stack.currentFrame->args.subjectPtr; | |
| 1047 for (int i = min; i < stack.currentFrame->locals.max; i++) { | |
| 1048 if (stack.currentFrame->args.subjectPtr >= md.endSubject
) | |
| 1049 break; | |
| 1050 int c = *stack.currentFrame->args.subjectPtr; | |
| 1051 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data)
) | |
| 1052 break; | |
| 1053 ++stack.currentFrame->args.subjectPtr; | |
| 1054 } | |
| 1055 for(;;) { | |
| 1056 RECURSIVE_MATCH(27, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 1057 if (isMatch) | |
| 1058 RRETURN; | |
| 1059 if (stack.currentFrame->args.subjectPtr-- == stack.curre
ntFrame->locals.subjectPtrAtStartOfInstruction) | |
| 1060 break; /* Stop if tried at original pos */ | |
| 1061 } | |
| 1062 RRETURN; | |
| 1063 } | |
| 1064 | |
| 1065 /* Control never reaches here */ | |
| 1066 | |
| 1067 /* Match a single character, casefully */ | |
| 1068 | |
| 1069 BEGIN_OPCODE(CHAR): | |
| 1070 stack.currentFrame->locals.length = 1; | |
| 1071 stack.currentFrame->args.instructionPtr++; | |
| 1072 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta
ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); | |
| 1073 stack.currentFrame->args.instructionPtr += stack.currentFrame->l
ocals.length; | |
| 1074 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1075 RRETURN_NO_MATCH; | |
| 1076 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.s
ubjectPtr++) | |
| 1077 RRETURN_NO_MATCH; | |
| 1078 NEXT_OPCODE; | |
| 1079 | |
| 1080 /* Match a single character, caselessly */ | |
| 1081 | |
| 1082 BEGIN_OPCODE(CHAR_IGNORING_CASE): { | |
| 1083 stack.currentFrame->locals.length = 1; | |
| 1084 stack.currentFrame->args.instructionPtr++; | |
| 1085 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta
ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); | |
| 1086 stack.currentFrame->args.instructionPtr += stack.currentFrame->l
ocals.length; | |
| 1087 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1088 RRETURN_NO_MATCH; | |
| 1089 int dc = *stack.currentFrame->args.subjectPtr++; | |
| 1090 if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercas
e(stack.currentFrame->locals.fc) != dc) | |
| 1091 RRETURN_NO_MATCH; | |
| 1092 NEXT_OPCODE; | |
| 1093 } | |
| 1094 | |
| 1095 /* Match a single ASCII character. */ | |
| 1096 | |
| 1097 BEGIN_OPCODE(ASCII_CHAR): | |
| 1098 if (md.endSubject == stack.currentFrame->args.subjectPtr) | |
| 1099 RRETURN_NO_MATCH; | |
| 1100 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->
args.instructionPtr[1]) | |
| 1101 RRETURN_NO_MATCH; | |
| 1102 ++stack.currentFrame->args.subjectPtr; | |
| 1103 stack.currentFrame->args.instructionPtr += 2; | |
| 1104 NEXT_OPCODE; | |
| 1105 | |
| 1106 /* Match one of two cases of an ASCII letter. */ | |
| 1107 | |
| 1108 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE): | |
| 1109 if (md.endSubject == stack.currentFrame->args.subjectPtr) | |
| 1110 RRETURN_NO_MATCH; | |
| 1111 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.curre
ntFrame->args.instructionPtr[1]) | |
| 1112 RRETURN_NO_MATCH; | |
| 1113 ++stack.currentFrame->args.subjectPtr; | |
| 1114 stack.currentFrame->args.instructionPtr += 2; | |
| 1115 NEXT_OPCODE; | |
| 1116 | |
| 1117 /* Match a single character repeatedly; different opcodes share code
. */ | |
| 1118 | |
| 1119 BEGIN_OPCODE(EXACT): | |
| 1120 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre
ntFrame->args.instructionPtr + 1); | |
| 1121 minimize = false; | |
| 1122 stack.currentFrame->args.instructionPtr += 3; | |
| 1123 goto REPEATCHAR; | |
| 1124 | |
| 1125 BEGIN_OPCODE(UPTO): | |
| 1126 BEGIN_OPCODE(MINUPTO): | |
| 1127 min = 0; | |
| 1128 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram
e->args.instructionPtr + 1); | |
| 1129 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPT
O; | |
| 1130 stack.currentFrame->args.instructionPtr += 3; | |
| 1131 goto REPEATCHAR; | |
| 1132 | |
| 1133 BEGIN_OPCODE(STAR): | |
| 1134 BEGIN_OPCODE(MINSTAR): | |
| 1135 BEGIN_OPCODE(PLUS): | |
| 1136 BEGIN_OPCODE(MINPLUS): | |
| 1137 BEGIN_OPCODE(QUERY): | |
| 1138 BEGIN_OPCODE(MINQUERY): | |
| 1139 repeatInformationFromInstructionOffset(*stack.currentFrame->args
.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max); | |
| 1140 | |
| 1141 /* Common code for all repeated single-character matches. We can
give | |
| 1142 up quickly if there are fewer than the minimum number of charac
ters left in | |
| 1143 the subject. */ | |
| 1144 | |
| 1145 REPEATCHAR: | |
| 1146 | |
| 1147 stack.currentFrame->locals.length = 1; | |
| 1148 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta
ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length); | |
| 1149 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.
endSubject - stack.currentFrame->args.subjectPtr) | |
| 1150 RRETURN_NO_MATCH; | |
| 1151 stack.currentFrame->args.instructionPtr += stack.currentFrame->l
ocals.length; | |
| 1152 | |
| 1153 if (stack.currentFrame->locals.fc <= 0xFFFF) { | |
| 1154 int othercase; | |
| 1155 othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.cur
rentFrame->locals.fc) : -1; | |
| 1156 | |
| 1157 for (int i = 1; i <= min; i++) { | |
| 1158 if (*stack.currentFrame->args.subjectPtr != stack.curren
tFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase) | |
| 1159 RRETURN_NO_MATCH; | |
| 1160 ++stack.currentFrame->args.subjectPtr; | |
| 1161 } | |
| 1162 | |
| 1163 if (min == stack.currentFrame->locals.max) | |
| 1164 NEXT_OPCODE; | |
| 1165 | |
| 1166 if (minimize) { | |
| 1167 stack.currentFrame->locals.repeatOthercase = othercase; | |
| 1168 for (stack.currentFrame->locals.fi = min;; stack.current
Frame->locals.fi++) { | |
| 1169 RECURSIVE_MATCH(28, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1170 if (isMatch) | |
| 1171 RRETURN; | |
| 1172 if (stack.currentFrame->locals.fi >= stack.currentFr
ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1173 RRETURN; | |
| 1174 if (*stack.currentFrame->args.subjectPtr != stack.cu
rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFr
ame->locals.repeatOthercase) | |
| 1175 RRETURN; | |
| 1176 ++stack.currentFrame->args.subjectPtr; | |
| 1177 } | |
| 1178 /* Control never reaches here */ | |
| 1179 } else { | |
| 1180 stack.currentFrame->locals.subjectPtrAtStartOfInstructio
n = stack.currentFrame->args.subjectPtr; | |
| 1181 for (int i = min; i < stack.currentFrame->locals.max; i+
+) { | |
| 1182 if (stack.currentFrame->args.subjectPtr >= md.endSub
ject) | |
| 1183 break; | |
| 1184 if (*stack.currentFrame->args.subjectPtr != stack.cu
rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase) | |
| 1185 break; | |
| 1186 ++stack.currentFrame->args.subjectPtr; | |
| 1187 } | |
| 1188 while (stack.currentFrame->args.subjectPtr >= stack.curr
entFrame->locals.subjectPtrAtStartOfInstruction) { | |
| 1189 RECURSIVE_MATCH(29, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1190 if (isMatch) | |
| 1191 RRETURN; | |
| 1192 --stack.currentFrame->args.subjectPtr; | |
| 1193 } | |
| 1194 RRETURN_NO_MATCH; | |
| 1195 } | |
| 1196 /* Control never reaches here */ | |
| 1197 } else { | |
| 1198 /* No case on surrogate pairs, so no need to bother with "ot
hercase". */ | |
| 1199 | |
| 1200 for (int i = 1; i <= min; i++) { | |
| 1201 if (*stack.currentFrame->args.subjectPtr != stack.curren
tFrame->locals.fc) | |
| 1202 RRETURN_NO_MATCH; | |
| 1203 stack.currentFrame->args.subjectPtr += 2; | |
| 1204 } | |
| 1205 | |
| 1206 if (min == stack.currentFrame->locals.max) | |
| 1207 NEXT_OPCODE; | |
| 1208 | |
| 1209 if (minimize) { | |
| 1210 for (stack.currentFrame->locals.fi = min;; stack.current
Frame->locals.fi++) { | |
| 1211 RECURSIVE_MATCH(30, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1212 if (isMatch) | |
| 1213 RRETURN; | |
| 1214 if (stack.currentFrame->locals.fi >= stack.currentFr
ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1215 RRETURN; | |
| 1216 if (*stack.currentFrame->args.subjectPtr != stack.cu
rrentFrame->locals.fc) | |
| 1217 RRETURN; | |
| 1218 stack.currentFrame->args.subjectPtr += 2; | |
| 1219 } | |
| 1220 /* Control never reaches here */ | |
| 1221 } else { | |
| 1222 stack.currentFrame->locals.subjectPtrAtStartOfInstructio
n = stack.currentFrame->args.subjectPtr; | |
| 1223 for (int i = min; i < stack.currentFrame->locals.max; i+
+) { | |
| 1224 if (stack.currentFrame->args.subjectPtr > md.endSubj
ect - 2) | |
| 1225 break; | |
| 1226 if (*stack.currentFrame->args.subjectPtr != stack.cu
rrentFrame->locals.fc) | |
| 1227 break; | |
| 1228 stack.currentFrame->args.subjectPtr += 2; | |
| 1229 } | |
| 1230 while (stack.currentFrame->args.subjectPtr >= stack.curr
entFrame->locals.subjectPtrAtStartOfInstruction) { | |
| 1231 RECURSIVE_MATCH(31, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1232 if (isMatch) | |
| 1233 RRETURN; | |
| 1234 stack.currentFrame->args.subjectPtr -= 2; | |
| 1235 } | |
| 1236 RRETURN_NO_MATCH; | |
| 1237 } | |
| 1238 /* Control never reaches here */ | |
| 1239 } | |
| 1240 /* Control never reaches here */ | |
| 1241 | |
| 1242 /* Match a negated single one-byte character. */ | |
| 1243 | |
| 1244 BEGIN_OPCODE(NOT): { | |
| 1245 if (stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1246 RRETURN_NO_MATCH; | |
| 1247 stack.currentFrame->args.instructionPtr++; | |
| 1248 int c = *stack.currentFrame->args.subjectPtr++; | |
| 1249 if (md.ignoreCase) { | |
| 1250 if (c < 128) | |
| 1251 c = toLowerCase(c); | |
| 1252 if (toLowerCase(*stack.currentFrame->args.instructionPtr++)
== c) | |
| 1253 RRETURN_NO_MATCH; | |
| 1254 } else { | |
| 1255 if (*stack.currentFrame->args.instructionPtr++ == c) | |
| 1256 RRETURN_NO_MATCH; | |
| 1257 } | |
| 1258 NEXT_OPCODE; | |
| 1259 } | |
| 1260 | |
| 1261 /* Match a negated single one-byte character repeatedly. This is alm
ost a | |
| 1262 repeat of the code for a repeated single character, but I haven't f
ound a | |
| 1263 nice way of commoning these up that doesn't require a test of the | |
| 1264 positive/negative option for each character match. Maybe that would
n't add | |
| 1265 very much to the time taken, but character matching *is* what this
is all | |
| 1266 about... */ | |
| 1267 | |
| 1268 BEGIN_OPCODE(NOTEXACT): | |
| 1269 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre
ntFrame->args.instructionPtr + 1); | |
| 1270 minimize = false; | |
| 1271 stack.currentFrame->args.instructionPtr += 3; | |
| 1272 goto REPEATNOTCHAR; | |
| 1273 | |
| 1274 BEGIN_OPCODE(NOTUPTO): | |
| 1275 BEGIN_OPCODE(NOTMINUPTO): | |
| 1276 min = 0; | |
| 1277 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram
e->args.instructionPtr + 1); | |
| 1278 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMIN
UPTO; | |
| 1279 stack.currentFrame->args.instructionPtr += 3; | |
| 1280 goto REPEATNOTCHAR; | |
| 1281 | |
| 1282 BEGIN_OPCODE(NOTSTAR): | |
| 1283 BEGIN_OPCODE(NOTMINSTAR): | |
| 1284 BEGIN_OPCODE(NOTPLUS): | |
| 1285 BEGIN_OPCODE(NOTMINPLUS): | |
| 1286 BEGIN_OPCODE(NOTQUERY): | |
| 1287 BEGIN_OPCODE(NOTMINQUERY): | |
| 1288 repeatInformationFromInstructionOffset(*stack.currentFrame->args
.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max); | |
| 1289 | |
| 1290 /* Common code for all repeated single-byte matches. We can give up
quickly | |
| 1291 if there are fewer than the minimum number of bytes left in the | |
| 1292 subject. */ | |
| 1293 | |
| 1294 REPEATNOTCHAR: | |
| 1295 if (min > md.endSubject - stack.currentFrame->args.subjectPtr) | |
| 1296 RRETURN_NO_MATCH; | |
| 1297 stack.currentFrame->locals.fc = *stack.currentFrame->args.instru
ctionPtr++; | |
| 1298 | |
| 1299 /* The code is duplicated for the caseless and caseful cases, fo
r speed, | |
| 1300 since matching characters is likely to be quite common. First,
ensure the | |
| 1301 minimum number of matches are present. If min = max, continue a
t the same | |
| 1302 level without recursing. Otherwise, if minimizing, keep trying
the rest of | |
| 1303 the expression and advancing one matching character if failing,
up to the | |
| 1304 maximum. Alternatively, if maximizing, find the maximum number
of | |
| 1305 characters and work backwards. */ | |
| 1306 | |
| 1307 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->lo
cals.fc, min, stack.currentFrame->locals.max)); | |
| 1308 | |
| 1309 if (md.ignoreCase) { | |
| 1310 if (stack.currentFrame->locals.fc < 128) | |
| 1311 stack.currentFrame->locals.fc = toLowerCase(stack.curren
tFrame->locals.fc); | |
| 1312 | |
| 1313 for (int i = 1; i <= min; i++) { | |
| 1314 int d = *stack.currentFrame->args.subjectPtr++; | |
| 1315 if (d < 128) | |
| 1316 d = toLowerCase(d); | |
| 1317 if (stack.currentFrame->locals.fc == d) | |
| 1318 RRETURN_NO_MATCH; | |
| 1319 } | |
| 1320 | |
| 1321 if (min == stack.currentFrame->locals.max) | |
| 1322 NEXT_OPCODE; | |
| 1323 | |
| 1324 if (minimize) { | |
| 1325 for (stack.currentFrame->locals.fi = min;; stack.current
Frame->locals.fi++) { | |
| 1326 RECURSIVE_MATCH(38, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1327 if (isMatch) | |
| 1328 RRETURN; | |
| 1329 int d = *stack.currentFrame->args.subjectPtr++; | |
| 1330 if (d < 128) | |
| 1331 d = toLowerCase(d); | |
| 1332 if (stack.currentFrame->locals.fi >= stack.currentFr
ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack
.currentFrame->locals.fc == d) | |
| 1333 RRETURN; | |
| 1334 } | |
| 1335 /* Control never reaches here */ | |
| 1336 } | |
| 1337 | |
| 1338 /* Maximize case */ | |
| 1339 | |
| 1340 else { | |
| 1341 stack.currentFrame->locals.subjectPtrAtStartOfInstructio
n = stack.currentFrame->args.subjectPtr; | |
| 1342 | |
| 1343 for (int i = min; i < stack.currentFrame->locals.max; i+
+) { | |
| 1344 if (stack.currentFrame->args.subjectPtr >= md.endSub
ject) | |
| 1345 break; | |
| 1346 int d = *stack.currentFrame->args.subjectPtr; | |
| 1347 if (d < 128) | |
| 1348 d = toLowerCase(d); | |
| 1349 if (stack.currentFrame->locals.fc == d) | |
| 1350 break; | |
| 1351 ++stack.currentFrame->args.subjectPtr; | |
| 1352 } | |
| 1353 for (;;) { | |
| 1354 RECURSIVE_MATCH(40, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1355 if (isMatch) | |
| 1356 RRETURN; | |
| 1357 if (stack.currentFrame->args.subjectPtr-- == stack.c
urrentFrame->locals.subjectPtrAtStartOfInstruction) | |
| 1358 break; /* Stop if tried at original pos *
/ | |
| 1359 } | |
| 1360 | |
| 1361 RRETURN; | |
| 1362 } | |
| 1363 /* Control never reaches here */ | |
| 1364 } | |
| 1365 | |
| 1366 /* Caseful comparisons */ | |
| 1367 | |
| 1368 else { | |
| 1369 for (int i = 1; i <= min; i++) { | |
| 1370 int d = *stack.currentFrame->args.subjectPtr++; | |
| 1371 if (stack.currentFrame->locals.fc == d) | |
| 1372 RRETURN_NO_MATCH; | |
| 1373 } | |
| 1374 | |
| 1375 if (min == stack.currentFrame->locals.max) | |
| 1376 NEXT_OPCODE; | |
| 1377 | |
| 1378 if (minimize) { | |
| 1379 for (stack.currentFrame->locals.fi = min;; stack.current
Frame->locals.fi++) { | |
| 1380 RECURSIVE_MATCH(42, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1381 if (isMatch) | |
| 1382 RRETURN; | |
| 1383 int d = *stack.currentFrame->args.subjectPtr++; | |
| 1384 if (stack.currentFrame->locals.fi >= stack.currentFr
ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack
.currentFrame->locals.fc == d) | |
| 1385 RRETURN; | |
| 1386 } | |
| 1387 /* Control never reaches here */ | |
| 1388 } | |
| 1389 | |
| 1390 /* Maximize case */ | |
| 1391 | |
| 1392 else { | |
| 1393 stack.currentFrame->locals.subjectPtrAtStartOfInstructio
n = stack.currentFrame->args.subjectPtr; | |
| 1394 | |
| 1395 for (int i = min; i < stack.currentFrame->locals.max; i+
+) { | |
| 1396 if (stack.currentFrame->args.subjectPtr >= md.endSub
ject) | |
| 1397 break; | |
| 1398 int d = *stack.currentFrame->args.subjectPtr; | |
| 1399 if (stack.currentFrame->locals.fc == d) | |
| 1400 break; | |
| 1401 ++stack.currentFrame->args.subjectPtr; | |
| 1402 } | |
| 1403 for (;;) { | |
| 1404 RECURSIVE_MATCH(44, stack.currentFrame->args.instruc
tionPtr, stack.currentFrame->args.bracketChain); | |
| 1405 if (isMatch) | |
| 1406 RRETURN; | |
| 1407 if (stack.currentFrame->args.subjectPtr-- == stack.c
urrentFrame->locals.subjectPtrAtStartOfInstruction) | |
| 1408 break; /* Stop if tried at original pos *
/ | |
| 1409 } | |
| 1410 | |
| 1411 RRETURN; | |
| 1412 } | |
| 1413 } | |
| 1414 /* Control never reaches here */ | |
| 1415 | |
| 1416 /* Match a single character type repeatedly; several different opcod
es | |
| 1417 share code. This is very similar to the code for single characters,
but we | |
| 1418 repeat it in the interests of efficiency. */ | |
| 1419 | |
| 1420 BEGIN_OPCODE(TYPEEXACT): | |
| 1421 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre
ntFrame->args.instructionPtr + 1); | |
| 1422 minimize = true; | |
| 1423 stack.currentFrame->args.instructionPtr += 3; | |
| 1424 goto REPEATTYPE; | |
| 1425 | |
| 1426 BEGIN_OPCODE(TYPEUPTO): | |
| 1427 BEGIN_OPCODE(TYPEMINUPTO): | |
| 1428 min = 0; | |
| 1429 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram
e->args.instructionPtr + 1); | |
| 1430 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMI
NUPTO; | |
| 1431 stack.currentFrame->args.instructionPtr += 3; | |
| 1432 goto REPEATTYPE; | |
| 1433 | |
| 1434 BEGIN_OPCODE(TYPESTAR): | |
| 1435 BEGIN_OPCODE(TYPEMINSTAR): | |
| 1436 BEGIN_OPCODE(TYPEPLUS): | |
| 1437 BEGIN_OPCODE(TYPEMINPLUS): | |
| 1438 BEGIN_OPCODE(TYPEQUERY): | |
| 1439 BEGIN_OPCODE(TYPEMINQUERY): | |
| 1440 repeatInformationFromInstructionOffset(*stack.currentFrame->args
.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max); | |
| 1441 | |
| 1442 /* Common code for all repeated single character type matches. N
ote that | |
| 1443 in UTF-8 mode, '.' matches a character of any length, but for t
he other | |
| 1444 character types, the valid characters are all one-byte long. */ | |
| 1445 | |
| 1446 REPEATTYPE: | |
| 1447 stack.currentFrame->locals.ctype = *stack.currentFrame->args.ins
tructionPtr++; /* Code for the character type */ | |
| 1448 | |
| 1449 /* First, ensure the minimum number of matches are present. Use
inline | |
| 1450 code for maximizing the speed, and do the type test once at the
start | |
| 1451 (i.e. keep it out of the loop). Also we can test that there are
at least | |
| 1452 the minimum number of characters before we start. */ | |
| 1453 | |
| 1454 if (min > md.endSubject - stack.currentFrame->args.subjectPtr) | |
| 1455 RRETURN_NO_MATCH; | |
| 1456 if (min > 0) { | |
| 1457 switch (stack.currentFrame->locals.ctype) { | |
| 1458 case OP_NOT_NEWLINE: | |
| 1459 for (int i = 1; i <= min; i++) { | |
| 1460 if (isNewline(*stack.currentFrame->args.subjectP
tr)) | |
| 1461 RRETURN_NO_MATCH; | |
| 1462 ++stack.currentFrame->args.subjectPtr; | |
| 1463 } | |
| 1464 break; | |
| 1465 | |
| 1466 case OP_NOT_DIGIT: | |
| 1467 for (int i = 1; i <= min; i++) { | |
| 1468 if (isASCIIDigit(*stack.currentFrame->args.subje
ctPtr)) | |
| 1469 RRETURN_NO_MATCH; | |
| 1470 ++stack.currentFrame->args.subjectPtr; | |
| 1471 } | |
| 1472 break; | |
| 1473 | |
| 1474 case OP_DIGIT: | |
| 1475 for (int i = 1; i <= min; i++) { | |
| 1476 if (!isASCIIDigit(*stack.currentFrame->args.subj
ectPtr)) | |
| 1477 RRETURN_NO_MATCH; | |
| 1478 ++stack.currentFrame->args.subjectPtr; | |
| 1479 } | |
| 1480 break; | |
| 1481 | |
| 1482 case OP_NOT_WHITESPACE: | |
| 1483 for (int i = 1; i <= min; i++) { | |
| 1484 if (isSpaceChar(*stack.currentFrame->args.subjec
tPtr)) | |
| 1485 RRETURN_NO_MATCH; | |
| 1486 ++stack.currentFrame->args.subjectPtr; | |
| 1487 } | |
| 1488 break; | |
| 1489 | |
| 1490 case OP_WHITESPACE: | |
| 1491 for (int i = 1; i <= min; i++) { | |
| 1492 if (!isSpaceChar(*stack.currentFrame->args.subje
ctPtr)) | |
| 1493 RRETURN_NO_MATCH; | |
| 1494 ++stack.currentFrame->args.subjectPtr; | |
| 1495 } | |
| 1496 break; | |
| 1497 | |
| 1498 case OP_NOT_WORDCHAR: | |
| 1499 for (int i = 1; i <= min; i++) { | |
| 1500 if (isWordChar(*stack.currentFrame->args.subject
Ptr)) | |
| 1501 RRETURN_NO_MATCH; | |
| 1502 ++stack.currentFrame->args.subjectPtr; | |
| 1503 } | |
| 1504 break; | |
| 1505 | |
| 1506 case OP_WORDCHAR: | |
| 1507 for (int i = 1; i <= min; i++) { | |
| 1508 if (!isWordChar(*stack.currentFrame->args.subjec
tPtr)) | |
| 1509 RRETURN_NO_MATCH; | |
| 1510 ++stack.currentFrame->args.subjectPtr; | |
| 1511 } | |
| 1512 break; | |
| 1513 | |
| 1514 default: | |
| 1515 ASSERT_NOT_REACHED(); | |
| 1516 return matchError(JSRegExpErrorInternal, stack); | |
| 1517 } /* End switch(stack.currentFrame->locals.ctype) */ | |
| 1518 } | |
| 1519 | |
| 1520 /* If min = max, continue at the same level without recursing */ | |
| 1521 | |
| 1522 if (min == stack.currentFrame->locals.max) | |
| 1523 NEXT_OPCODE; | |
| 1524 | |
| 1525 /* If minimizing, we have to test the rest of the pattern before
each | |
| 1526 subsequent match. */ | |
| 1527 | |
| 1528 if (minimize) { | |
| 1529 for (stack.currentFrame->locals.fi = min;; stack.currentFram
e->locals.fi++) { | |
| 1530 RECURSIVE_MATCH(48, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 1531 if (isMatch) | |
| 1532 RRETURN; | |
| 1533 if (stack.currentFrame->locals.fi >= stack.currentFrame-
>locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject) | |
| 1534 RRETURN; | |
| 1535 | |
| 1536 int c = *stack.currentFrame->args.subjectPtr++; | |
| 1537 switch (stack.currentFrame->locals.ctype) { | |
| 1538 case OP_NOT_NEWLINE: | |
| 1539 if (isNewline(c)) | |
| 1540 RRETURN; | |
| 1541 break; | |
| 1542 | |
| 1543 case OP_NOT_DIGIT: | |
| 1544 if (isASCIIDigit(c)) | |
| 1545 RRETURN; | |
| 1546 break; | |
| 1547 | |
| 1548 case OP_DIGIT: | |
| 1549 if (!isASCIIDigit(c)) | |
| 1550 RRETURN; | |
| 1551 break; | |
| 1552 | |
| 1553 case OP_NOT_WHITESPACE: | |
| 1554 if (isSpaceChar(c)) | |
| 1555 RRETURN; | |
| 1556 break; | |
| 1557 | |
| 1558 case OP_WHITESPACE: | |
| 1559 if (!isSpaceChar(c)) | |
| 1560 RRETURN; | |
| 1561 break; | |
| 1562 | |
| 1563 case OP_NOT_WORDCHAR: | |
| 1564 if (isWordChar(c)) | |
| 1565 RRETURN; | |
| 1566 break; | |
| 1567 | |
| 1568 case OP_WORDCHAR: | |
| 1569 if (!isWordChar(c)) | |
| 1570 RRETURN; | |
| 1571 break; | |
| 1572 | |
| 1573 default: | |
| 1574 ASSERT_NOT_REACHED(); | |
| 1575 return matchError(JSRegExpErrorInternal, stack); | |
| 1576 } | |
| 1577 } | |
| 1578 /* Control never reaches here */ | |
| 1579 } | |
| 1580 | |
| 1581 /* If maximizing it is worth using inline code for speed, doing
the type | |
| 1582 test once at the start (i.e. keep it out of the loop). */ | |
| 1583 | |
| 1584 else { | |
| 1585 stack.currentFrame->locals.subjectPtrAtStartOfInstruction =
stack.currentFrame->args.subjectPtr; /* Remember where we started */ | |
| 1586 | |
| 1587 switch (stack.currentFrame->locals.ctype) { | |
| 1588 case OP_NOT_NEWLINE: | |
| 1589 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1590 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject || isNewline(*stack.currentFrame->args.subjectPtr)) | |
| 1591 break; | |
| 1592 stack.currentFrame->args.subjectPtr++; | |
| 1593 } | |
| 1594 break; | |
| 1595 | |
| 1596 case OP_NOT_DIGIT: | |
| 1597 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1598 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1599 break; | |
| 1600 int c = *stack.currentFrame->args.subjectPtr; | |
| 1601 if (isASCIIDigit(c)) | |
| 1602 break; | |
| 1603 ++stack.currentFrame->args.subjectPtr; | |
| 1604 } | |
| 1605 break; | |
| 1606 | |
| 1607 case OP_DIGIT: | |
| 1608 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1609 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1610 break; | |
| 1611 int c = *stack.currentFrame->args.subjectPtr; | |
| 1612 if (!isASCIIDigit(c)) | |
| 1613 break; | |
| 1614 ++stack.currentFrame->args.subjectPtr; | |
| 1615 } | |
| 1616 break; | |
| 1617 | |
| 1618 case OP_NOT_WHITESPACE: | |
| 1619 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1620 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1621 break; | |
| 1622 int c = *stack.currentFrame->args.subjectPtr; | |
| 1623 if (isSpaceChar(c)) | |
| 1624 break; | |
| 1625 ++stack.currentFrame->args.subjectPtr; | |
| 1626 } | |
| 1627 break; | |
| 1628 | |
| 1629 case OP_WHITESPACE: | |
| 1630 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1631 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1632 break; | |
| 1633 int c = *stack.currentFrame->args.subjectPtr; | |
| 1634 if (!isSpaceChar(c)) | |
| 1635 break; | |
| 1636 ++stack.currentFrame->args.subjectPtr; | |
| 1637 } | |
| 1638 break; | |
| 1639 | |
| 1640 case OP_NOT_WORDCHAR: | |
| 1641 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1642 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1643 break; | |
| 1644 int c = *stack.currentFrame->args.subjectPtr; | |
| 1645 if (isWordChar(c)) | |
| 1646 break; | |
| 1647 ++stack.currentFrame->args.subjectPtr; | |
| 1648 } | |
| 1649 break; | |
| 1650 | |
| 1651 case OP_WORDCHAR: | |
| 1652 for (int i = min; i < stack.currentFrame->locals.max
; i++) { | |
| 1653 if (stack.currentFrame->args.subjectPtr >= md.en
dSubject) | |
| 1654 break; | |
| 1655 int c = *stack.currentFrame->args.subjectPtr; | |
| 1656 if (!isWordChar(c)) | |
| 1657 break; | |
| 1658 ++stack.currentFrame->args.subjectPtr; | |
| 1659 } | |
| 1660 break; | |
| 1661 | |
| 1662 default: | |
| 1663 ASSERT_NOT_REACHED(); | |
| 1664 return matchError(JSRegExpErrorInternal, stack); | |
| 1665 } | |
| 1666 | |
| 1667 /* stack.currentFrame->args.subjectPtr is now past the end o
f the maximum run */ | |
| 1668 | |
| 1669 for (;;) { | |
| 1670 RECURSIVE_MATCH(52, stack.currentFrame->args.instruction
Ptr, stack.currentFrame->args.bracketChain); | |
| 1671 if (isMatch) | |
| 1672 RRETURN; | |
| 1673 if (stack.currentFrame->args.subjectPtr-- == stack.curre
ntFrame->locals.subjectPtrAtStartOfInstruction) | |
| 1674 break; /* Stop if tried at original pos */ | |
| 1675 } | |
| 1676 | |
| 1677 /* Get here if we can't make it match with any permitted rep
etitions */ | |
| 1678 | |
| 1679 RRETURN; | |
| 1680 } | |
| 1681 /* Control never reaches here */ | |
| 1682 | |
| 1683 BEGIN_OPCODE(CRMINPLUS): | |
| 1684 BEGIN_OPCODE(CRMINQUERY): | |
| 1685 BEGIN_OPCODE(CRMINRANGE): | |
| 1686 BEGIN_OPCODE(CRMINSTAR): | |
| 1687 BEGIN_OPCODE(CRPLUS): | |
| 1688 BEGIN_OPCODE(CRQUERY): | |
| 1689 BEGIN_OPCODE(CRRANGE): | |
| 1690 BEGIN_OPCODE(CRSTAR): | |
| 1691 ASSERT_NOT_REACHED(); | |
| 1692 return matchError(JSRegExpErrorInternal, stack); | |
| 1693 | |
| 1694 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP | |
| 1695 CAPTURING_BRACKET: | |
| 1696 #else | |
| 1697 default: | |
| 1698 #endif | |
| 1699 /* Opening capturing bracket. If there is space in the offset ve
ctor, save | |
| 1700 the current subject position in the working slot at the top of
the vector. We | |
| 1701 mustn't change the current values of the data slot, because the
y may be set | |
| 1702 from a previous iteration of this group, and be referred to by
a reference | |
| 1703 inside the group. | |
| 1704 | |
| 1705 If the bracket fails to match, we need to restore this value an
d also the | |
| 1706 values of the final offsets, in case they were set by a previou
s iteration of | |
| 1707 the same bracket. | |
| 1708 | |
| 1709 If there isn't enough space in the offset vector, treat this as
if it were a | |
| 1710 non-capturing bracket. Don't worry about setting the flag for t
he error case | |
| 1711 here; that is handled in the code for KET. */ | |
| 1712 | |
| 1713 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA); | |
| 1714 | |
| 1715 stack.currentFrame->locals.number = *stack.currentFrame->args.in
structionPtr - OP_BRA; | |
| 1716 | |
| 1717 /* For extended extraction brackets (large number), we have to f
ish out the | |
| 1718 number from a dummy opcode at the start. */ | |
| 1719 | |
| 1720 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX) | |
| 1721 stack.currentFrame->locals.number = get2ByteValue(stack.curr
entFrame->args.instructionPtr + 2 + LINK_SIZE); | |
| 1722 stack.currentFrame->locals.offset = stack.currentFrame->locals.n
umber << 1; | |
| 1723 | |
| 1724 #ifdef DEBUG | |
| 1725 printf("start bracket %d subject=", stack.currentFrame->locals.n
umber); | |
| 1726 pchars(stack.currentFrame->args.subjectPtr, 16, true, md); | |
| 1727 printf("\n"); | |
| 1728 #endif | |
| 1729 | |
| 1730 if (stack.currentFrame->locals.offset < md.offsetMax) { | |
| 1731 stack.currentFrame->locals.saveOffset1 = md.offsetVector[sta
ck.currentFrame->locals.offset]; | |
| 1732 stack.currentFrame->locals.saveOffset2 = md.offsetVector[sta
ck.currentFrame->locals.offset + 1]; | |
| 1733 stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.
offsetEnd - stack.currentFrame->locals.number]; | |
| 1734 | |
| 1735 DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.sav
eOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.sav
eOffset3)); | |
| 1736 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu
mber] = stack.currentFrame->args.subjectPtr - md.startSubject; | |
| 1737 | |
| 1738 do { | |
| 1739 RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame-
>args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain); | |
| 1740 if (isMatch) | |
| 1741 RRETURN; | |
| 1742 stack.currentFrame->args.instructionPtr += getLinkValue(
stack.currentFrame->args.instructionPtr + 1); | |
| 1743 } while (*stack.currentFrame->args.instructionPtr == OP_ALT)
; | |
| 1744 | |
| 1745 DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.n
umber)); | |
| 1746 | |
| 1747 md.offsetVector[stack.currentFrame->locals.offset] = stack.c
urrentFrame->locals.saveOffset1; | |
| 1748 md.offsetVector[stack.currentFrame->locals.offset + 1] = sta
ck.currentFrame->locals.saveOffset2; | |
| 1749 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu
mber] = stack.currentFrame->locals.saveOffset3; | |
| 1750 | |
| 1751 RRETURN; | |
| 1752 } | |
| 1753 | |
| 1754 /* Insufficient room for saving captured contents */ | |
| 1755 | |
| 1756 goto NON_CAPTURING_BRACKET; | |
| 1757 } | |
| 1758 | |
| 1759 /* Do not stick any code in here without much thought; it is assumed | |
| 1760 that "continue" in the code above comes out to here to repeat the main | |
| 1761 loop. */ | |
| 1762 | |
| 1763 } /* End of main loop */ | |
| 1764 | |
| 1765 ASSERT_NOT_REACHED(); | |
| 1766 | |
| 1767 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION | |
| 1768 | |
| 1769 RRETURN_SWITCH: | |
| 1770 switch (stack.currentFrame->returnLocation) { | |
| 1771 case 0: goto RETURN; | |
| 1772 case 1: goto RRETURN_1; | |
| 1773 case 2: goto RRETURN_2; | |
| 1774 case 6: goto RRETURN_6; | |
| 1775 case 7: goto RRETURN_7; | |
| 1776 case 14: goto RRETURN_14; | |
| 1777 case 15: goto RRETURN_15; | |
| 1778 case 16: goto RRETURN_16; | |
| 1779 case 17: goto RRETURN_17; | |
| 1780 case 18: goto RRETURN_18; | |
| 1781 case 19: goto RRETURN_19; | |
| 1782 case 20: goto RRETURN_20; | |
| 1783 case 21: goto RRETURN_21; | |
| 1784 case 22: goto RRETURN_22; | |
| 1785 case 24: goto RRETURN_24; | |
| 1786 case 26: goto RRETURN_26; | |
| 1787 case 27: goto RRETURN_27; | |
| 1788 case 28: goto RRETURN_28; | |
| 1789 case 29: goto RRETURN_29; | |
| 1790 case 30: goto RRETURN_30; | |
| 1791 case 31: goto RRETURN_31; | |
| 1792 case 38: goto RRETURN_38; | |
| 1793 case 40: goto RRETURN_40; | |
| 1794 case 42: goto RRETURN_42; | |
| 1795 case 44: goto RRETURN_44; | |
| 1796 case 48: goto RRETURN_48; | |
| 1797 case 52: goto RRETURN_52; | |
| 1798 } | |
| 1799 | |
| 1800 ASSERT_NOT_REACHED(); | |
| 1801 return matchError(JSRegExpErrorInternal, stack); | |
| 1802 | |
| 1803 #endif | |
| 1804 | |
| 1805 RETURN: | |
| 1806 return isMatch; | |
| 1807 } | |
| 1808 | |
| 1809 | |
| 1810 /************************************************* | |
| 1811 * Execute a Regular Expression * | |
| 1812 *************************************************/ | |
| 1813 | |
| 1814 /* This function applies a compiled re to a subject string and picks out | |
| 1815 portions of the string if it matches. Two elements in the vector are set for | |
| 1816 each substring: the offsets to the start and end of the substring. | |
| 1817 | |
| 1818 Arguments: | |
| 1819 re points to the compiled expression | |
| 1820 extra_data points to extra data or is NULL | |
| 1821 subject points to the subject string | |
| 1822 length length of subject string (may contain binary zeros) | |
| 1823 start_offset where to start in the subject string | |
| 1824 options option bits | |
| 1825 offsets points to a vector of ints to be filled in with offsets | |
| 1826 offsetcount the number of elements in the vector | |
| 1827 | |
| 1828 Returns: > 0 => success; value is the number of elements filled in | |
| 1829 = 0 => success, but offsets is not big enough | |
| 1830 -1 => failed to match | |
| 1831 < -1 => some kind of unexpected problem | |
| 1832 */ | |
| 1833 | |
| 1834 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endS
ubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOpti
mization, const UChar* originalSubjectStart) | |
| 1835 { | |
| 1836 // If first_byte is set, try scanning to the first instance of that byte | |
| 1837 // no need to try and match against any earlier part of the subject string. | |
| 1838 if (first_byte >= 0) { | |
| 1839 UChar first_char = first_byte; | |
| 1840 if (first_byte_caseless) | |
| 1841 while (subjectPtr < endSubject) { | |
| 1842 int c = *subjectPtr; | |
| 1843 if (c > 127) | |
| 1844 break; | |
| 1845 if (toLowerCase(c) == first_char) | |
| 1846 break; | |
| 1847 subjectPtr++; | |
| 1848 } | |
| 1849 else { | |
| 1850 while (subjectPtr < endSubject && *subjectPtr != first_char) | |
| 1851 subjectPtr++; | |
| 1852 } | |
| 1853 } else if (useMultiLineFirstCharOptimization) { | |
| 1854 /* Or to just after \n for a multiline match if possible */ | |
| 1855 // I'm not sure why this != originalSubjectStart check is necessary -- e
cs 11/18/07 | |
| 1856 if (subjectPtr > originalSubjectStart) { | |
| 1857 while (subjectPtr < endSubject && !isNewline(subjectPtr[-1])) | |
| 1858 subjectPtr++; | |
| 1859 } | |
| 1860 } | |
| 1861 } | |
| 1862 | |
| 1863 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* e
ndSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByt
e, const UChar*& reqBytePtr) | |
| 1864 { | |
| 1865 /* If req_byte is set, we know that that character must appear in the subjec
t | |
| 1866 for the match to succeed. If the first character is set, req_byte must be | |
| 1867 later in the subject; otherwise the test starts at the match point. This | |
| 1868 optimization can save a huge amount of backtracking in patterns with nested | |
| 1869 unlimited repeats that aren't going to match. Writing separate code for | |
| 1870 cased/caseless versions makes it go faster, as does using an autoincrement | |
| 1871 and backing off on a match. | |
| 1872 | |
| 1873 HOWEVER: when the subject string is very, very long, searching to its end c
an | |
| 1874 take a long time, and give bad performance on quite ordinary patterns. This | |
| 1875 showed up when somebody was matching /^C/ on a 32-megabyte string... so we | |
| 1876 don't do this when the string is sufficiently long. | |
| 1877 */ | |
| 1878 | |
| 1879 if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) { | |
| 1880 const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0); | |
| 1881 | |
| 1882 /* We don't need to repeat the search if we haven't yet reached the | |
| 1883 place we found it at last time. */ | |
| 1884 | |
| 1885 if (p > reqBytePtr) { | |
| 1886 if (req_byte_caseless) { | |
| 1887 while (p < endSubject) { | |
| 1888 int pp = *p++; | |
| 1889 if (pp == req_byte || pp == req_byte2) { | |
| 1890 p--; | |
| 1891 break; | |
| 1892 } | |
| 1893 } | |
| 1894 } else { | |
| 1895 while (p < endSubject) { | |
| 1896 if (*p++ == req_byte) { | |
| 1897 p--; | |
| 1898 break; | |
| 1899 } | |
| 1900 } | |
| 1901 } | |
| 1902 | |
| 1903 /* If we can't find the required character, break the matching loop
*/ | |
| 1904 | |
| 1905 if (p >= endSubject) | |
| 1906 return true; | |
| 1907 | |
| 1908 /* If we have found the required character, save the point where we | |
| 1909 found it, so that we don't search again next time round the loop if | |
| 1910 the start hasn't passed this character yet. */ | |
| 1911 | |
| 1912 reqBytePtr = p; | |
| 1913 } | |
| 1914 } | |
| 1915 return false; | |
| 1916 } | |
| 1917 | |
| 1918 int jsRegExpExecute(const JSRegExp* re, | |
| 1919 const UChar* subject, int length, int start_offset, int* off
sets, | |
| 1920 int offsetcount) | |
| 1921 { | |
| 1922 ASSERT(re); | |
| 1923 ASSERT(subject); | |
| 1924 ASSERT(offsetcount >= 0); | |
| 1925 ASSERT(offsets || offsetcount == 0); | |
| 1926 | |
| 1927 MatchData matchBlock; | |
| 1928 matchBlock.startSubject = subject; | |
| 1929 matchBlock.endSubject = matchBlock.startSubject + length; | |
| 1930 const UChar* endSubject = matchBlock.endSubject; | |
| 1931 | |
| 1932 matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption); | |
| 1933 matchBlock.ignoreCase = (re->options & IgnoreCaseOption); | |
| 1934 | |
| 1935 /* If the expression has got more back references than the offsets supplied
can | |
| 1936 hold, we get a temporary chunk of working store to use during the matching. | |
| 1937 Otherwise, we can use the vector supplied, rounding down its size to a mult
iple | |
| 1938 of 3. */ | |
| 1939 | |
| 1940 int ocount = offsetcount - (offsetcount % 3); | |
| 1941 | |
| 1942 // FIXME: This is lame that we have to second-guess our caller here. | |
| 1943 // The API should change to either fail-hard when we don't have enough offse
t space | |
| 1944 // or that we shouldn't ask our callers to pre-allocate in the first place. | |
| 1945 bool using_temporary_offsets = false; | |
| 1946 if (re->top_backref > 0 && re->top_backref >= ocount/3) { | |
| 1947 ocount = re->top_backref * 3 + 3; | |
| 1948 matchBlock.offsetVector = new int[ocount]; | |
| 1949 if (!matchBlock.offsetVector) | |
| 1950 return JSRegExpErrorNoMemory; | |
| 1951 using_temporary_offsets = true; | |
| 1952 } else | |
| 1953 matchBlock.offsetVector = offsets; | |
| 1954 | |
| 1955 matchBlock.offsetEnd = ocount; | |
| 1956 matchBlock.offsetMax = (2*ocount)/3; | |
| 1957 matchBlock.offsetOverflow = false; | |
| 1958 | |
| 1959 /* Compute the minimum number of offsets that we need to reset each time. Do
ing | |
| 1960 this makes a huge difference to execution time when there aren't many brack
ets | |
| 1961 in the pattern. */ | |
| 1962 | |
| 1963 int resetcount = 2 + re->top_bracket * 2; | |
| 1964 if (resetcount > offsetcount) | |
| 1965 resetcount = ocount; | |
| 1966 | |
| 1967 /* Reset the working variable associated with each extraction. These should | |
| 1968 never be used unless previously set, but they get saved and restored, and s
o we | |
| 1969 initialize them to avoid reading uninitialized locations. */ | |
| 1970 | |
| 1971 if (matchBlock.offsetVector) { | |
| 1972 int* iptr = matchBlock.offsetVector + ocount; | |
| 1973 int* iend = iptr - resetcount/2 + 1; | |
| 1974 while (--iptr >= iend) | |
| 1975 *iptr = -1; | |
| 1976 } | |
| 1977 | |
| 1978 /* Set up the first character to match, if available. The first_byte value i
s | |
| 1979 never set for an anchored regular expression, but the anchoring may be forc
ed | |
| 1980 at run time, so we have to test for anchoring. The first char may be unset
for | |
| 1981 an unanchored pattern, of course. If there's no first char and the pattern
was | |
| 1982 studied, there may be a bitmap of possible first characters. */ | |
| 1983 | |
| 1984 bool first_byte_caseless = false; | |
| 1985 int first_byte = -1; | |
| 1986 if (re->options & UseFirstByteOptimizationOption) { | |
| 1987 first_byte = re->first_byte & 255; | |
| 1988 if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE))) | |
| 1989 first_byte = toLowerCase(first_byte); | |
| 1990 } | |
| 1991 | |
| 1992 /* For anchored or unanchored matches, there may be a "last known required | |
| 1993 character" set. */ | |
| 1994 | |
| 1995 bool req_byte_caseless = false; | |
| 1996 int req_byte = -1; | |
| 1997 int req_byte2 = -1; | |
| 1998 if (re->options & UseRequiredByteOptimizationOption) { | |
| 1999 req_byte = re->req_byte & 255; // FIXME: This optimization could be made
to work for UTF16 chars as well... | |
| 2000 req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE); | |
| 2001 req_byte2 = flipCase(req_byte); | |
| 2002 } | |
| 2003 | |
| 2004 /* Loop for handling unanchored repeated matching attempts; for anchored reg
exs | |
| 2005 the loop runs just once. */ | |
| 2006 | |
| 2007 const UChar* startMatch = subject + start_offset; | |
| 2008 const UChar* reqBytePtr = startMatch - 1; | |
| 2009 bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByte
OptimizationOption; | |
| 2010 | |
| 2011 do { | |
| 2012 /* Reset the maximum number of extractions we might see. */ | |
| 2013 if (matchBlock.offsetVector) { | |
| 2014 int* iptr = matchBlock.offsetVector; | |
| 2015 int* iend = iptr + resetcount; | |
| 2016 while (iptr < iend) | |
| 2017 *iptr++ = -1; | |
| 2018 } | |
| 2019 | |
| 2020 tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_
caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_off
set); | |
| 2021 if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_by
te2, req_byte_caseless, first_byte >= 0, reqBytePtr)) | |
| 2022 break; | |
| 2023 | |
| 2024 /* When a match occurs, substrings will be set for all internal extracti
ons; | |
| 2025 we just need to set up the whole thing as substring 0 before returning.
If | |
| 2026 there were too many extractions, set the return code to zero. In the ca
se | |
| 2027 where we had to get some local store to hold offsets for backreferences
, copy | |
| 2028 those back references that we can. In this case there need not be overf
low | |
| 2029 if certain parts of the pattern were not used. */ | |
| 2030 | |
| 2031 /* The code starts after the JSRegExp block and the capture name table.
*/ | |
| 2032 const unsigned char* start_code = reinterpret_cast<const unsigned char*>
(re + 1); | |
| 2033 | |
| 2034 int returnCode = match(startMatch, start_code, 2, matchBlock); | |
| 2035 | |
| 2036 /* When the result is no match, advance the pointer to the next characte
r | |
| 2037 and continue. */ | |
| 2038 if (returnCode == 0) { | |
| 2039 startMatch++; | |
| 2040 continue; | |
| 2041 } | |
| 2042 | |
| 2043 if (returnCode != 1) { | |
| 2044 ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExp
ErrorNoMemory); | |
| 2045 DPRINTF((">>>> error: returning %d\n", returnCode)); | |
| 2046 return returnCode; | |
| 2047 } | |
| 2048 | |
| 2049 /* We have a match! Copy the offset information from temporary store if | |
| 2050 necessary */ | |
| 2051 | |
| 2052 if (using_temporary_offsets) { | |
| 2053 if (offsetcount >= 4) { | |
| 2054 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount -
2) * sizeof(int)); | |
| 2055 DPRINTF(("Copied offsets from temporary memory\n")); | |
| 2056 } | |
| 2057 if (matchBlock.endOffsetTop > offsetcount) | |
| 2058 matchBlock.offsetOverflow = true; | |
| 2059 | |
| 2060 DPRINTF(("Freeing temporary memory\n")); | |
| 2061 delete [] matchBlock.offsetVector; | |
| 2062 } | |
| 2063 | |
| 2064 returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2
; | |
| 2065 | |
| 2066 if (offsetcount < 2) | |
| 2067 returnCode = 0; | |
| 2068 else { | |
| 2069 offsets[0] = startMatch - matchBlock.startSubject; | |
| 2070 offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject; | |
| 2071 } | |
| 2072 | |
| 2073 DPRINTF((">>>> returning %d\n", returnCode)); | |
| 2074 return returnCode; | |
| 2075 } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject); | |
| 2076 | |
| 2077 if (using_temporary_offsets) { | |
| 2078 DPRINTF(("Freeing temporary memory\n")); | |
| 2079 delete [] matchBlock.offsetVector; | |
| 2080 } | |
| 2081 | |
| 2082 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n")); | |
| 2083 return JSRegExpErrorNoMatch; | |
| 2084 } | |
| 2085 | |
| 2086 } } // namespace dart::jscre | |
| OLD | NEW |