Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(5)

Side by Side Diff: src/third_party/jscre/pcre_exec.cpp

Issue 21504: Remove JSCRE (Closed) Base URL: http://v8.googlecode.com/svn/branches/bleeding_edge/
Patch Set: Created 11 years, 10 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10 Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include "ASCIICType.h"
50
51 #include <ctype.h>
52 #include <limits.h>
53 #include <string.h> /* for memcpy */
54
55 #ifdef __GNUC__
56 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
57 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
58 #endif
59
60 /* Avoid warnings on Windows. */
61 #undef min
62 #undef max
63
64 namespace v8 { namespace jscre {
65
66 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
67 typedef int ReturnLocation;
68 #else
69 typedef void* ReturnLocation;
70 #endif
71
72 /* Structure for building a chain of data for holding the values of
73 the subject pointer at the start of each bracket, used to detect when
74 an empty string has been matched by a bracket to break infinite loops. */
75 struct BracketChainNode {
76 BracketChainNode* previousBracket;
77 const UChar* bracketStart;
78 };
79
80 struct MatchFrame {
81 ReturnLocation returnLocation;
82 struct MatchFrame* previousFrame;
83
84 /* Function arguments that may change */
85 struct {
86 const UChar* subjectPtr;
87 const unsigned char* instructionPtr;
88 int offsetTop;
89 BracketChainNode* bracketChain;
90 } args;
91
92
93 /* PCRE uses "fake" recursion built off of gotos, thus
94 stack-based local variables are not safe to use. Instead we have to
95 store local variables on the current MatchFrame. */
96 struct {
97 const unsigned char* data;
98 const unsigned char* startOfRepeatingBracket;
99 const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stas h away a subjectPtr here for later compare
100 const unsigned char* instructionPtrAtStartOfOnce;
101
102 int repeatOthercase;
103
104 int ctype;
105 int fc;
106 int fi;
107 int length;
108 int max;
109 int number;
110 int offset;
111 int saveOffset1;
112 int saveOffset2;
113 int saveOffset3;
114
115 BracketChainNode bracketChainNode;
116 } locals;
117 };
118
119 /* Structure for passing "static" information around between the functions
120 doing traditional NFA matching, so that they are thread-safe. */
121
122 struct MatchData {
123 int* offsetVector; /* Offset vector */
124 int offsetEnd; /* One past the end */
125 int offsetMax; /* The maximum usable for return data */
126 bool offsetOverflow; /* Set if too many extractions */
127 const UChar* startSubject; /* Start of the subject string */
128 const UChar* endSubject; /* End of the subject string */
129 const UChar* endMatchPtr; /* Subject position at end match */
130 int endOffsetTop; /* Highwater mark at end of match */
131 bool multiline;
132 bool ignoreCase;
133 };
134
135 /* The maximum remaining length of subject we are prepared to search for a
136 req_byte match. */
137
138 #define REQ_BYTE_MAX 1000
139
140 /* The below limit restricts the number of "recursive" match calls in order to
141 avoid spending exponential time on complex regular expressions. */
142
143 static const unsigned matchLimit = 100000;
144
145 #ifdef DEBUG
146 /*************************************************
147 * Debugging function to print chars *
148 *************************************************/
149
150 /* Print a sequence of chars in printable format, stopping at the end of the
151 subject if the requested.
152
153 Arguments:
154 p points to characters
155 length number to print
156 isSubject true if printing from within md.startSubject
157 md pointer to matching data block, if isSubject is true
158 */
159
160 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
161 {
162 if (isSubject && length > md.endSubject - p)
163 length = md.endSubject - p;
164 while (length-- > 0) {
165 int c;
166 if (isprint(c = *(p++)))
167 printf("%c", c);
168 else if (c < 256)
169 printf("\\x%02x", c);
170 else
171 printf("\\x{%x}", c);
172 }
173 }
174 #endif
175
176 /*************************************************
177 * Match a back-reference *
178 *************************************************/
179
180 /* If a back reference hasn't been set, the length that is passed is greater
181 than the number of characters left in the string, so the match fails.
182
183 Arguments:
184 offset index into the offset vector
185 subjectPtr points into the subject
186 length length to be matched
187 md points to match data block
188
189 Returns: true if matched
190 */
191
192 static bool matchRef(int offset, const UChar* subjectPtr, int length, const Matc hData& md)
193 {
194 const UChar* p = md.startSubject + md.offsetVector[offset];
195
196 #ifdef DEBUG
197 if (subjectPtr >= md.endSubject)
198 printf("matching subject <null>");
199 else {
200 printf("matching subject ");
201 pchars(subjectPtr, length, true, md);
202 }
203 printf(" against backref ");
204 pchars(p, length, false, md);
205 printf("\n");
206 #endif
207
208 /* Always fail if not enough characters left */
209
210 if (length > md.endSubject - subjectPtr)
211 return false;
212
213 /* Separate the caselesss case for speed */
214
215 if (md.ignoreCase) {
216 while (length-- > 0) {
217 UChar c = *p++;
218 int othercase = kjs_pcre_ucp_othercase(c);
219 UChar d = *subjectPtr++;
220 if (c != d && othercase != d)
221 return false;
222 }
223 }
224 else {
225 while (length-- > 0)
226 if (*p++ != *subjectPtr++)
227 return false;
228 }
229
230 return true;
231 }
232
233 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
234
235 /* Use numbered labels and switch statement at the bottom of the match function. */
236
237 #define RMATCH_WHERE(num) num
238 #define RRETURN_LABEL RRETURN_SWITCH
239
240 #else
241
242 /* Use GCC's computed goto extension. */
243
244 /* For one test case this is more than 40% faster than the switch statement.
245 We could avoid the use of the num argument entirely by using local labels,
246 but using it for the GCC case as well as the non-GCC case allows us to share
247 a bit more code and notice if we use conflicting numbers.*/
248
249 #define RMATCH_WHERE(num) &&RRETURN_##num
250 #define RRETURN_LABEL *stack.currentFrame->returnLocation
251
252 #endif
253
254 #define RECURSIVE_MATCH_COMMON(num) \
255 goto RECURSE;\
256 RRETURN_##num: \
257 stack.popCurrentFrame();
258
259 #define RECURSIVE_MATCH(num, ra, rb) \
260 do { \
261 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
262 RECURSIVE_MATCH_COMMON(num) \
263 } while (0)
264
265 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
266 do { \
267 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
268 startNewGroup(stack.currentFrame); \
269 RECURSIVE_MATCH_COMMON(num) \
270 } while (0)
271
272 #define RRETURN goto RRETURN_LABEL
273
274 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
275
276 /*************************************************
277 * Match from current position *
278 *************************************************/
279
280 /* On entry instructionPtr points to the first opcode, and subjectPtr to the fir st character
281 in the subject string, while substringStart holds the value of subjectPtr at the start of the
282 last bracketed group - used for breaking infinite loops matching zero-length
283 strings. This function is called recursively in many circumstances. Whenever it
284 returns a negative (error) response, the outer match() call must also return the
285 same response.
286
287 Arguments:
288 subjectPtr pointer in subject
289 instructionPtr position in code
290 offsetTop current top pointer
291 md pointer to "static" info for the match
292
293 Returns: 1 if matched ) these values are >= 0
294 0 if failed to match )
295 a negative error value if aborted by an error condition
296 (e.g. stopped by repeated call or recursion limit)
297 */
298
299 static const unsigned FRAMES_ON_STACK = 16;
300
301 struct MatchStack {
302 MatchStack()
303 : framesEnd(frames + FRAMES_ON_STACK)
304 , currentFrame(frames)
305 , size(1) // match() creates accesses the first frame w/o calling pushNe wFrame
306 {
307 ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
308 }
309
310 MatchFrame frames[FRAMES_ON_STACK];
311 MatchFrame* framesEnd;
312 MatchFrame* currentFrame;
313 unsigned size;
314
315 inline bool canUseStackBufferForNextFrame()
316 {
317 return size < FRAMES_ON_STACK;
318 }
319
320 inline MatchFrame* allocateNextFrame()
321 {
322 if (canUseStackBufferForNextFrame())
323 return currentFrame + 1;
324 return new MatchFrame;
325 }
326
327 inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNo de* bracketChain, ReturnLocation returnLocation)
328 {
329 MatchFrame* newframe = allocateNextFrame();
330 newframe->previousFrame = currentFrame;
331
332 newframe->args.subjectPtr = currentFrame->args.subjectPtr;
333 newframe->args.offsetTop = currentFrame->args.offsetTop;
334 newframe->args.instructionPtr = instructionPtr;
335 newframe->args.bracketChain = bracketChain;
336 newframe->returnLocation = returnLocation;
337 size++;
338
339 currentFrame = newframe;
340 }
341
342 inline void popCurrentFrame()
343 {
344 MatchFrame* oldFrame = currentFrame;
345 currentFrame = currentFrame->previousFrame;
346 if (size > FRAMES_ON_STACK)
347 delete oldFrame;
348 size--;
349 }
350
351 void popAllFrames()
352 {
353 while (size)
354 popCurrentFrame();
355 }
356 };
357
358 static int matchError(int errorCode, MatchStack& stack)
359 {
360 stack.popAllFrames();
361 return errorCode;
362 }
363
364 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
365 if there are extra bytes. This is called when we know we are in UTF-8 mode. */
366
367 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* su bjectPtr, int& len)
368 {
369 c = *subjectPtr;
370 if ((c & 0xc0) == 0xc0) {
371 int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional byte s */
372 int gcss = 6 * gcaa;
373 c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;
374 for (int gcii = 1; gcii <= gcaa; gcii++) {
375 gcss -= 6;
376 c |= (subjectPtr[gcii] & 0x3f) << gcss;
377 }
378 len += gcaa;
379 }
380 }
381
382 static inline void startNewGroup(MatchFrame* currentFrame)
383 {
384 /* At the start of a bracketed group, add the current subject pointer to the
385 stack of such pointers, to be re-instated at the end of the group when we h it
386 the closing ket. When match() is called in other circumstances, we don't ad d to
387 this stack. */
388
389 currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.b racketChain;
390 currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subj ectPtr;
391 currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
392 }
393
394 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
395 static inline void repeatInformationFromInstructionOffset(short instructionOffse t, bool& minimize, int& minimumRepeats, int& maximumRepeats)
396 {
397 // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_ NOTSTAR
398 static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
399 static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
400
401 ASSERT(instructionOffset >= 0);
402 ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
403
404 minimize = (instructionOffset & 1); // this assumes ordering: Instruction, M inimizeInstruction, Instruction2, MinimizeInstruction2
405 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
406 maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
407 }
408
409 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, i nt offsetTop, MatchData& md)
410 {
411 bool isMatch = false;
412 int min;
413 bool minimize = false; /* Initialization not really needed, but some compile rs think so. */
414 unsigned matchCount = 0;
415
416 MatchStack stack;
417
418 /* The opcode jump table. */
419 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
420 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
421 static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
422 #undef EMIT_JUMP_TABLE_ENTRY
423 #endif
424
425 /* One-time setup of the opcode jump table. */
426 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
427 for (int i = 255; !opcodeJumpTable[i]; i--)
428 opcodeJumpTable[i] = &&CAPTURING_BRACKET;
429 #endif
430
431 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
432 // Shark shows this as a hot line
433 // Using a static const here makes this line disappear, but makes later acce ss hotter (not sure why)
434 stack.currentFrame->returnLocation = &&RETURN;
435 #else
436 stack.currentFrame->returnLocation = 0;
437 #endif
438 stack.currentFrame->args.subjectPtr = subjectPtr;
439 stack.currentFrame->args.instructionPtr = instructionPtr;
440 stack.currentFrame->args.offsetTop = offsetTop;
441 stack.currentFrame->args.bracketChain = 0;
442 startNewGroup(stack.currentFrame);
443
444 /* This is where control jumps back to to effect "recursion" */
445
446 RECURSE:
447 if (++matchCount > matchLimit)
448 return matchError(JSRegExpErrorHitLimit, stack);
449
450 /* Now start processing the operations. */
451
452 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
453 while (true)
454 #endif
455 {
456
457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
458 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
459 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionP tr]
460 #else
461 #define BEGIN_OPCODE(opcode) case OP_##opcode
462 #define NEXT_OPCODE continue
463 #endif
464
465 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
466 NEXT_OPCODE;
467 #else
468 switch (*stack.currentFrame->args.instructionPtr)
469 #endif
470 {
471 /* Non-capturing bracket: optimized */
472
473 BEGIN_OPCODE(BRA):
474 NON_CAPTURING_BRACKET:
475 DPRINTF(("start bracket 0\n"));
476 do {
477 RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
478 if (isMatch)
479 RRETURN;
480 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
481 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
482 DPRINTF(("bracket 0 failed\n"));
483 RRETURN;
484
485 /* Skip over large extraction number data if encountered. */
486
487 BEGIN_OPCODE(BRANUMBER):
488 stack.currentFrame->args.instructionPtr += 3;
489 NEXT_OPCODE;
490
491 /* End of the pattern. */
492
493 BEGIN_OPCODE(END):
494 md.endMatchPtr = stack.currentFrame->args.subjectPtr; / * Record where we ended */
495 md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and h ow many extracts were taken */
496 isMatch = true;
497 RRETURN;
498
499 /* Assertion brackets. Check the alternative branches in turn - the
500 matching won't pass the KET for an assertion. If any one branch mat ches,
501 the assertion is true. Lookbehind assertions have an OP_REVERSE ite m at the
502 start of each branch to move the current point backwards, so the co de at
503 this level is identical to the lookahead case. */
504
505 BEGIN_OPCODE(ASSERT):
506 do {
507 RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, NULL);
508 if (isMatch)
509 break;
510 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
511 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
512 if (*stack.currentFrame->args.instructionPtr == OP_KET)
513 RRETURN_NO_MATCH;
514
515 /* Continue from after the assertion, updating the offsets high water
516 mark, since extracts may have been taken during the assertion. */
517
518 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
519 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
520 stack.currentFrame->args.offsetTop = md.endOffsetTop;
521 NEXT_OPCODE;
522
523 /* Negative assertion: all branches must fail to match */
524
525 BEGIN_OPCODE(ASSERT_NOT):
526 do {
527 RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, NULL);
528 if (isMatch)
529 RRETURN_NO_MATCH;
530 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
531 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
532
533 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
534 NEXT_OPCODE;
535
536 /* An alternation is the end of a branch; scan along to find the end of the
537 bracketed group and go to there. */
538
539 BEGIN_OPCODE(ALT):
540 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
541 NEXT_OPCODE;
542
543 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicat ing
544 that it may occur zero times. It may repeat infinitely, or not at a ll -
545 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upp er
546 repeat limits are compiled as a number of copies, with the optional ones
547 preceded by BRAZERO or BRAMINZERO. */
548
549 BEGIN_OPCODE(BRAZERO): {
550 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre ntFrame->args.instructionPtr + 1;
551 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals .startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
552 if (isMatch)
553 RRETURN;
554 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin gBracket);
555 stack.currentFrame->args.instructionPtr = stack.currentFrame->lo cals.startOfRepeatingBracket + 1 + LINK_SIZE;
556 NEXT_OPCODE;
557 }
558
559 BEGIN_OPCODE(BRAMINZERO): {
560 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre ntFrame->args.instructionPtr + 1;
561 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin gBracket);
562 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals .startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain) ;
563 if (isMatch)
564 RRETURN;
565 stack.currentFrame->args.instructionPtr++;
566 NEXT_OPCODE;
567 }
568
569 /* End of a group, repeated or non-repeating. If we are at the end o f
570 an assertion "group", stop matching and return 1, but record the
571 current high water mark for use by positive assertions. Do this als o
572 for the "once" (not-backup up) groups. */
573
574 BEGIN_OPCODE(KET):
575 BEGIN_OPCODE(KETRMIN):
576 BEGIN_OPCODE(KETRMAX):
577 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.c urrentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instruc tionPtr + 1);
578 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stac k.currentFrame->args.bracketChain->bracketStart;
579
580 /* Back up the stack of bracket start pointers. */
581
582 stack.currentFrame->args.bracketChain = stack.currentFrame->args .bracketChain->previousBracket;
583
584 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == O P_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT _NOT) {
585 md.endOffsetTop = stack.currentFrame->args.offsetTop;
586 isMatch = true;
587 RRETURN;
588 }
589
590 /* In all other cases except a conditional group we have to chec k the
591 group number back at the start and if necessary complete handli ng an
592 extraction by setting the offsets and bumping the high water ma rk. */
593
594 stack.currentFrame->locals.number = *stack.currentFrame->locals. instructionPtrAtStartOfOnce - OP_BRA;
595
596 /* For extended extraction brackets (large number), we have to f ish out
597 the number from a dummy opcode at the start. */
598
599 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
600 stack.currentFrame->locals.number = get2ByteValue(stack.curr entFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
601 stack.currentFrame->locals.offset = stack.currentFrame->locals.n umber << 1;
602
603 #ifdef DEBUG
604 printf("end bracket %d", stack.currentFrame->locals.number);
605 printf("\n");
606 #endif
607
608 /* Test for a numbered group. This includes groups called as a r esult
609 of recursion. Note that whole-pattern recursion is coded as a r ecurse
610 into group 0, so it won't be picked up here. Instead, we catch it when
611 the OP_END is reached. */
612
613 if (stack.currentFrame->locals.number > 0) {
614 if (stack.currentFrame->locals.offset >= md.offsetMax)
615 md.offsetOverflow = true;
616 else {
617 md.offsetVector[stack.currentFrame->locals.offset] =
618 md.offsetVector[md.offsetEnd - stack.currentFrame->local s.number];
619 md.offsetVector[stack.currentFrame->locals.offset+1] = s tack.currentFrame->args.subjectPtr - md.startSubject;
620 if (stack.currentFrame->args.offsetTop <= stack.currentF rame->locals.offset)
621 stack.currentFrame->args.offsetTop = stack.currentFr ame->locals.offset + 2;
622 }
623 }
624
625 /* For a non-repeating ket, just continue at this level. This al so
626 happens for a repeating ket if no characters were matched in th e group.
627 This is the forcible breaking of infinite loops as implemented in Perl
628 5.005. If there is an options reset, it will get obeyed in the normal
629 course of events. */
630
631 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack. currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfI nstruction) {
632 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
633 NEXT_OPCODE;
634 }
635
636 /* The repeating kets try the rest of the pattern or restart fro m the
637 preceding bracket, in the appropriate order. */
638
639 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
640 RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
641 if (isMatch)
642 RRETURN;
643 RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->lo cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
644 if (isMatch)
645 RRETURN;
646 } else { /* OP_KETRMAX */
647 RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->lo cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
648 if (isMatch)
649 RRETURN;
650 RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
651 if (isMatch)
652 RRETURN;
653 }
654 RRETURN;
655
656 /* Start of subject. */
657
658 BEGIN_OPCODE(CIRC):
659 if (stack.currentFrame->args.subjectPtr != md.startSubject)
660 RRETURN_NO_MATCH;
661 stack.currentFrame->args.instructionPtr++;
662 NEXT_OPCODE;
663
664 /* After internal newline if multiline. */
665
666 BEGIN_OPCODE(BOL):
667 if (stack.currentFrame->args.subjectPtr != md.startSubject && !i sNewline(stack.currentFrame->args.subjectPtr[-1]))
668 RRETURN_NO_MATCH;
669 stack.currentFrame->args.instructionPtr++;
670 NEXT_OPCODE;
671
672 /* End of subject. */
673
674 BEGIN_OPCODE(DOLL):
675 if (stack.currentFrame->args.subjectPtr < md.endSubject)
676 RRETURN_NO_MATCH;
677 stack.currentFrame->args.instructionPtr++;
678 NEXT_OPCODE;
679
680 /* Before internal newline if multiline. */
681
682 BEGIN_OPCODE(EOL):
683 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNe wline(*stack.currentFrame->args.subjectPtr))
684 RRETURN_NO_MATCH;
685 stack.currentFrame->args.instructionPtr++;
686 NEXT_OPCODE;
687
688 /* Word boundary assertions */
689
690 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
691 BEGIN_OPCODE(WORD_BOUNDARY): {
692 bool currentCharIsWordChar = false;
693 bool previousCharIsWordChar = false;
694
695 if (stack.currentFrame->args.subjectPtr > md.startSubject)
696 previousCharIsWordChar = isWordChar(stack.currentFrame->args .subjectPtr[-1]);
697 if (stack.currentFrame->args.subjectPtr < md.endSubject)
698 currentCharIsWordChar = isWordChar(*stack.currentFrame->args .subjectPtr);
699
700 /* Now see if the situation is what we want */
701 bool wordBoundaryDesired = (*stack.currentFrame->args.instructio nPtr++ == OP_WORD_BOUNDARY);
702 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharI sWordChar : currentCharIsWordChar != previousCharIsWordChar)
703 RRETURN_NO_MATCH;
704 NEXT_OPCODE;
705 }
706
707 /* Match a single character type; inline for speed */
708
709 BEGIN_OPCODE(NOT_NEWLINE):
710 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
711 RRETURN_NO_MATCH;
712 if (isNewline(*stack.currentFrame->args.subjectPtr++))
713 RRETURN_NO_MATCH;
714 stack.currentFrame->args.instructionPtr++;
715 NEXT_OPCODE;
716
717 BEGIN_OPCODE(NOT_DIGIT):
718 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
719 RRETURN_NO_MATCH;
720 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
721 RRETURN_NO_MATCH;
722 stack.currentFrame->args.instructionPtr++;
723 NEXT_OPCODE;
724
725 BEGIN_OPCODE(DIGIT):
726 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
727 RRETURN_NO_MATCH;
728 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
729 RRETURN_NO_MATCH;
730 stack.currentFrame->args.instructionPtr++;
731 NEXT_OPCODE;
732
733 BEGIN_OPCODE(NOT_WHITESPACE):
734 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
735 RRETURN_NO_MATCH;
736 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
737 RRETURN_NO_MATCH;
738 stack.currentFrame->args.instructionPtr++;
739 NEXT_OPCODE;
740
741 BEGIN_OPCODE(WHITESPACE):
742 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
743 RRETURN_NO_MATCH;
744 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
745 RRETURN_NO_MATCH;
746 stack.currentFrame->args.instructionPtr++;
747 NEXT_OPCODE;
748
749 BEGIN_OPCODE(NOT_WORDCHAR):
750 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
751 RRETURN_NO_MATCH;
752 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
753 RRETURN_NO_MATCH;
754 stack.currentFrame->args.instructionPtr++;
755 NEXT_OPCODE;
756
757 BEGIN_OPCODE(WORDCHAR):
758 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
759 RRETURN_NO_MATCH;
760 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
761 RRETURN_NO_MATCH;
762 stack.currentFrame->args.instructionPtr++;
763 NEXT_OPCODE;
764
765 /* Match a back reference, possibly repeatedly. Look past the end of the
766 item to see if there is repeat information following. The code is s imilar
767 to that for character classes, but repeated for efficiency. Then ob ey
768 similar code to character type repeats - written out again for spee d.
769 However, if the referenced string is the empty string, always treat
770 it as matched, any number of times (otherwise there could be infini te
771 loops). */
772
773 BEGIN_OPCODE(REF):
774 stack.currentFrame->locals.offset = get2ByteValue(stack.currentF rame->args.instructionPtr + 1) << 1; /* Doubled ref number */
775 stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
776
777 /* If the reference is unset, set the length to be longer than t he amount
778 of subject left; this ensures that every attempt at a match fai ls. We
779 can't just fail here, because of the possibility of quantifiers with zero
780 minima. */
781
782 if (stack.currentFrame->locals.offset >= stack.currentFrame->arg s.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
783 stack.currentFrame->locals.length = 0;
784 else
785 stack.currentFrame->locals.length = md.offsetVector[stack.cu rrentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset ];
786
787 /* Set up for repetition, or handle the non-repeated case */
788
789 switch (*stack.currentFrame->args.instructionPtr) {
790 case OP_CRSTAR:
791 case OP_CRMINSTAR:
792 case OP_CRPLUS:
793 case OP_CRMINPLUS:
794 case OP_CRQUERY:
795 case OP_CRMINQUERY:
796 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
797 break;
798
799 case OP_CRRANGE:
800 case OP_CRMINRANGE:
801 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
802 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
803 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
804 if (stack.currentFrame->locals.max == 0)
805 stack.currentFrame->locals.max = INT_MAX;
806 stack.currentFrame->args.instructionPtr += 5;
807 break;
808
809 default: /* No repeat follows */
810 if (!matchRef(stack.currentFrame->locals.offset, stack.c urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
811 RRETURN_NO_MATCH;
812 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
813 NEXT_OPCODE;
814 }
815
816 /* If the length of the reference is zero, just continue with th e
817 main loop. */
818
819 if (stack.currentFrame->locals.length == 0)
820 NEXT_OPCODE;
821
822 /* First, ensure the minimum number of matches are present. */
823
824 for (int i = 1; i <= min; i++) {
825 if (!matchRef(stack.currentFrame->locals.offset, stack.curre ntFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
826 RRETURN_NO_MATCH;
827 stack.currentFrame->args.subjectPtr += stack.currentFrame->l ocals.length;
828 }
829
830 /* If min = max, continue at the same level without recursion.
831 They are not both allowed to be zero. */
832
833 if (min == stack.currentFrame->locals.max)
834 NEXT_OPCODE;
835
836 /* If minimizing, keep trying and advancing the pointer */
837
838 if (minimize) {
839 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
840 RECURSIVE_MATCH(20, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
841 if (isMatch)
842 RRETURN;
843 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame-> args.subjectPtr, stack.currentFrame->locals.length, md))
844 RRETURN;
845 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
846 }
847 /* Control never reaches here */
848 }
849
850 /* If maximizing, find the longest string and work backwards */
851
852 else {
853 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
854 for (int i = min; i < stack.currentFrame->locals.max; i++) {
855 if (!matchRef(stack.currentFrame->locals.offset, stack.c urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
856 break;
857 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
858 }
859 while (stack.currentFrame->args.subjectPtr >= stack.currentF rame->locals.subjectPtrAtStartOfInstruction) {
860 RECURSIVE_MATCH(21, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
861 if (isMatch)
862 RRETURN;
863 stack.currentFrame->args.subjectPtr -= stack.currentFram e->locals.length;
864 }
865 RRETURN_NO_MATCH;
866 }
867 /* Control never reaches here */
868
869 /* Match a bit-mapped character class, possibly repeatedly. This op code is
870 used when all the characters in the class have values in the range 0-255,
871 and either the matching is caseful, or the characters are in the ra nge
872 0-127 when UTF-8 processing is enabled. The only difference between
873 OP_CLASS and OP_NCLASS occurs when a data character outside the ran ge is
874 encountered.
875
876 First, look past the end of the item to see if there is repeat info rmation
877 following. Then obey similar code to character type repeats - writt en out
878 again for speed. */
879
880 BEGIN_OPCODE(NCLASS):
881 BEGIN_OPCODE(CLASS):
882 stack.currentFrame->locals.data = stack.currentFrame->args.instr uctionPtr + 1; /* Save for matching */
883 stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
884
885 switch (*stack.currentFrame->args.instructionPtr) {
886 case OP_CRSTAR:
887 case OP_CRMINSTAR:
888 case OP_CRPLUS:
889 case OP_CRMINPLUS:
890 case OP_CRQUERY:
891 case OP_CRMINQUERY:
892 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
893 break;
894
895 case OP_CRRANGE:
896 case OP_CRMINRANGE:
897 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
898 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
899 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
900 if (stack.currentFrame->locals.max == 0)
901 stack.currentFrame->locals.max = INT_MAX;
902 stack.currentFrame->args.instructionPtr += 5;
903 break;
904
905 default: /* No repeat follows */
906 min = stack.currentFrame->locals.max = 1;
907 break;
908 }
909
910 /* First, ensure the minimum number of matches are present. */
911
912 for (int i = 1; i <= min; i++) {
913 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
914 RRETURN_NO_MATCH;
915 int c = *stack.currentFrame->args.subjectPtr++;
916 if (c > 255) {
917 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
918 RRETURN_NO_MATCH;
919 } else {
920 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
921 RRETURN_NO_MATCH;
922 }
923 }
924
925 /* If max == min we can continue with the main loop without the
926 need to recurse. */
927
928 if (min == stack.currentFrame->locals.max)
929 NEXT_OPCODE;
930
931 /* If minimizing, keep testing the rest of the expression and ad vancing
932 the pointer while it matches the class. */
933 if (minimize) {
934 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
935 RECURSIVE_MATCH(22, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
936 if (isMatch)
937 RRETURN;
938 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
939 RRETURN;
940 int c = *stack.currentFrame->args.subjectPtr++;
941 if (c > 255) {
942 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
943 RRETURN;
944 } else {
945 if ((stack.currentFrame->locals.data[c/8] & (1 << (c &7))) == 0)
946 RRETURN;
947 }
948 }
949 /* Control never reaches here */
950 }
951 /* If maximizing, find the longest possible run, then work backw ards. */
952 else {
953 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
954
955 for (int i = min; i < stack.currentFrame->locals.max; i++) {
956 if (stack.currentFrame->args.subjectPtr >= md.endSubject )
957 break;
958 int c = *stack.currentFrame->args.subjectPtr;
959 if (c > 255) {
960 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
961 break;
962 } else {
963 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
964 break;
965 }
966 ++stack.currentFrame->args.subjectPtr;
967 }
968 for (;;) {
969 RECURSIVE_MATCH(24, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
970 if (isMatch)
971 RRETURN;
972 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
973 break; /* Stop if tried at original pos */
974 }
975
976 RRETURN;
977 }
978 /* Control never reaches here */
979
980 /* Match an extended character class. */
981
982 BEGIN_OPCODE(XCLASS):
983 stack.currentFrame->locals.data = stack.currentFrame->args.instr uctionPtr + 1 + LINK_SIZE; /* Save for matching */
984 stack.currentFrame->args.instructionPtr += getLinkValue(stack.cu rrentFrame->args.instructionPtr + 1); /* Advance past the i tem */
985
986 switch (*stack.currentFrame->args.instructionPtr) {
987 case OP_CRSTAR:
988 case OP_CRMINSTAR:
989 case OP_CRPLUS:
990 case OP_CRMINPLUS:
991 case OP_CRQUERY:
992 case OP_CRMINQUERY:
993 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
994 break;
995
996 case OP_CRRANGE:
997 case OP_CRMINRANGE:
998 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
999 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
1000 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
1001 if (stack.currentFrame->locals.max == 0)
1002 stack.currentFrame->locals.max = INT_MAX;
1003 stack.currentFrame->args.instructionPtr += 5;
1004 break;
1005
1006 default: /* No repeat follows */
1007 min = stack.currentFrame->locals.max = 1;
1008 }
1009
1010 /* First, ensure the minimum number of matches are present. */
1011
1012 for (int i = 1; i <= min; i++) {
1013 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1014 RRETURN_NO_MATCH;
1015 int c = *stack.currentFrame->args.subjectPtr++;
1016 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1017 RRETURN_NO_MATCH;
1018 }
1019
1020 /* If max == min we can continue with the main loop without the
1021 need to recurse. */
1022
1023 if (min == stack.currentFrame->locals.max)
1024 NEXT_OPCODE;
1025
1026 /* If minimizing, keep testing the rest of the expression and ad vancing
1027 the pointer while it matches the class. */
1028
1029 if (minimize) {
1030 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
1031 RECURSIVE_MATCH(26, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1032 if (isMatch)
1033 RRETURN;
1034 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1035 RRETURN;
1036 int c = *stack.currentFrame->args.subjectPtr++;
1037 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data) )
1038 RRETURN;
1039 }
1040 /* Control never reaches here */
1041 }
1042
1043 /* If maximizing, find the longest possible run, then work backw ards. */
1044
1045 else {
1046 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1047 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1048 if (stack.currentFrame->args.subjectPtr >= md.endSubject )
1049 break;
1050 int c = *stack.currentFrame->args.subjectPtr;
1051 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data) )
1052 break;
1053 ++stack.currentFrame->args.subjectPtr;
1054 }
1055 for(;;) {
1056 RECURSIVE_MATCH(27, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1057 if (isMatch)
1058 RRETURN;
1059 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
1060 break; /* Stop if tried at original pos */
1061 }
1062 RRETURN;
1063 }
1064
1065 /* Control never reaches here */
1066
1067 /* Match a single character, casefully */
1068
1069 BEGIN_OPCODE(CHAR):
1070 stack.currentFrame->locals.length = 1;
1071 stack.currentFrame->args.instructionPtr++;
1072 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1073 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1074 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1075 RRETURN_NO_MATCH;
1076 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.s ubjectPtr++)
1077 RRETURN_NO_MATCH;
1078 NEXT_OPCODE;
1079
1080 /* Match a single character, caselessly */
1081
1082 BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1083 stack.currentFrame->locals.length = 1;
1084 stack.currentFrame->args.instructionPtr++;
1085 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1086 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1087 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1088 RRETURN_NO_MATCH;
1089 int dc = *stack.currentFrame->args.subjectPtr++;
1090 if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercas e(stack.currentFrame->locals.fc) != dc)
1091 RRETURN_NO_MATCH;
1092 NEXT_OPCODE;
1093 }
1094
1095 /* Match a single ASCII character. */
1096
1097 BEGIN_OPCODE(ASCII_CHAR):
1098 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1099 RRETURN_NO_MATCH;
1100 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame-> args.instructionPtr[1])
1101 RRETURN_NO_MATCH;
1102 ++stack.currentFrame->args.subjectPtr;
1103 stack.currentFrame->args.instructionPtr += 2;
1104 NEXT_OPCODE;
1105
1106 /* Match one of two cases of an ASCII letter. */
1107
1108 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1109 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1110 RRETURN_NO_MATCH;
1111 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.curre ntFrame->args.instructionPtr[1])
1112 RRETURN_NO_MATCH;
1113 ++stack.currentFrame->args.subjectPtr;
1114 stack.currentFrame->args.instructionPtr += 2;
1115 NEXT_OPCODE;
1116
1117 /* Match a single character repeatedly; different opcodes share code . */
1118
1119 BEGIN_OPCODE(EXACT):
1120 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1121 minimize = false;
1122 stack.currentFrame->args.instructionPtr += 3;
1123 goto REPEATCHAR;
1124
1125 BEGIN_OPCODE(UPTO):
1126 BEGIN_OPCODE(MINUPTO):
1127 min = 0;
1128 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1129 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPT O;
1130 stack.currentFrame->args.instructionPtr += 3;
1131 goto REPEATCHAR;
1132
1133 BEGIN_OPCODE(STAR):
1134 BEGIN_OPCODE(MINSTAR):
1135 BEGIN_OPCODE(PLUS):
1136 BEGIN_OPCODE(MINPLUS):
1137 BEGIN_OPCODE(QUERY):
1138 BEGIN_OPCODE(MINQUERY):
1139 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1140
1141 /* Common code for all repeated single-character matches. We can give
1142 up quickly if there are fewer than the minimum number of charac ters left in
1143 the subject. */
1144
1145 REPEATCHAR:
1146
1147 stack.currentFrame->locals.length = 1;
1148 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1149 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md. endSubject - stack.currentFrame->args.subjectPtr)
1150 RRETURN_NO_MATCH;
1151 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1152
1153 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1154 int othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack .currentFrame->locals.fc) : -1;
1155
1156 for (int i = 1; i <= min; i++) {
1157 if (*stack.currentFrame->args.subjectPtr != stack.curren tFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1158 RRETURN_NO_MATCH;
1159 ++stack.currentFrame->args.subjectPtr;
1160 }
1161
1162 if (min == stack.currentFrame->locals.max)
1163 NEXT_OPCODE;
1164
1165 if (minimize) {
1166 stack.currentFrame->locals.repeatOthercase = othercase;
1167 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1168 RECURSIVE_MATCH(28, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1169 if (isMatch)
1170 RRETURN;
1171 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1172 RRETURN;
1173 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFr ame->locals.repeatOthercase)
1174 RRETURN;
1175 ++stack.currentFrame->args.subjectPtr;
1176 }
1177 /* Control never reaches here */
1178 } else {
1179 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1180 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1181 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1182 break;
1183 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1184 break;
1185 ++stack.currentFrame->args.subjectPtr;
1186 }
1187 while (stack.currentFrame->args.subjectPtr >= stack.curr entFrame->locals.subjectPtrAtStartOfInstruction) {
1188 RECURSIVE_MATCH(29, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1189 if (isMatch)
1190 RRETURN;
1191 --stack.currentFrame->args.subjectPtr;
1192 }
1193 RRETURN_NO_MATCH;
1194 }
1195 /* Control never reaches here */
1196 } else {
1197 /* No case on surrogate pairs, so no need to bother with "ot hercase". */
1198
1199 for (int i = 1; i <= min; i++) {
1200 if (*stack.currentFrame->args.subjectPtr != stack.curren tFrame->locals.fc)
1201 RRETURN_NO_MATCH;
1202 stack.currentFrame->args.subjectPtr += 2;
1203 }
1204
1205 if (min == stack.currentFrame->locals.max)
1206 NEXT_OPCODE;
1207
1208 if (minimize) {
1209 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1210 RECURSIVE_MATCH(30, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1211 if (isMatch)
1212 RRETURN;
1213 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1214 RRETURN;
1215 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc)
1216 RRETURN;
1217 stack.currentFrame->args.subjectPtr += 2;
1218 }
1219 /* Control never reaches here */
1220 } else {
1221 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1222 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1223 if (stack.currentFrame->args.subjectPtr > md.endSubj ect - 2)
1224 break;
1225 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc)
1226 break;
1227 stack.currentFrame->args.subjectPtr += 2;
1228 }
1229 while (stack.currentFrame->args.subjectPtr >= stack.curr entFrame->locals.subjectPtrAtStartOfInstruction) {
1230 RECURSIVE_MATCH(31, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1231 if (isMatch)
1232 RRETURN;
1233 stack.currentFrame->args.subjectPtr -= 2;
1234 }
1235 RRETURN_NO_MATCH;
1236 }
1237 /* Control never reaches here */
1238 }
1239 /* Control never reaches here */
1240
1241 /* Match a negated single one-byte character. */
1242
1243 BEGIN_OPCODE(NOT): {
1244 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1245 RRETURN_NO_MATCH;
1246 stack.currentFrame->args.instructionPtr++;
1247 int c = *stack.currentFrame->args.subjectPtr++;
1248 if (md.ignoreCase) {
1249 if (c < 128)
1250 c = toLowerCase(c);
1251 if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1252 RRETURN_NO_MATCH;
1253 } else {
1254 if (*stack.currentFrame->args.instructionPtr++ == c)
1255 RRETURN_NO_MATCH;
1256 }
1257 NEXT_OPCODE;
1258 }
1259
1260 /* Match a negated single one-byte character repeatedly. This is alm ost a
1261 repeat of the code for a repeated single character, but I haven't f ound a
1262 nice way of commoning these up that doesn't require a test of the
1263 positive/negative option for each character match. Maybe that would n't add
1264 very much to the time taken, but character matching *is* what this is all
1265 about... */
1266
1267 BEGIN_OPCODE(NOTEXACT):
1268 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1269 minimize = false;
1270 stack.currentFrame->args.instructionPtr += 3;
1271 goto REPEATNOTCHAR;
1272
1273 BEGIN_OPCODE(NOTUPTO):
1274 BEGIN_OPCODE(NOTMINUPTO):
1275 min = 0;
1276 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1277 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMIN UPTO;
1278 stack.currentFrame->args.instructionPtr += 3;
1279 goto REPEATNOTCHAR;
1280
1281 BEGIN_OPCODE(NOTSTAR):
1282 BEGIN_OPCODE(NOTMINSTAR):
1283 BEGIN_OPCODE(NOTPLUS):
1284 BEGIN_OPCODE(NOTMINPLUS):
1285 BEGIN_OPCODE(NOTQUERY):
1286 BEGIN_OPCODE(NOTMINQUERY):
1287 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1288
1289 /* Common code for all repeated single-byte matches. We can give up quickly
1290 if there are fewer than the minimum number of bytes left in the
1291 subject. */
1292
1293 REPEATNOTCHAR:
1294 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1295 RRETURN_NO_MATCH;
1296 stack.currentFrame->locals.fc = *stack.currentFrame->args.instru ctionPtr++;
1297
1298 /* The code is duplicated for the caseless and caseful cases, fo r speed,
1299 since matching characters is likely to be quite common. First, ensure the
1300 minimum number of matches are present. If min = max, continue a t the same
1301 level without recursing. Otherwise, if minimizing, keep trying the rest of
1302 the expression and advancing one matching character if failing, up to the
1303 maximum. Alternatively, if maximizing, find the maximum number of
1304 characters and work backwards. */
1305
1306 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->lo cals.fc, min, stack.currentFrame->locals.max));
1307
1308 if (md.ignoreCase) {
1309 if (stack.currentFrame->locals.fc < 128)
1310 stack.currentFrame->locals.fc = toLowerCase(stack.curren tFrame->locals.fc);
1311
1312 for (int i = 1; i <= min; i++) {
1313 int d = *stack.currentFrame->args.subjectPtr++;
1314 if (d < 128)
1315 d = toLowerCase(d);
1316 if (stack.currentFrame->locals.fc == d)
1317 RRETURN_NO_MATCH;
1318 }
1319
1320 if (min == stack.currentFrame->locals.max)
1321 NEXT_OPCODE;
1322
1323 if (minimize) {
1324 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1325 RECURSIVE_MATCH(38, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1326 if (isMatch)
1327 RRETURN;
1328 int d = *stack.currentFrame->args.subjectPtr++;
1329 if (d < 128)
1330 d = toLowerCase(d);
1331 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack .currentFrame->locals.fc == d)
1332 RRETURN;
1333 }
1334 /* Control never reaches here */
1335 }
1336
1337 /* Maximize case */
1338
1339 else {
1340 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1341
1342 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1343 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1344 break;
1345 int d = *stack.currentFrame->args.subjectPtr;
1346 if (d < 128)
1347 d = toLowerCase(d);
1348 if (stack.currentFrame->locals.fc == d)
1349 break;
1350 ++stack.currentFrame->args.subjectPtr;
1351 }
1352 for (;;) {
1353 RECURSIVE_MATCH(40, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1354 if (isMatch)
1355 RRETURN;
1356 if (stack.currentFrame->args.subjectPtr-- == stack.c urrentFrame->locals.subjectPtrAtStartOfInstruction)
1357 break; /* Stop if tried at original pos * /
1358 }
1359
1360 RRETURN;
1361 }
1362 /* Control never reaches here */
1363 }
1364
1365 /* Caseful comparisons */
1366
1367 else {
1368 for (int i = 1; i <= min; i++) {
1369 int d = *stack.currentFrame->args.subjectPtr++;
1370 if (stack.currentFrame->locals.fc == d)
1371 RRETURN_NO_MATCH;
1372 }
1373
1374 if (min == stack.currentFrame->locals.max)
1375 NEXT_OPCODE;
1376
1377 if (minimize) {
1378 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1379 RECURSIVE_MATCH(42, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1380 if (isMatch)
1381 RRETURN;
1382 int d = *stack.currentFrame->args.subjectPtr++;
1383 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack .currentFrame->locals.fc == d)
1384 RRETURN;
1385 }
1386 /* Control never reaches here */
1387 }
1388
1389 /* Maximize case */
1390
1391 else {
1392 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1393
1394 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1395 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1396 break;
1397 int d = *stack.currentFrame->args.subjectPtr;
1398 if (stack.currentFrame->locals.fc == d)
1399 break;
1400 ++stack.currentFrame->args.subjectPtr;
1401 }
1402 for (;;) {
1403 RECURSIVE_MATCH(44, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1404 if (isMatch)
1405 RRETURN;
1406 if (stack.currentFrame->args.subjectPtr-- == stack.c urrentFrame->locals.subjectPtrAtStartOfInstruction)
1407 break; /* Stop if tried at original pos * /
1408 }
1409
1410 RRETURN;
1411 }
1412 }
1413 /* Control never reaches here */
1414
1415 /* Match a single character type repeatedly; several different opcod es
1416 share code. This is very similar to the code for single characters, but we
1417 repeat it in the interests of efficiency. */
1418
1419 BEGIN_OPCODE(TYPEEXACT):
1420 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1421 minimize = true;
1422 stack.currentFrame->args.instructionPtr += 3;
1423 goto REPEATTYPE;
1424
1425 BEGIN_OPCODE(TYPEUPTO):
1426 BEGIN_OPCODE(TYPEMINUPTO):
1427 min = 0;
1428 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1429 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMI NUPTO;
1430 stack.currentFrame->args.instructionPtr += 3;
1431 goto REPEATTYPE;
1432
1433 BEGIN_OPCODE(TYPESTAR):
1434 BEGIN_OPCODE(TYPEMINSTAR):
1435 BEGIN_OPCODE(TYPEPLUS):
1436 BEGIN_OPCODE(TYPEMINPLUS):
1437 BEGIN_OPCODE(TYPEQUERY):
1438 BEGIN_OPCODE(TYPEMINQUERY):
1439 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1440
1441 /* Common code for all repeated single character type matches. N ote that
1442 in UTF-8 mode, '.' matches a character of any length, but for t he other
1443 character types, the valid characters are all one-byte long. */
1444
1445 REPEATTYPE:
1446 stack.currentFrame->locals.ctype = *stack.currentFrame->args.ins tructionPtr++; /* Code for the character type */
1447
1448 /* First, ensure the minimum number of matches are present. Use inline
1449 code for maximizing the speed, and do the type test once at the start
1450 (i.e. keep it out of the loop). Also we can test that there are at least
1451 the minimum number of characters before we start. */
1452
1453 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1454 RRETURN_NO_MATCH;
1455 if (min > 0) {
1456 switch (stack.currentFrame->locals.ctype) {
1457 case OP_NOT_NEWLINE:
1458 for (int i = 1; i <= min; i++) {
1459 if (isNewline(*stack.currentFrame->args.subjectP tr))
1460 RRETURN_NO_MATCH;
1461 ++stack.currentFrame->args.subjectPtr;
1462 }
1463 break;
1464
1465 case OP_NOT_DIGIT:
1466 for (int i = 1; i <= min; i++) {
1467 if (isASCIIDigit(*stack.currentFrame->args.subje ctPtr))
1468 RRETURN_NO_MATCH;
1469 ++stack.currentFrame->args.subjectPtr;
1470 }
1471 break;
1472
1473 case OP_DIGIT:
1474 for (int i = 1; i <= min; i++) {
1475 if (!isASCIIDigit(*stack.currentFrame->args.subj ectPtr))
1476 RRETURN_NO_MATCH;
1477 ++stack.currentFrame->args.subjectPtr;
1478 }
1479 break;
1480
1481 case OP_NOT_WHITESPACE:
1482 for (int i = 1; i <= min; i++) {
1483 if (isSpaceChar(*stack.currentFrame->args.subjec tPtr))
1484 RRETURN_NO_MATCH;
1485 ++stack.currentFrame->args.subjectPtr;
1486 }
1487 break;
1488
1489 case OP_WHITESPACE:
1490 for (int i = 1; i <= min; i++) {
1491 if (!isSpaceChar(*stack.currentFrame->args.subje ctPtr))
1492 RRETURN_NO_MATCH;
1493 ++stack.currentFrame->args.subjectPtr;
1494 }
1495 break;
1496
1497 case OP_NOT_WORDCHAR:
1498 for (int i = 1; i <= min; i++) {
1499 if (isWordChar(*stack.currentFrame->args.subject Ptr))
1500 RRETURN_NO_MATCH;
1501 ++stack.currentFrame->args.subjectPtr;
1502 }
1503 break;
1504
1505 case OP_WORDCHAR:
1506 for (int i = 1; i <= min; i++) {
1507 if (!isWordChar(*stack.currentFrame->args.subjec tPtr))
1508 RRETURN_NO_MATCH;
1509 ++stack.currentFrame->args.subjectPtr;
1510 }
1511 break;
1512
1513 default:
1514 ASSERT_NOT_REACHED();
1515 return matchError(JSRegExpErrorInternal, stack);
1516 } /* End switch(stack.currentFrame->locals.ctype) */
1517 }
1518
1519 /* If min = max, continue at the same level without recursing */
1520
1521 if (min == stack.currentFrame->locals.max)
1522 NEXT_OPCODE;
1523
1524 /* If minimizing, we have to test the rest of the pattern before each
1525 subsequent match. */
1526
1527 if (minimize) {
1528 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
1529 RECURSIVE_MATCH(48, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1530 if (isMatch)
1531 RRETURN;
1532 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1533 RRETURN;
1534
1535 int c = *stack.currentFrame->args.subjectPtr++;
1536 switch (stack.currentFrame->locals.ctype) {
1537 case OP_NOT_NEWLINE:
1538 if (isNewline(c))
1539 RRETURN;
1540 break;
1541
1542 case OP_NOT_DIGIT:
1543 if (isASCIIDigit(c))
1544 RRETURN;
1545 break;
1546
1547 case OP_DIGIT:
1548 if (!isASCIIDigit(c))
1549 RRETURN;
1550 break;
1551
1552 case OP_NOT_WHITESPACE:
1553 if (isSpaceChar(c))
1554 RRETURN;
1555 break;
1556
1557 case OP_WHITESPACE:
1558 if (!isSpaceChar(c))
1559 RRETURN;
1560 break;
1561
1562 case OP_NOT_WORDCHAR:
1563 if (isWordChar(c))
1564 RRETURN;
1565 break;
1566
1567 case OP_WORDCHAR:
1568 if (!isWordChar(c))
1569 RRETURN;
1570 break;
1571
1572 default:
1573 ASSERT_NOT_REACHED();
1574 return matchError(JSRegExpErrorInternal, stack);
1575 }
1576 }
1577 /* Control never reaches here */
1578 }
1579
1580 /* If maximizing it is worth using inline code for speed, doing the type
1581 test once at the start (i.e. keep it out of the loop). */
1582
1583 else {
1584 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
1585
1586 switch (stack.currentFrame->locals.ctype) {
1587 case OP_NOT_NEWLINE:
1588 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1589 if (stack.currentFrame->args.subjectPtr >= md.en dSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1590 break;
1591 stack.currentFrame->args.subjectPtr++;
1592 }
1593 break;
1594
1595 case OP_NOT_DIGIT:
1596 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1597 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1598 break;
1599 int c = *stack.currentFrame->args.subjectPtr;
1600 if (isASCIIDigit(c))
1601 break;
1602 ++stack.currentFrame->args.subjectPtr;
1603 }
1604 break;
1605
1606 case OP_DIGIT:
1607 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1608 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1609 break;
1610 int c = *stack.currentFrame->args.subjectPtr;
1611 if (!isASCIIDigit(c))
1612 break;
1613 ++stack.currentFrame->args.subjectPtr;
1614 }
1615 break;
1616
1617 case OP_NOT_WHITESPACE:
1618 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1619 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1620 break;
1621 int c = *stack.currentFrame->args.subjectPtr;
1622 if (isSpaceChar(c))
1623 break;
1624 ++stack.currentFrame->args.subjectPtr;
1625 }
1626 break;
1627
1628 case OP_WHITESPACE:
1629 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1630 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1631 break;
1632 int c = *stack.currentFrame->args.subjectPtr;
1633 if (!isSpaceChar(c))
1634 break;
1635 ++stack.currentFrame->args.subjectPtr;
1636 }
1637 break;
1638
1639 case OP_NOT_WORDCHAR:
1640 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1641 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1642 break;
1643 int c = *stack.currentFrame->args.subjectPtr;
1644 if (isWordChar(c))
1645 break;
1646 ++stack.currentFrame->args.subjectPtr;
1647 }
1648 break;
1649
1650 case OP_WORDCHAR:
1651 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1652 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1653 break;
1654 int c = *stack.currentFrame->args.subjectPtr;
1655 if (!isWordChar(c))
1656 break;
1657 ++stack.currentFrame->args.subjectPtr;
1658 }
1659 break;
1660
1661 default:
1662 ASSERT_NOT_REACHED();
1663 return matchError(JSRegExpErrorInternal, stack);
1664 }
1665
1666 /* stack.currentFrame->args.subjectPtr is now past the end o f the maximum run */
1667
1668 for (;;) {
1669 RECURSIVE_MATCH(52, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1670 if (isMatch)
1671 RRETURN;
1672 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
1673 break; /* Stop if tried at original pos */
1674 }
1675
1676 /* Get here if we can't make it match with any permitted rep etitions */
1677
1678 RRETURN;
1679 }
1680 /* Control never reaches here */
1681
1682 BEGIN_OPCODE(CRMINPLUS):
1683 BEGIN_OPCODE(CRMINQUERY):
1684 BEGIN_OPCODE(CRMINRANGE):
1685 BEGIN_OPCODE(CRMINSTAR):
1686 BEGIN_OPCODE(CRPLUS):
1687 BEGIN_OPCODE(CRQUERY):
1688 BEGIN_OPCODE(CRRANGE):
1689 BEGIN_OPCODE(CRSTAR):
1690 ASSERT_NOT_REACHED();
1691 return matchError(JSRegExpErrorInternal, stack);
1692
1693 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1694 CAPTURING_BRACKET:
1695 #else
1696 default:
1697 #endif
1698 /* Opening capturing bracket. If there is space in the offset ve ctor, save
1699 the current subject position in the working slot at the top of the vector. We
1700 mustn't change the current values of the data slot, because the y may be set
1701 from a previous iteration of this group, and be referred to by a reference
1702 inside the group.
1703
1704 If the bracket fails to match, we need to restore this value an d also the
1705 values of the final offsets, in case they were set by a previou s iteration of
1706 the same bracket.
1707
1708 If there isn't enough space in the offset vector, treat this as if it were a
1709 non-capturing bracket. Don't worry about setting the flag for t he error case
1710 here; that is handled in the code for KET. */
1711
1712 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1713
1714 stack.currentFrame->locals.number = *stack.currentFrame->args.in structionPtr - OP_BRA;
1715
1716 /* For extended extraction brackets (large number), we have to f ish out the
1717 number from a dummy opcode at the start. */
1718
1719 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1720 stack.currentFrame->locals.number = get2ByteValue(stack.curr entFrame->args.instructionPtr + 2 + LINK_SIZE);
1721 stack.currentFrame->locals.offset = stack.currentFrame->locals.n umber << 1;
1722
1723 #ifdef DEBUG
1724 printf("start bracket %d subject=", stack.currentFrame->locals.n umber);
1725 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1726 printf("\n");
1727 #endif
1728
1729 if (stack.currentFrame->locals.offset < md.offsetMax) {
1730 stack.currentFrame->locals.saveOffset1 = md.offsetVector[sta ck.currentFrame->locals.offset];
1731 stack.currentFrame->locals.saveOffset2 = md.offsetVector[sta ck.currentFrame->locals.offset + 1];
1732 stack.currentFrame->locals.saveOffset3 = md.offsetVector[md. offsetEnd - stack.currentFrame->locals.number];
1733
1734 DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.sav eOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.sav eOffset3));
1735 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu mber] = stack.currentFrame->args.subjectPtr - md.startSubject;
1736
1737 do {
1738 RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame- >args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
1739 if (isMatch)
1740 RRETURN;
1741 stack.currentFrame->args.instructionPtr += getLinkValue( stack.currentFrame->args.instructionPtr + 1);
1742 } while (*stack.currentFrame->args.instructionPtr == OP_ALT) ;
1743
1744 DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.n umber));
1745
1746 md.offsetVector[stack.currentFrame->locals.offset] = stack.c urrentFrame->locals.saveOffset1;
1747 md.offsetVector[stack.currentFrame->locals.offset + 1] = sta ck.currentFrame->locals.saveOffset2;
1748 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu mber] = stack.currentFrame->locals.saveOffset3;
1749
1750 RRETURN;
1751 }
1752
1753 /* Insufficient room for saving captured contents */
1754
1755 goto NON_CAPTURING_BRACKET;
1756 }
1757
1758 /* Do not stick any code in here without much thought; it is assumed
1759 that "continue" in the code above comes out to here to repeat the main
1760 loop. */
1761
1762 } /* End of main loop */
1763
1764 ASSERT_NOT_REACHED();
1765
1766 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1767
1768 RRETURN_SWITCH:
1769 switch (stack.currentFrame->returnLocation) {
1770 case 0: goto RETURN;
1771 case 1: goto RRETURN_1;
1772 case 2: goto RRETURN_2;
1773 case 6: goto RRETURN_6;
1774 case 7: goto RRETURN_7;
1775 case 14: goto RRETURN_14;
1776 case 15: goto RRETURN_15;
1777 case 16: goto RRETURN_16;
1778 case 17: goto RRETURN_17;
1779 case 18: goto RRETURN_18;
1780 case 19: goto RRETURN_19;
1781 case 20: goto RRETURN_20;
1782 case 21: goto RRETURN_21;
1783 case 22: goto RRETURN_22;
1784 case 24: goto RRETURN_24;
1785 case 26: goto RRETURN_26;
1786 case 27: goto RRETURN_27;
1787 case 28: goto RRETURN_28;
1788 case 29: goto RRETURN_29;
1789 case 30: goto RRETURN_30;
1790 case 31: goto RRETURN_31;
1791 case 38: goto RRETURN_38;
1792 case 40: goto RRETURN_40;
1793 case 42: goto RRETURN_42;
1794 case 44: goto RRETURN_44;
1795 case 48: goto RRETURN_48;
1796 case 52: goto RRETURN_52;
1797 }
1798
1799 ASSERT_NOT_REACHED();
1800 return matchError(JSRegExpErrorInternal, stack);
1801
1802 #endif
1803
1804 RETURN:
1805 return isMatch;
1806 }
1807
1808
1809 /*************************************************
1810 * Execute a Regular Expression *
1811 *************************************************/
1812
1813 /* This function applies a compiled re to a subject string and picks out
1814 portions of the string if it matches. Two elements in the vector are set for
1815 each substring: the offsets to the start and end of the substring.
1816
1817 Arguments:
1818 re points to the compiled expression
1819 extra_data points to extra data or is NULL
1820 subject points to the subject string
1821 length length of subject string (may contain binary zeros)
1822 start_offset where to start in the subject string
1823 options option bits
1824 offsets points to a vector of ints to be filled in with offsets
1825 offsetcount the number of elements in the vector
1826
1827 Returns: > 0 => success; value is the number of elements filled in
1828 = 0 => success, but offsets is not big enough
1829 -1 => failed to match
1830 < -1 => some kind of unexpected problem
1831 */
1832
1833 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endS ubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOpti mization, const UChar* originalSubjectStart)
1834 {
1835 // If first_byte is set, try scanning to the first instance of that byte
1836 // no need to try and match against any earlier part of the subject string.
1837 if (first_byte >= 0) {
1838 UChar first_char = first_byte;
1839 if (first_byte_caseless)
1840 while (subjectPtr < endSubject) {
1841 int c = *subjectPtr;
1842 if (c > 127)
1843 break;
1844 if (toLowerCase(c) == first_char)
1845 break;
1846 subjectPtr++;
1847 }
1848 else {
1849 while (subjectPtr < endSubject && *subjectPtr != first_char)
1850 subjectPtr++;
1851 }
1852 } else if (useMultiLineFirstCharOptimization) {
1853 /* Or to just after \n for a multiline match if possible */
1854 // I'm not sure why this != originalSubjectStart check is necessary -- e cs 11/18/07
1855 if (subjectPtr > originalSubjectStart) {
1856 while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
1857 subjectPtr++;
1858 }
1859 }
1860 }
1861
1862 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* e ndSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByt e, const UChar*& reqBytePtr)
1863 {
1864 /* If req_byte is set, we know that that character must appear in the subjec t
1865 for the match to succeed. If the first character is set, req_byte must be
1866 later in the subject; otherwise the test starts at the match point. This
1867 optimization can save a huge amount of backtracking in patterns with nested
1868 unlimited repeats that aren't going to match. Writing separate code for
1869 cased/caseless versions makes it go faster, as does using an autoincrement
1870 and backing off on a match.
1871
1872 HOWEVER: when the subject string is very, very long, searching to its end c an
1873 take a long time, and give bad performance on quite ordinary patterns. This
1874 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1875 don't do this when the string is sufficiently long.
1876 */
1877
1878 if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
1879 const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
1880
1881 /* We don't need to repeat the search if we haven't yet reached the
1882 place we found it at last time. */
1883
1884 if (p > reqBytePtr) {
1885 if (req_byte_caseless) {
1886 while (p < endSubject) {
1887 int pp = *p++;
1888 if (pp == req_byte || pp == req_byte2) {
1889 p--;
1890 break;
1891 }
1892 }
1893 } else {
1894 while (p < endSubject) {
1895 if (*p++ == req_byte) {
1896 p--;
1897 break;
1898 }
1899 }
1900 }
1901
1902 /* If we can't find the required character, break the matching loop */
1903
1904 if (p >= endSubject)
1905 return true;
1906
1907 /* If we have found the required character, save the point where we
1908 found it, so that we don't search again next time round the loop if
1909 the start hasn't passed this character yet. */
1910
1911 reqBytePtr = p;
1912 }
1913 }
1914 return false;
1915 }
1916
1917 int jsRegExpExecute(const JSRegExp* re,
1918 const UChar* subject, int length, int start_offset, int* off sets,
1919 int offsetcount)
1920 {
1921 ASSERT(re);
1922 ASSERT(subject);
1923 ASSERT(offsetcount >= 0);
1924 ASSERT(offsets || offsetcount == 0);
1925
1926 MatchData matchBlock;
1927 matchBlock.startSubject = subject;
1928 matchBlock.endSubject = matchBlock.startSubject + length;
1929 const UChar* endSubject = matchBlock.endSubject;
1930
1931 matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
1932 matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
1933
1934 /* If the expression has got more back references than the offsets supplied can
1935 hold, we get a temporary chunk of working store to use during the matching.
1936 Otherwise, we can use the vector supplied, rounding down its size to a mult iple
1937 of 3. */
1938
1939 int ocount = offsetcount - (offsetcount % 3);
1940
1941 // FIXME: This is lame that we have to second-guess our caller here.
1942 // The API should change to either fail-hard when we don't have enough offse t space
1943 // or that we shouldn't ask our callers to pre-allocate in the first place.
1944 bool using_temporary_offsets = false;
1945 if (re->top_backref > 0 && re->top_backref >= ocount/3) {
1946 ocount = re->top_backref * 3 + 3;
1947 matchBlock.offsetVector = new int[ocount];
1948 if (!matchBlock.offsetVector)
1949 return JSRegExpErrorNoMemory;
1950 using_temporary_offsets = true;
1951 } else
1952 matchBlock.offsetVector = offsets;
1953
1954 matchBlock.offsetEnd = ocount;
1955 matchBlock.offsetMax = (2*ocount)/3;
1956 matchBlock.offsetOverflow = false;
1957
1958 /* Compute the minimum number of offsets that we need to reset each time. Do ing
1959 this makes a huge difference to execution time when there aren't many brack ets
1960 in the pattern. */
1961
1962 int resetcount = 2 + re->top_bracket * 2;
1963 if (resetcount > offsetcount)
1964 resetcount = ocount;
1965
1966 /* Reset the working variable associated with each extraction. These should
1967 never be used unless previously set, but they get saved and restored, and s o we
1968 initialize them to avoid reading uninitialized locations. */
1969
1970 if (matchBlock.offsetVector) {
1971 int* iptr = matchBlock.offsetVector + ocount;
1972 int* iend = iptr - resetcount/2 + 1;
1973 while (--iptr >= iend)
1974 *iptr = -1;
1975 }
1976
1977 /* Set up the first character to match, if available. The first_byte value i s
1978 never set for an anchored regular expression, but the anchoring may be forc ed
1979 at run time, so we have to test for anchoring. The first char may be unset for
1980 an unanchored pattern, of course. If there's no first char and the pattern was
1981 studied, there may be a bitmap of possible first characters. */
1982
1983 bool first_byte_caseless = false;
1984 int first_byte = -1;
1985 if (re->options & UseFirstByteOptimizationOption) {
1986 first_byte = re->first_byte & 255;
1987 if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
1988 first_byte = toLowerCase(first_byte);
1989 }
1990
1991 /* For anchored or unanchored matches, there may be a "last known required
1992 character" set. */
1993
1994 bool req_byte_caseless = false;
1995 int req_byte = -1;
1996 int req_byte2 = -1;
1997 if (re->options & UseRequiredByteOptimizationOption) {
1998 req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
1999 req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
2000 req_byte2 = flipCase(req_byte);
2001 }
2002
2003 /* Loop for handling unanchored repeated matching attempts; for anchored reg exs
2004 the loop runs just once. */
2005
2006 const UChar* startMatch = subject + start_offset;
2007 const UChar* reqBytePtr = startMatch - 1;
2008 bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByte OptimizationOption;
2009
2010 do {
2011 /* Reset the maximum number of extractions we might see. */
2012 if (matchBlock.offsetVector) {
2013 int* iptr = matchBlock.offsetVector;
2014 int* iend = iptr + resetcount;
2015 while (iptr < iend)
2016 *iptr++ = -1;
2017 }
2018
2019 tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_ caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_off set);
2020 if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_by te2, req_byte_caseless, first_byte >= 0, reqBytePtr))
2021 break;
2022
2023 /* When a match occurs, substrings will be set for all internal extracti ons;
2024 we just need to set up the whole thing as substring 0 before returning. If
2025 there were too many extractions, set the return code to zero. In the ca se
2026 where we had to get some local store to hold offsets for backreferences , copy
2027 those back references that we can. In this case there need not be overf low
2028 if certain parts of the pattern were not used. */
2029
2030 /* The code starts after the JSRegExp block and the capture name table. */
2031 const unsigned char* start_code = (const unsigned char*)(re + 1);
2032
2033 int returnCode = match(startMatch, start_code, 2, matchBlock);
2034
2035 /* When the result is no match, advance the pointer to the next characte r
2036 and continue. */
2037 if (returnCode == 0) {
2038 startMatch++;
2039 continue;
2040 }
2041
2042 if (returnCode != 1) {
2043 ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExp ErrorNoMemory);
2044 DPRINTF((">>>> error: returning %d\n", returnCode));
2045 return returnCode;
2046 }
2047
2048 /* We have a match! Copy the offset information from temporary store if
2049 necessary */
2050
2051 if (using_temporary_offsets) {
2052 if (offsetcount >= 4) {
2053 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
2054 DPRINTF(("Copied offsets from temporary memory\n"));
2055 }
2056 if (matchBlock.endOffsetTop > offsetcount)
2057 matchBlock.offsetOverflow = true;
2058
2059 DPRINTF(("Freeing temporary memory\n"));
2060 delete [] matchBlock.offsetVector;
2061 }
2062
2063 returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2 ;
2064
2065 if (offsetcount < 2)
2066 returnCode = 0;
2067 else {
2068 offsets[0] = startMatch - matchBlock.startSubject;
2069 offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2070 }
2071
2072 DPRINTF((">>>> returning %d\n", returnCode));
2073 return returnCode;
2074 } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
2075
2076 if (using_temporary_offsets) {
2077 DPRINTF(("Freeing temporary memory\n"));
2078 delete [] matchBlock.offsetVector;
2079 }
2080
2081 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2082 return JSRegExpErrorNoMatch;
2083 }
2084
2085 } } // namespace v8::jscre
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698