Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(932)

Side by Side Diff: runtime/third_party/jscre/pcre_exec.cpp

Issue 1071713003: - Remove JSCRE from the runtime. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/
Patch Set: Created 5 years, 8 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch | Annotate | Revision Log
OLDNEW
(Empty)
1 /* This is JavaScriptCore's variant of the PCRE library. While this library
2 started out as a copy of PCRE, many of the features of PCRE have been
3 removed. This library now supports only the regular expression features
4 required by the JavaScript language specification, and has only the functions
5 needed by JavaScriptCore and the rest of WebKit.
6
7 Originally written by Philip Hazel
8 Copyright (c) 1997-2006 University of Cambridge
9 Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
10 Copyright (C) 2007 Eric Seidel <eric@webkit.org>
11
12 -----------------------------------------------------------------------------
13 Redistribution and use in source and binary forms, with or without
14 modification, are permitted provided that the following conditions are met:
15
16 * Redistributions of source code must retain the above copyright notice,
17 this list of conditions and the following disclaimer.
18
19 * Redistributions in binary form must reproduce the above copyright
20 notice, this list of conditions and the following disclaimer in the
21 documentation and/or other materials provided with the distribution.
22
23 * Neither the name of the University of Cambridge nor the names of its
24 contributors may be used to endorse or promote products derived from
25 this software without specific prior written permission.
26
27 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
28 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
31 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
32 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
33 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
34 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
35 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
36 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
37 POSSIBILITY OF SUCH DAMAGE.
38 -----------------------------------------------------------------------------
39 */
40
41 /* This module contains jsRegExpExecute(), the externally visible function
42 that does pattern matching using an NFA algorithm, following the rules from
43 the JavaScript specification. There are also some supporting functions. */
44
45 #include "config.h"
46
47 #include "pcre_internal.h"
48
49 #include "ASCIICType.h"
50
51 #include <ctype.h>
52 #include <limits.h>
53 #include <string.h> /* for memcpy */
54
55 #ifdef __GNUC__
56 #define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
57 //#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
58 #endif
59
60 /* Avoid warnings on Windows. */
61 #undef min
62 #undef max
63
64 namespace dart { namespace jscre {
65
66 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
67 typedef int ReturnLocation;
68 #else
69 typedef void* ReturnLocation;
70 #endif
71
72 /* Structure for building a chain of data for holding the values of
73 the subject pointer at the start of each bracket, used to detect when
74 an empty string has been matched by a bracket to break infinite loops. */
75 struct BracketChainNode {
76 BracketChainNode* previousBracket;
77 const UChar* bracketStart;
78 };
79
80 struct MatchFrame {
81 ReturnLocation returnLocation;
82 struct MatchFrame* previousFrame;
83
84 /* Function arguments that may change */
85 struct {
86 const UChar* subjectPtr;
87 const unsigned char* instructionPtr;
88 int offsetTop;
89 BracketChainNode* bracketChain;
90 } args;
91
92
93 /* PCRE uses "fake" recursion built off of gotos, thus
94 stack-based local variables are not safe to use. Instead we have to
95 store local variables on the current MatchFrame. */
96 struct {
97 const unsigned char* data;
98 const unsigned char* startOfRepeatingBracket;
99 const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stas h away a subjectPtr here for later compare
100 const unsigned char* instructionPtrAtStartOfOnce;
101
102 int repeatOthercase;
103
104 int ctype;
105 int fc;
106 int fi;
107 int length;
108 int max;
109 int number;
110 int offset;
111 int saveOffset1;
112 int saveOffset2;
113 int saveOffset3;
114
115 BracketChainNode bracketChainNode;
116 } locals;
117 };
118
119 /* Structure for passing "static" information around between the functions
120 doing traditional NFA matching, so that they are thread-safe. */
121
122 struct MatchData {
123 int* offsetVector; /* Offset vector */
124 int offsetEnd; /* One past the end */
125 int offsetMax; /* The maximum usable for return data */
126 bool offsetOverflow; /* Set if too many extractions */
127 const UChar* startSubject; /* Start of the subject string */
128 const UChar* endSubject; /* End of the subject string */
129 const UChar* endMatchPtr; /* Subject position at end match */
130 int endOffsetTop; /* Highwater mark at end of match */
131 bool multiline;
132 bool ignoreCase;
133 };
134
135 /* The maximum remaining length of subject we are prepared to search for a
136 req_byte match. */
137
138 #define REQ_BYTE_MAX 1000
139
140 /* The below limit restricts the number of "recursive" match calls in order to
141 avoid spending exponential time on complex regular expressions. */
142
143 static const unsigned matchLimit = 100000;
144
145 #ifdef DEBUG
146 /*************************************************
147 * Debugging function to print chars *
148 *************************************************/
149
150 /* Print a sequence of chars in printable format, stopping at the end of the
151 subject if the requested.
152
153 Arguments:
154 p points to characters
155 length number to print
156 isSubject true if printing from within md.startSubject
157 md pointer to matching data block, if isSubject is true
158 */
159
160 static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
161 {
162 if (isSubject && length > md.endSubject - p)
163 length = md.endSubject - p;
164 while (length-- > 0) {
165 int c;
166 if (isprint(c = *(p++)))
167 printf("%c", c);
168 else if (c < 256)
169 printf("\\x%02x", c);
170 else
171 printf("\\x{%x}", c);
172 }
173 }
174 #endif
175
176 /*************************************************
177 * Match a back-reference *
178 *************************************************/
179
180 /* If a back reference hasn't been set, the length that is passed is greater
181 than the number of characters left in the string, so the match fails.
182
183 Arguments:
184 offset index into the offset vector
185 subjectPtr points into the subject
186 length length to be matched
187 md points to match data block
188
189 Returns: true if matched
190 */
191
192 static bool matchRef(int offset, const UChar* subjectPtr, int length, const Matc hData& md)
193 {
194 const UChar* p = md.startSubject + md.offsetVector[offset];
195
196 #ifdef DEBUG
197 if (subjectPtr >= md.endSubject)
198 printf("matching subject <null>");
199 else {
200 printf("matching subject ");
201 pchars(subjectPtr, length, true, md);
202 }
203 printf(" against backref ");
204 pchars(p, length, false, md);
205 printf("\n");
206 #endif
207
208 /* Always fail if not enough characters left */
209
210 if (length > md.endSubject - subjectPtr)
211 return false;
212
213 /* Separate the caselesss case for speed */
214
215 if (md.ignoreCase) {
216 while (length-- > 0) {
217 UChar c = *p++;
218 int othercase = kjs_pcre_ucp_othercase(c);
219 UChar d = *subjectPtr++;
220 if (c != d && othercase != d)
221 return false;
222 }
223 }
224 else {
225 while (length-- > 0)
226 if (*p++ != *subjectPtr++)
227 return false;
228 }
229
230 return true;
231 }
232
233 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
234
235 /* Use numbered labels and switch statement at the bottom of the match function. */
236
237 #define RMATCH_WHERE(num) num
238 #define RRETURN_LABEL RRETURN_SWITCH
239
240 #else
241
242 /* Use GCC's computed goto extension. */
243
244 /* For one test case this is more than 40% faster than the switch statement.
245 We could avoid the use of the num argument entirely by using local labels,
246 but using it for the GCC case as well as the non-GCC case allows us to share
247 a bit more code and notice if we use conflicting numbers.*/
248
249 #define RMATCH_WHERE(num) &&RRETURN_##num
250 #define RRETURN_LABEL *stack.currentFrame->returnLocation
251
252 #endif
253
254 #define RECURSIVE_MATCH_COMMON(num) \
255 goto RECURSE;\
256 RRETURN_##num: \
257 stack.popCurrentFrame();
258
259 #define RECURSIVE_MATCH(num, ra, rb) \
260 do { \
261 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
262 RECURSIVE_MATCH_COMMON(num) \
263 } while (0)
264
265 #define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
266 do { \
267 stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
268 startNewGroup(stack.currentFrame); \
269 RECURSIVE_MATCH_COMMON(num) \
270 } while (0)
271
272 #define RRETURN goto RRETURN_LABEL
273
274 #define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
275
276 /*************************************************
277 * Match from current position *
278 *************************************************/
279
280 /* On entry instructionPtr points to the first opcode, and subjectPtr to the fir st character
281 in the subject string, while substringStart holds the value of subjectPtr at the start of the
282 last bracketed group - used for breaking infinite loops matching zero-length
283 strings. This function is called recursively in many circumstances. Whenever it
284 returns a negative (error) response, the outer match() call must also return the
285 same response.
286
287 Arguments:
288 subjectPtr pointer in subject
289 instructionPtr position in code
290 offsetTop current top pointer
291 md pointer to "static" info for the match
292
293 Returns: 1 if matched ) these values are >= 0
294 0 if failed to match )
295 a negative error value if aborted by an error condition
296 (e.g. stopped by repeated call or recursion limit)
297 */
298
299 static const unsigned FRAMES_ON_STACK = 16;
300
301 struct MatchStack {
302 MatchStack()
303 : framesEnd(frames + FRAMES_ON_STACK)
304 , currentFrame(frames)
305 , size(1) // match() creates accesses the first frame w/o calling pushNe wFrame
306 {
307 ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
308 }
309
310 MatchFrame frames[FRAMES_ON_STACK];
311 MatchFrame* framesEnd;
312 MatchFrame* currentFrame;
313 unsigned size;
314
315 inline bool canUseStackBufferForNextFrame()
316 {
317 return size < FRAMES_ON_STACK;
318 }
319
320 inline MatchFrame* allocateNextFrame()
321 {
322 if (canUseStackBufferForNextFrame())
323 return currentFrame + 1;
324 return new MatchFrame;
325 }
326
327 inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNo de* bracketChain, ReturnLocation returnLocation)
328 {
329 MatchFrame* newframe = allocateNextFrame();
330 newframe->previousFrame = currentFrame;
331
332 newframe->args.subjectPtr = currentFrame->args.subjectPtr;
333 newframe->args.offsetTop = currentFrame->args.offsetTop;
334 newframe->args.instructionPtr = instructionPtr;
335 newframe->args.bracketChain = bracketChain;
336 newframe->returnLocation = returnLocation;
337 size++;
338
339 currentFrame = newframe;
340 }
341
342 inline void popCurrentFrame()
343 {
344 MatchFrame* oldFrame = currentFrame;
345 currentFrame = currentFrame->previousFrame;
346 if (size > FRAMES_ON_STACK)
347 delete oldFrame;
348 size--;
349 }
350
351 void popAllFrames()
352 {
353 while (size)
354 popCurrentFrame();
355 }
356 };
357
358 static int matchError(int errorCode, MatchStack& stack)
359 {
360 stack.popAllFrames();
361 return errorCode;
362 }
363
364 /* Get the next UTF-8 character, not advancing the pointer, incrementing length
365 if there are extra bytes. This is called when we know we are in UTF-8 mode. */
366
367 static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* su bjectPtr, int& len)
368 {
369 c = *subjectPtr;
370 if ((c & 0xc0) == 0xc0) {
371 int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional byte s */
372 int gcss = 6 * gcaa;
373 c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;
374 for (int gcii = 1; gcii <= gcaa; gcii++) {
375 gcss -= 6;
376 c |= (subjectPtr[gcii] & 0x3f) << gcss;
377 }
378 len += gcaa;
379 }
380 }
381
382 static inline void startNewGroup(MatchFrame* currentFrame)
383 {
384 /* At the start of a bracketed group, add the current subject pointer to the
385 stack of such pointers, to be re-instated at the end of the group when we h it
386 the closing ket. When match() is called in other circumstances, we don't ad d to
387 this stack. */
388
389 currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.b racketChain;
390 currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subj ectPtr;
391 currentFrame->args.bracketChain = &currentFrame->locals.bracketChainNode;
392 }
393
394 // FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
395 static inline void repeatInformationFromInstructionOffset(short instructionOffse t, bool& minimize, int& minimumRepeats, int& maximumRepeats)
396 {
397 // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_ NOTSTAR
398 static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
399 static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
400
401 ASSERT(instructionOffset >= 0);
402 ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
403
404 minimize = (instructionOffset & 1); // this assumes ordering: Instruction, M inimizeInstruction, Instruction2, MinimizeInstruction2
405 minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
406 maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
407 }
408
409 static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, i nt offsetTop, MatchData& md)
410 {
411 bool isMatch = false;
412 int min;
413 bool minimize = false; /* Initialization not really needed, but some compile rs think so. */
414 unsigned matchCount = 0;
415
416 MatchStack stack;
417
418 /* The opcode jump table. */
419 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
420 #define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
421 static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
422 #undef EMIT_JUMP_TABLE_ENTRY
423 #endif
424
425 /* One-time setup of the opcode jump table. */
426 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
427 for (int i = 255; !opcodeJumpTable[i]; i--)
428 opcodeJumpTable[i] = &&CAPTURING_BRACKET;
429 #endif
430
431 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
432 // Shark shows this as a hot line
433 // Using a static const here makes this line disappear, but makes later acce ss hotter (not sure why)
434 stack.currentFrame->returnLocation = &&RETURN;
435 #else
436 stack.currentFrame->returnLocation = 0;
437 #endif
438 stack.currentFrame->args.subjectPtr = subjectPtr;
439 stack.currentFrame->args.instructionPtr = instructionPtr;
440 stack.currentFrame->args.offsetTop = offsetTop;
441 stack.currentFrame->args.bracketChain = 0;
442 startNewGroup(stack.currentFrame);
443
444 /* This is where control jumps back to to effect "recursion" */
445
446 RECURSE:
447 if (++matchCount > matchLimit)
448 return matchError(JSRegExpErrorHitLimit, stack);
449
450 /* Now start processing the operations. */
451
452 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
453 while (true)
454 #endif
455 {
456
457 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
458 #define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
459 #define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionP tr]
460 #else
461 #define BEGIN_OPCODE(opcode) case OP_##opcode
462 #define NEXT_OPCODE continue
463 #endif
464
465 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
466 NEXT_OPCODE;
467 #else
468 switch (*stack.currentFrame->args.instructionPtr)
469 #endif
470 {
471 /* Non-capturing bracket: optimized */
472
473 BEGIN_OPCODE(BRA):
474 NON_CAPTURING_BRACKET:
475 DPRINTF(("start bracket 0\n"));
476 do {
477 RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
478 if (isMatch)
479 RRETURN;
480 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
481 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
482 DPRINTF(("bracket 0 failed\n"));
483 RRETURN;
484
485 /* Skip over large extraction number data if encountered. */
486
487 BEGIN_OPCODE(BRANUMBER):
488 stack.currentFrame->args.instructionPtr += 3;
489 NEXT_OPCODE;
490
491 /* End of the pattern. */
492
493 BEGIN_OPCODE(END):
494 md.endMatchPtr = stack.currentFrame->args.subjectPtr; / * Record where we ended */
495 md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and h ow many extracts were taken */
496 isMatch = true;
497 RRETURN;
498
499 /* Assertion brackets. Check the alternative branches in turn - the
500 matching won't pass the KET for an assertion. If any one branch mat ches,
501 the assertion is true. Lookbehind assertions have an OP_REVERSE ite m at the
502 start of each branch to move the current point backwards, so the co de at
503 this level is identical to the lookahead case. */
504
505 BEGIN_OPCODE(ASSERT):
506 do {
507 RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, NULL);
508 if (isMatch)
509 break;
510 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
511 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
512 if (*stack.currentFrame->args.instructionPtr == OP_KET)
513 RRETURN_NO_MATCH;
514
515 /* Continue from after the assertion, updating the offsets high water
516 mark, since extracts may have been taken during the assertion. */
517
518 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
519 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
520 stack.currentFrame->args.offsetTop = md.endOffsetTop;
521 NEXT_OPCODE;
522
523 /* Negative assertion: all branches must fail to match */
524
525 BEGIN_OPCODE(ASSERT_NOT):
526 do {
527 RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->arg s.instructionPtr + 1 + LINK_SIZE, NULL);
528 if (isMatch)
529 RRETURN_NO_MATCH;
530 stack.currentFrame->args.instructionPtr += getLinkValue(stac k.currentFrame->args.instructionPtr + 1);
531 } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
532
533 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
534 NEXT_OPCODE;
535
536 /* An alternation is the end of a branch; scan along to find the end of the
537 bracketed group and go to there. */
538
539 BEGIN_OPCODE(ALT):
540 advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
541 NEXT_OPCODE;
542
543 /* BRAZERO and BRAMINZERO occur just before a bracket group, indicat ing
544 that it may occur zero times. It may repeat infinitely, or not at a ll -
545 i.e. it could be ()* or ()? in the pattern. Brackets with fixed upp er
546 repeat limits are compiled as a number of copies, with the optional ones
547 preceded by BRAZERO or BRAMINZERO. */
548
549 BEGIN_OPCODE(BRAZERO): {
550 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre ntFrame->args.instructionPtr + 1;
551 RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals .startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
552 if (isMatch)
553 RRETURN;
554 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin gBracket);
555 stack.currentFrame->args.instructionPtr = stack.currentFrame->lo cals.startOfRepeatingBracket + 1 + LINK_SIZE;
556 NEXT_OPCODE;
557 }
558
559 BEGIN_OPCODE(BRAMINZERO): {
560 stack.currentFrame->locals.startOfRepeatingBracket = stack.curre ntFrame->args.instructionPtr + 1;
561 advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatin gBracket);
562 RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals .startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain) ;
563 if (isMatch)
564 RRETURN;
565 stack.currentFrame->args.instructionPtr++;
566 NEXT_OPCODE;
567 }
568
569 /* End of a group, repeated or non-repeating. If we are at the end o f
570 an assertion "group", stop matching and return 1, but record the
571 current high water mark for use by positive assertions. Do this als o
572 for the "once" (not-backup up) groups. */
573
574 BEGIN_OPCODE(KET):
575 BEGIN_OPCODE(KETRMIN):
576 BEGIN_OPCODE(KETRMAX):
577 stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.c urrentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instruc tionPtr + 1);
578 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stac k.currentFrame->args.bracketChain->bracketStart;
579
580 /* Back up the stack of bracket start pointers. */
581
582 stack.currentFrame->args.bracketChain = stack.currentFrame->args .bracketChain->previousBracket;
583
584 if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == O P_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT _NOT) {
585 md.endOffsetTop = stack.currentFrame->args.offsetTop;
586 isMatch = true;
587 RRETURN;
588 }
589
590 /* In all other cases except a conditional group we have to chec k the
591 group number back at the start and if necessary complete handli ng an
592 extraction by setting the offsets and bumping the high water ma rk. */
593
594 stack.currentFrame->locals.number = *stack.currentFrame->locals. instructionPtrAtStartOfOnce - OP_BRA;
595
596 /* For extended extraction brackets (large number), we have to f ish out
597 the number from a dummy opcode at the start. */
598
599 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
600 stack.currentFrame->locals.number = get2ByteValue(stack.curr entFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
601 stack.currentFrame->locals.offset = stack.currentFrame->locals.n umber << 1;
602
603 #ifdef DEBUG
604 printf("end bracket %d", stack.currentFrame->locals.number);
605 printf("\n");
606 #endif
607
608 /* Test for a numbered group. This includes groups called as a r esult
609 of recursion. Note that whole-pattern recursion is coded as a r ecurse
610 into group 0, so it won't be picked up here. Instead, we catch it when
611 the OP_END is reached. */
612
613 if (stack.currentFrame->locals.number > 0) {
614 if (stack.currentFrame->locals.offset >= md.offsetMax)
615 md.offsetOverflow = true;
616 else {
617 md.offsetVector[stack.currentFrame->locals.offset] =
618 md.offsetVector[md.offsetEnd - stack.currentFrame->local s.number];
619 md.offsetVector[stack.currentFrame->locals.offset+1] = s tack.currentFrame->args.subjectPtr - md.startSubject;
620 if (stack.currentFrame->args.offsetTop <= stack.currentF rame->locals.offset)
621 stack.currentFrame->args.offsetTop = stack.currentFr ame->locals.offset + 2;
622 }
623 }
624
625 /* For a non-repeating ket, just continue at this level. This al so
626 happens for a repeating ket if no characters were matched in th e group.
627 This is the forcible breaking of infinite loops as implemented in Perl
628 5.005. If there is an options reset, it will get obeyed in the normal
629 course of events. */
630
631 if (*stack.currentFrame->args.instructionPtr == OP_KET || stack. currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfI nstruction) {
632 stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
633 NEXT_OPCODE;
634 }
635
636 /* The repeating kets try the rest of the pattern or restart fro m the
637 preceding bracket, in the appropriate order. */
638
639 if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
640 RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
641 if (isMatch)
642 RRETURN;
643 RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->lo cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
644 if (isMatch)
645 RRETURN;
646 } else { /* OP_KETRMAX */
647 RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->lo cals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
648 if (isMatch)
649 RRETURN;
650 RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
651 if (isMatch)
652 RRETURN;
653 }
654 RRETURN;
655
656 /* Start of subject. */
657
658 BEGIN_OPCODE(CIRC):
659 if (stack.currentFrame->args.subjectPtr != md.startSubject)
660 RRETURN_NO_MATCH;
661 stack.currentFrame->args.instructionPtr++;
662 NEXT_OPCODE;
663
664 /* After internal newline if multiline. */
665
666 BEGIN_OPCODE(BOL):
667 if (stack.currentFrame->args.subjectPtr != md.startSubject && !i sNewline(stack.currentFrame->args.subjectPtr[-1]))
668 RRETURN_NO_MATCH;
669 stack.currentFrame->args.instructionPtr++;
670 NEXT_OPCODE;
671
672 /* End of subject. */
673
674 BEGIN_OPCODE(DOLL):
675 if (stack.currentFrame->args.subjectPtr < md.endSubject)
676 RRETURN_NO_MATCH;
677 stack.currentFrame->args.instructionPtr++;
678 NEXT_OPCODE;
679
680 /* Before internal newline if multiline. */
681
682 BEGIN_OPCODE(EOL):
683 if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNe wline(*stack.currentFrame->args.subjectPtr))
684 RRETURN_NO_MATCH;
685 stack.currentFrame->args.instructionPtr++;
686 NEXT_OPCODE;
687
688 /* Word boundary assertions */
689
690 BEGIN_OPCODE(NOT_WORD_BOUNDARY):
691 BEGIN_OPCODE(WORD_BOUNDARY): {
692 bool currentCharIsWordChar = false;
693 bool previousCharIsWordChar = false;
694
695 if (stack.currentFrame->args.subjectPtr > md.startSubject)
696 previousCharIsWordChar = isWordChar(stack.currentFrame->args .subjectPtr[-1]);
697 if (stack.currentFrame->args.subjectPtr < md.endSubject)
698 currentCharIsWordChar = isWordChar(*stack.currentFrame->args .subjectPtr);
699
700 /* Now see if the situation is what we want */
701 bool wordBoundaryDesired = (*stack.currentFrame->args.instructio nPtr++ == OP_WORD_BOUNDARY);
702 if (wordBoundaryDesired ? currentCharIsWordChar == previousCharI sWordChar : currentCharIsWordChar != previousCharIsWordChar)
703 RRETURN_NO_MATCH;
704 NEXT_OPCODE;
705 }
706
707 /* Match a single character type; inline for speed */
708
709 BEGIN_OPCODE(NOT_NEWLINE):
710 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
711 RRETURN_NO_MATCH;
712 if (isNewline(*stack.currentFrame->args.subjectPtr++))
713 RRETURN_NO_MATCH;
714 stack.currentFrame->args.instructionPtr++;
715 NEXT_OPCODE;
716
717 BEGIN_OPCODE(NOT_DIGIT):
718 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
719 RRETURN_NO_MATCH;
720 if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
721 RRETURN_NO_MATCH;
722 stack.currentFrame->args.instructionPtr++;
723 NEXT_OPCODE;
724
725 BEGIN_OPCODE(DIGIT):
726 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
727 RRETURN_NO_MATCH;
728 if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
729 RRETURN_NO_MATCH;
730 stack.currentFrame->args.instructionPtr++;
731 NEXT_OPCODE;
732
733 BEGIN_OPCODE(NOT_WHITESPACE):
734 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
735 RRETURN_NO_MATCH;
736 if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
737 RRETURN_NO_MATCH;
738 stack.currentFrame->args.instructionPtr++;
739 NEXT_OPCODE;
740
741 BEGIN_OPCODE(WHITESPACE):
742 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
743 RRETURN_NO_MATCH;
744 if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
745 RRETURN_NO_MATCH;
746 stack.currentFrame->args.instructionPtr++;
747 NEXT_OPCODE;
748
749 BEGIN_OPCODE(NOT_WORDCHAR):
750 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
751 RRETURN_NO_MATCH;
752 if (isWordChar(*stack.currentFrame->args.subjectPtr++))
753 RRETURN_NO_MATCH;
754 stack.currentFrame->args.instructionPtr++;
755 NEXT_OPCODE;
756
757 BEGIN_OPCODE(WORDCHAR):
758 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
759 RRETURN_NO_MATCH;
760 if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
761 RRETURN_NO_MATCH;
762 stack.currentFrame->args.instructionPtr++;
763 NEXT_OPCODE;
764
765 /* Match a back reference, possibly repeatedly. Look past the end of the
766 item to see if there is repeat information following. The code is s imilar
767 to that for character classes, but repeated for efficiency. Then ob ey
768 similar code to character type repeats - written out again for spee d.
769 However, if the referenced string is the empty string, always treat
770 it as matched, any number of times (otherwise there could be infini te
771 loops). */
772
773 BEGIN_OPCODE(REF):
774 stack.currentFrame->locals.offset = get2ByteValue(stack.currentF rame->args.instructionPtr + 1) << 1; /* Doubled ref number */
775 stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
776
777 /* If the reference is unset, set the length to be longer than t he amount
778 of subject left; this ensures that every attempt at a match fai ls. We
779 can't just fail here, because of the possibility of quantifiers with zero
780 minima. */
781
782 if (stack.currentFrame->locals.offset >= stack.currentFrame->arg s.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
783 stack.currentFrame->locals.length = 0;
784 else
785 stack.currentFrame->locals.length = md.offsetVector[stack.cu rrentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset ];
786
787 /* Set up for repetition, or handle the non-repeated case */
788
789 switch (*stack.currentFrame->args.instructionPtr) {
790 case OP_CRSTAR:
791 case OP_CRMINSTAR:
792 case OP_CRPLUS:
793 case OP_CRMINPLUS:
794 case OP_CRQUERY:
795 case OP_CRMINQUERY:
796 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
797 break;
798
799 case OP_CRRANGE:
800 case OP_CRMINRANGE:
801 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
802 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
803 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
804 if (stack.currentFrame->locals.max == 0)
805 stack.currentFrame->locals.max = INT_MAX;
806 stack.currentFrame->args.instructionPtr += 5;
807 break;
808
809 default: /* No repeat follows */
810 if (!matchRef(stack.currentFrame->locals.offset, stack.c urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
811 RRETURN_NO_MATCH;
812 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
813 NEXT_OPCODE;
814 }
815
816 /* If the length of the reference is zero, just continue with th e
817 main loop. */
818
819 if (stack.currentFrame->locals.length == 0)
820 NEXT_OPCODE;
821
822 /* First, ensure the minimum number of matches are present. */
823
824 for (int i = 1; i <= min; i++) {
825 if (!matchRef(stack.currentFrame->locals.offset, stack.curre ntFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
826 RRETURN_NO_MATCH;
827 stack.currentFrame->args.subjectPtr += stack.currentFrame->l ocals.length;
828 }
829
830 /* If min = max, continue at the same level without recursion.
831 They are not both allowed to be zero. */
832
833 if (min == stack.currentFrame->locals.max)
834 NEXT_OPCODE;
835
836 /* If minimizing, keep trying and advancing the pointer */
837
838 if (minimize) {
839 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
840 RECURSIVE_MATCH(20, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
841 if (isMatch)
842 RRETURN;
843 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame-> args.subjectPtr, stack.currentFrame->locals.length, md))
844 RRETURN;
845 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
846 }
847 /* Control never reaches here */
848 }
849
850 /* If maximizing, find the longest string and work backwards */
851
852 else {
853 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
854 for (int i = min; i < stack.currentFrame->locals.max; i++) {
855 if (!matchRef(stack.currentFrame->locals.offset, stack.c urrentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
856 break;
857 stack.currentFrame->args.subjectPtr += stack.currentFram e->locals.length;
858 }
859 while (stack.currentFrame->args.subjectPtr >= stack.currentF rame->locals.subjectPtrAtStartOfInstruction) {
860 RECURSIVE_MATCH(21, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
861 if (isMatch)
862 RRETURN;
863 stack.currentFrame->args.subjectPtr -= stack.currentFram e->locals.length;
864 }
865 RRETURN_NO_MATCH;
866 }
867 /* Control never reaches here */
868
869 /* Match a bit-mapped character class, possibly repeatedly. This op code is
870 used when all the characters in the class have values in the range 0-255,
871 and either the matching is caseful, or the characters are in the ra nge
872 0-127 when UTF-8 processing is enabled. The only difference between
873 OP_CLASS and OP_NCLASS occurs when a data character outside the ran ge is
874 encountered.
875
876 First, look past the end of the item to see if there is repeat info rmation
877 following. Then obey similar code to character type repeats - writt en out
878 again for speed. */
879
880 BEGIN_OPCODE(NCLASS):
881 BEGIN_OPCODE(CLASS):
882 stack.currentFrame->locals.data = stack.currentFrame->args.instr uctionPtr + 1; /* Save for matching */
883 stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
884
885 switch (*stack.currentFrame->args.instructionPtr) {
886 case OP_CRSTAR:
887 case OP_CRMINSTAR:
888 case OP_CRPLUS:
889 case OP_CRMINPLUS:
890 case OP_CRQUERY:
891 case OP_CRMINQUERY:
892 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
893 break;
894
895 case OP_CRRANGE:
896 case OP_CRMINRANGE:
897 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
898 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
899 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
900 if (stack.currentFrame->locals.max == 0)
901 stack.currentFrame->locals.max = INT_MAX;
902 stack.currentFrame->args.instructionPtr += 5;
903 break;
904
905 default: /* No repeat follows */
906 min = stack.currentFrame->locals.max = 1;
907 break;
908 }
909
910 /* First, ensure the minimum number of matches are present. */
911
912 for (int i = 1; i <= min; i++) {
913 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
914 RRETURN_NO_MATCH;
915 int c = *stack.currentFrame->args.subjectPtr++;
916 if (c > 255) {
917 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
918 RRETURN_NO_MATCH;
919 } else {
920 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
921 RRETURN_NO_MATCH;
922 }
923 }
924
925 /* If max == min we can continue with the main loop without the
926 need to recurse. */
927
928 if (min == stack.currentFrame->locals.max)
929 NEXT_OPCODE;
930
931 /* If minimizing, keep testing the rest of the expression and ad vancing
932 the pointer while it matches the class. */
933 if (minimize) {
934 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
935 RECURSIVE_MATCH(22, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
936 if (isMatch)
937 RRETURN;
938 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
939 RRETURN;
940 int c = *stack.currentFrame->args.subjectPtr++;
941 if (c > 255) {
942 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
943 RRETURN;
944 } else {
945 if ((stack.currentFrame->locals.data[c/8] & (1 << (c &7))) == 0)
946 RRETURN;
947 }
948 }
949 /* Control never reaches here */
950 }
951 /* If maximizing, find the longest possible run, then work backw ards. */
952 else {
953 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
954
955 for (int i = min; i < stack.currentFrame->locals.max; i++) {
956 if (stack.currentFrame->args.subjectPtr >= md.endSubject )
957 break;
958 int c = *stack.currentFrame->args.subjectPtr;
959 if (c > 255) {
960 if (stack.currentFrame->locals.data[-1] == OP_CLASS)
961 break;
962 } else {
963 if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
964 break;
965 }
966 ++stack.currentFrame->args.subjectPtr;
967 }
968 for (;;) {
969 RECURSIVE_MATCH(24, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
970 if (isMatch)
971 RRETURN;
972 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
973 break; /* Stop if tried at original pos */
974 }
975
976 RRETURN;
977 }
978 /* Control never reaches here */
979
980 /* Match an extended character class. */
981
982 BEGIN_OPCODE(XCLASS):
983 stack.currentFrame->locals.data = stack.currentFrame->args.instr uctionPtr + 1 + LINK_SIZE; /* Save for matching */
984 stack.currentFrame->args.instructionPtr += getLinkValue(stack.cu rrentFrame->args.instructionPtr + 1); /* Advance past the i tem */
985
986 switch (*stack.currentFrame->args.instructionPtr) {
987 case OP_CRSTAR:
988 case OP_CRMINSTAR:
989 case OP_CRPLUS:
990 case OP_CRMINPLUS:
991 case OP_CRQUERY:
992 case OP_CRMINQUERY:
993 repeatInformationFromInstructionOffset(*stack.currentFra me->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals .max);
994 break;
995
996 case OP_CRRANGE:
997 case OP_CRMINRANGE:
998 minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
999 min = get2ByteValue(stack.currentFrame->args.instruction Ptr + 1);
1000 stack.currentFrame->locals.max = get2ByteValue(stack.cur rentFrame->args.instructionPtr + 3);
1001 if (stack.currentFrame->locals.max == 0)
1002 stack.currentFrame->locals.max = INT_MAX;
1003 stack.currentFrame->args.instructionPtr += 5;
1004 break;
1005
1006 default: /* No repeat follows */
1007 min = stack.currentFrame->locals.max = 1;
1008 }
1009
1010 /* First, ensure the minimum number of matches are present. */
1011
1012 for (int i = 1; i <= min; i++) {
1013 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1014 RRETURN_NO_MATCH;
1015 int c = *stack.currentFrame->args.subjectPtr++;
1016 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
1017 RRETURN_NO_MATCH;
1018 }
1019
1020 /* If max == min we can continue with the main loop without the
1021 need to recurse. */
1022
1023 if (min == stack.currentFrame->locals.max)
1024 NEXT_OPCODE;
1025
1026 /* If minimizing, keep testing the rest of the expression and ad vancing
1027 the pointer while it matches the class. */
1028
1029 if (minimize) {
1030 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
1031 RECURSIVE_MATCH(26, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1032 if (isMatch)
1033 RRETURN;
1034 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1035 RRETURN;
1036 int c = *stack.currentFrame->args.subjectPtr++;
1037 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data) )
1038 RRETURN;
1039 }
1040 /* Control never reaches here */
1041 }
1042
1043 /* If maximizing, find the longest possible run, then work backw ards. */
1044
1045 else {
1046 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
1047 for (int i = min; i < stack.currentFrame->locals.max; i++) {
1048 if (stack.currentFrame->args.subjectPtr >= md.endSubject )
1049 break;
1050 int c = *stack.currentFrame->args.subjectPtr;
1051 if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data) )
1052 break;
1053 ++stack.currentFrame->args.subjectPtr;
1054 }
1055 for(;;) {
1056 RECURSIVE_MATCH(27, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1057 if (isMatch)
1058 RRETURN;
1059 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
1060 break; /* Stop if tried at original pos */
1061 }
1062 RRETURN;
1063 }
1064
1065 /* Control never reaches here */
1066
1067 /* Match a single character, casefully */
1068
1069 BEGIN_OPCODE(CHAR):
1070 stack.currentFrame->locals.length = 1;
1071 stack.currentFrame->args.instructionPtr++;
1072 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1073 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1074 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1075 RRETURN_NO_MATCH;
1076 if (stack.currentFrame->locals.fc != *stack.currentFrame->args.s ubjectPtr++)
1077 RRETURN_NO_MATCH;
1078 NEXT_OPCODE;
1079
1080 /* Match a single character, caselessly */
1081
1082 BEGIN_OPCODE(CHAR_IGNORING_CASE): {
1083 stack.currentFrame->locals.length = 1;
1084 stack.currentFrame->args.instructionPtr++;
1085 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1086 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1087 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1088 RRETURN_NO_MATCH;
1089 int dc = *stack.currentFrame->args.subjectPtr++;
1090 if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercas e(stack.currentFrame->locals.fc) != dc)
1091 RRETURN_NO_MATCH;
1092 NEXT_OPCODE;
1093 }
1094
1095 /* Match a single ASCII character. */
1096
1097 BEGIN_OPCODE(ASCII_CHAR):
1098 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1099 RRETURN_NO_MATCH;
1100 if (*stack.currentFrame->args.subjectPtr != stack.currentFrame-> args.instructionPtr[1])
1101 RRETURN_NO_MATCH;
1102 ++stack.currentFrame->args.subjectPtr;
1103 stack.currentFrame->args.instructionPtr += 2;
1104 NEXT_OPCODE;
1105
1106 /* Match one of two cases of an ASCII letter. */
1107
1108 BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
1109 if (md.endSubject == stack.currentFrame->args.subjectPtr)
1110 RRETURN_NO_MATCH;
1111 if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.curre ntFrame->args.instructionPtr[1])
1112 RRETURN_NO_MATCH;
1113 ++stack.currentFrame->args.subjectPtr;
1114 stack.currentFrame->args.instructionPtr += 2;
1115 NEXT_OPCODE;
1116
1117 /* Match a single character repeatedly; different opcodes share code . */
1118
1119 BEGIN_OPCODE(EXACT):
1120 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1121 minimize = false;
1122 stack.currentFrame->args.instructionPtr += 3;
1123 goto REPEATCHAR;
1124
1125 BEGIN_OPCODE(UPTO):
1126 BEGIN_OPCODE(MINUPTO):
1127 min = 0;
1128 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1129 minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPT O;
1130 stack.currentFrame->args.instructionPtr += 3;
1131 goto REPEATCHAR;
1132
1133 BEGIN_OPCODE(STAR):
1134 BEGIN_OPCODE(MINSTAR):
1135 BEGIN_OPCODE(PLUS):
1136 BEGIN_OPCODE(MINPLUS):
1137 BEGIN_OPCODE(QUERY):
1138 BEGIN_OPCODE(MINQUERY):
1139 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
1140
1141 /* Common code for all repeated single-character matches. We can give
1142 up quickly if there are fewer than the minimum number of charac ters left in
1143 the subject. */
1144
1145 REPEATCHAR:
1146
1147 stack.currentFrame->locals.length = 1;
1148 getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, sta ck.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
1149 if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md. endSubject - stack.currentFrame->args.subjectPtr)
1150 RRETURN_NO_MATCH;
1151 stack.currentFrame->args.instructionPtr += stack.currentFrame->l ocals.length;
1152
1153 if (stack.currentFrame->locals.fc <= 0xFFFF) {
1154 int othercase;
1155 othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.cur rentFrame->locals.fc) : -1;
1156
1157 for (int i = 1; i <= min; i++) {
1158 if (*stack.currentFrame->args.subjectPtr != stack.curren tFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1159 RRETURN_NO_MATCH;
1160 ++stack.currentFrame->args.subjectPtr;
1161 }
1162
1163 if (min == stack.currentFrame->locals.max)
1164 NEXT_OPCODE;
1165
1166 if (minimize) {
1167 stack.currentFrame->locals.repeatOthercase = othercase;
1168 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1169 RECURSIVE_MATCH(28, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1170 if (isMatch)
1171 RRETURN;
1172 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1173 RRETURN;
1174 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFr ame->locals.repeatOthercase)
1175 RRETURN;
1176 ++stack.currentFrame->args.subjectPtr;
1177 }
1178 /* Control never reaches here */
1179 } else {
1180 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1181 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1182 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1183 break;
1184 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
1185 break;
1186 ++stack.currentFrame->args.subjectPtr;
1187 }
1188 while (stack.currentFrame->args.subjectPtr >= stack.curr entFrame->locals.subjectPtrAtStartOfInstruction) {
1189 RECURSIVE_MATCH(29, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1190 if (isMatch)
1191 RRETURN;
1192 --stack.currentFrame->args.subjectPtr;
1193 }
1194 RRETURN_NO_MATCH;
1195 }
1196 /* Control never reaches here */
1197 } else {
1198 /* No case on surrogate pairs, so no need to bother with "ot hercase". */
1199
1200 for (int i = 1; i <= min; i++) {
1201 if (*stack.currentFrame->args.subjectPtr != stack.curren tFrame->locals.fc)
1202 RRETURN_NO_MATCH;
1203 stack.currentFrame->args.subjectPtr += 2;
1204 }
1205
1206 if (min == stack.currentFrame->locals.max)
1207 NEXT_OPCODE;
1208
1209 if (minimize) {
1210 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1211 RECURSIVE_MATCH(30, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1212 if (isMatch)
1213 RRETURN;
1214 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1215 RRETURN;
1216 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc)
1217 RRETURN;
1218 stack.currentFrame->args.subjectPtr += 2;
1219 }
1220 /* Control never reaches here */
1221 } else {
1222 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1223 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1224 if (stack.currentFrame->args.subjectPtr > md.endSubj ect - 2)
1225 break;
1226 if (*stack.currentFrame->args.subjectPtr != stack.cu rrentFrame->locals.fc)
1227 break;
1228 stack.currentFrame->args.subjectPtr += 2;
1229 }
1230 while (stack.currentFrame->args.subjectPtr >= stack.curr entFrame->locals.subjectPtrAtStartOfInstruction) {
1231 RECURSIVE_MATCH(31, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1232 if (isMatch)
1233 RRETURN;
1234 stack.currentFrame->args.subjectPtr -= 2;
1235 }
1236 RRETURN_NO_MATCH;
1237 }
1238 /* Control never reaches here */
1239 }
1240 /* Control never reaches here */
1241
1242 /* Match a negated single one-byte character. */
1243
1244 BEGIN_OPCODE(NOT): {
1245 if (stack.currentFrame->args.subjectPtr >= md.endSubject)
1246 RRETURN_NO_MATCH;
1247 stack.currentFrame->args.instructionPtr++;
1248 int c = *stack.currentFrame->args.subjectPtr++;
1249 if (md.ignoreCase) {
1250 if (c < 128)
1251 c = toLowerCase(c);
1252 if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
1253 RRETURN_NO_MATCH;
1254 } else {
1255 if (*stack.currentFrame->args.instructionPtr++ == c)
1256 RRETURN_NO_MATCH;
1257 }
1258 NEXT_OPCODE;
1259 }
1260
1261 /* Match a negated single one-byte character repeatedly. This is alm ost a
1262 repeat of the code for a repeated single character, but I haven't f ound a
1263 nice way of commoning these up that doesn't require a test of the
1264 positive/negative option for each character match. Maybe that would n't add
1265 very much to the time taken, but character matching *is* what this is all
1266 about... */
1267
1268 BEGIN_OPCODE(NOTEXACT):
1269 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1270 minimize = false;
1271 stack.currentFrame->args.instructionPtr += 3;
1272 goto REPEATNOTCHAR;
1273
1274 BEGIN_OPCODE(NOTUPTO):
1275 BEGIN_OPCODE(NOTMINUPTO):
1276 min = 0;
1277 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1278 minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMIN UPTO;
1279 stack.currentFrame->args.instructionPtr += 3;
1280 goto REPEATNOTCHAR;
1281
1282 BEGIN_OPCODE(NOTSTAR):
1283 BEGIN_OPCODE(NOTMINSTAR):
1284 BEGIN_OPCODE(NOTPLUS):
1285 BEGIN_OPCODE(NOTMINPLUS):
1286 BEGIN_OPCODE(NOTQUERY):
1287 BEGIN_OPCODE(NOTMINQUERY):
1288 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
1289
1290 /* Common code for all repeated single-byte matches. We can give up quickly
1291 if there are fewer than the minimum number of bytes left in the
1292 subject. */
1293
1294 REPEATNOTCHAR:
1295 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1296 RRETURN_NO_MATCH;
1297 stack.currentFrame->locals.fc = *stack.currentFrame->args.instru ctionPtr++;
1298
1299 /* The code is duplicated for the caseless and caseful cases, fo r speed,
1300 since matching characters is likely to be quite common. First, ensure the
1301 minimum number of matches are present. If min = max, continue a t the same
1302 level without recursing. Otherwise, if minimizing, keep trying the rest of
1303 the expression and advancing one matching character if failing, up to the
1304 maximum. Alternatively, if maximizing, find the maximum number of
1305 characters and work backwards. */
1306
1307 DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->lo cals.fc, min, stack.currentFrame->locals.max));
1308
1309 if (md.ignoreCase) {
1310 if (stack.currentFrame->locals.fc < 128)
1311 stack.currentFrame->locals.fc = toLowerCase(stack.curren tFrame->locals.fc);
1312
1313 for (int i = 1; i <= min; i++) {
1314 int d = *stack.currentFrame->args.subjectPtr++;
1315 if (d < 128)
1316 d = toLowerCase(d);
1317 if (stack.currentFrame->locals.fc == d)
1318 RRETURN_NO_MATCH;
1319 }
1320
1321 if (min == stack.currentFrame->locals.max)
1322 NEXT_OPCODE;
1323
1324 if (minimize) {
1325 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1326 RECURSIVE_MATCH(38, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1327 if (isMatch)
1328 RRETURN;
1329 int d = *stack.currentFrame->args.subjectPtr++;
1330 if (d < 128)
1331 d = toLowerCase(d);
1332 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack .currentFrame->locals.fc == d)
1333 RRETURN;
1334 }
1335 /* Control never reaches here */
1336 }
1337
1338 /* Maximize case */
1339
1340 else {
1341 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1342
1343 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1344 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1345 break;
1346 int d = *stack.currentFrame->args.subjectPtr;
1347 if (d < 128)
1348 d = toLowerCase(d);
1349 if (stack.currentFrame->locals.fc == d)
1350 break;
1351 ++stack.currentFrame->args.subjectPtr;
1352 }
1353 for (;;) {
1354 RECURSIVE_MATCH(40, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1355 if (isMatch)
1356 RRETURN;
1357 if (stack.currentFrame->args.subjectPtr-- == stack.c urrentFrame->locals.subjectPtrAtStartOfInstruction)
1358 break; /* Stop if tried at original pos * /
1359 }
1360
1361 RRETURN;
1362 }
1363 /* Control never reaches here */
1364 }
1365
1366 /* Caseful comparisons */
1367
1368 else {
1369 for (int i = 1; i <= min; i++) {
1370 int d = *stack.currentFrame->args.subjectPtr++;
1371 if (stack.currentFrame->locals.fc == d)
1372 RRETURN_NO_MATCH;
1373 }
1374
1375 if (min == stack.currentFrame->locals.max)
1376 NEXT_OPCODE;
1377
1378 if (minimize) {
1379 for (stack.currentFrame->locals.fi = min;; stack.current Frame->locals.fi++) {
1380 RECURSIVE_MATCH(42, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1381 if (isMatch)
1382 RRETURN;
1383 int d = *stack.currentFrame->args.subjectPtr++;
1384 if (stack.currentFrame->locals.fi >= stack.currentFr ame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack .currentFrame->locals.fc == d)
1385 RRETURN;
1386 }
1387 /* Control never reaches here */
1388 }
1389
1390 /* Maximize case */
1391
1392 else {
1393 stack.currentFrame->locals.subjectPtrAtStartOfInstructio n = stack.currentFrame->args.subjectPtr;
1394
1395 for (int i = min; i < stack.currentFrame->locals.max; i+ +) {
1396 if (stack.currentFrame->args.subjectPtr >= md.endSub ject)
1397 break;
1398 int d = *stack.currentFrame->args.subjectPtr;
1399 if (stack.currentFrame->locals.fc == d)
1400 break;
1401 ++stack.currentFrame->args.subjectPtr;
1402 }
1403 for (;;) {
1404 RECURSIVE_MATCH(44, stack.currentFrame->args.instruc tionPtr, stack.currentFrame->args.bracketChain);
1405 if (isMatch)
1406 RRETURN;
1407 if (stack.currentFrame->args.subjectPtr-- == stack.c urrentFrame->locals.subjectPtrAtStartOfInstruction)
1408 break; /* Stop if tried at original pos * /
1409 }
1410
1411 RRETURN;
1412 }
1413 }
1414 /* Control never reaches here */
1415
1416 /* Match a single character type repeatedly; several different opcod es
1417 share code. This is very similar to the code for single characters, but we
1418 repeat it in the interests of efficiency. */
1419
1420 BEGIN_OPCODE(TYPEEXACT):
1421 min = stack.currentFrame->locals.max = get2ByteValue(stack.curre ntFrame->args.instructionPtr + 1);
1422 minimize = true;
1423 stack.currentFrame->args.instructionPtr += 3;
1424 goto REPEATTYPE;
1425
1426 BEGIN_OPCODE(TYPEUPTO):
1427 BEGIN_OPCODE(TYPEMINUPTO):
1428 min = 0;
1429 stack.currentFrame->locals.max = get2ByteValue(stack.currentFram e->args.instructionPtr + 1);
1430 minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMI NUPTO;
1431 stack.currentFrame->args.instructionPtr += 3;
1432 goto REPEATTYPE;
1433
1434 BEGIN_OPCODE(TYPESTAR):
1435 BEGIN_OPCODE(TYPEMINSTAR):
1436 BEGIN_OPCODE(TYPEPLUS):
1437 BEGIN_OPCODE(TYPEMINPLUS):
1438 BEGIN_OPCODE(TYPEQUERY):
1439 BEGIN_OPCODE(TYPEMINQUERY):
1440 repeatInformationFromInstructionOffset(*stack.currentFrame->args .instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
1441
1442 /* Common code for all repeated single character type matches. N ote that
1443 in UTF-8 mode, '.' matches a character of any length, but for t he other
1444 character types, the valid characters are all one-byte long. */
1445
1446 REPEATTYPE:
1447 stack.currentFrame->locals.ctype = *stack.currentFrame->args.ins tructionPtr++; /* Code for the character type */
1448
1449 /* First, ensure the minimum number of matches are present. Use inline
1450 code for maximizing the speed, and do the type test once at the start
1451 (i.e. keep it out of the loop). Also we can test that there are at least
1452 the minimum number of characters before we start. */
1453
1454 if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
1455 RRETURN_NO_MATCH;
1456 if (min > 0) {
1457 switch (stack.currentFrame->locals.ctype) {
1458 case OP_NOT_NEWLINE:
1459 for (int i = 1; i <= min; i++) {
1460 if (isNewline(*stack.currentFrame->args.subjectP tr))
1461 RRETURN_NO_MATCH;
1462 ++stack.currentFrame->args.subjectPtr;
1463 }
1464 break;
1465
1466 case OP_NOT_DIGIT:
1467 for (int i = 1; i <= min; i++) {
1468 if (isASCIIDigit(*stack.currentFrame->args.subje ctPtr))
1469 RRETURN_NO_MATCH;
1470 ++stack.currentFrame->args.subjectPtr;
1471 }
1472 break;
1473
1474 case OP_DIGIT:
1475 for (int i = 1; i <= min; i++) {
1476 if (!isASCIIDigit(*stack.currentFrame->args.subj ectPtr))
1477 RRETURN_NO_MATCH;
1478 ++stack.currentFrame->args.subjectPtr;
1479 }
1480 break;
1481
1482 case OP_NOT_WHITESPACE:
1483 for (int i = 1; i <= min; i++) {
1484 if (isSpaceChar(*stack.currentFrame->args.subjec tPtr))
1485 RRETURN_NO_MATCH;
1486 ++stack.currentFrame->args.subjectPtr;
1487 }
1488 break;
1489
1490 case OP_WHITESPACE:
1491 for (int i = 1; i <= min; i++) {
1492 if (!isSpaceChar(*stack.currentFrame->args.subje ctPtr))
1493 RRETURN_NO_MATCH;
1494 ++stack.currentFrame->args.subjectPtr;
1495 }
1496 break;
1497
1498 case OP_NOT_WORDCHAR:
1499 for (int i = 1; i <= min; i++) {
1500 if (isWordChar(*stack.currentFrame->args.subject Ptr))
1501 RRETURN_NO_MATCH;
1502 ++stack.currentFrame->args.subjectPtr;
1503 }
1504 break;
1505
1506 case OP_WORDCHAR:
1507 for (int i = 1; i <= min; i++) {
1508 if (!isWordChar(*stack.currentFrame->args.subjec tPtr))
1509 RRETURN_NO_MATCH;
1510 ++stack.currentFrame->args.subjectPtr;
1511 }
1512 break;
1513
1514 default:
1515 ASSERT_NOT_REACHED();
1516 return matchError(JSRegExpErrorInternal, stack);
1517 } /* End switch(stack.currentFrame->locals.ctype) */
1518 }
1519
1520 /* If min = max, continue at the same level without recursing */
1521
1522 if (min == stack.currentFrame->locals.max)
1523 NEXT_OPCODE;
1524
1525 /* If minimizing, we have to test the rest of the pattern before each
1526 subsequent match. */
1527
1528 if (minimize) {
1529 for (stack.currentFrame->locals.fi = min;; stack.currentFram e->locals.fi++) {
1530 RECURSIVE_MATCH(48, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1531 if (isMatch)
1532 RRETURN;
1533 if (stack.currentFrame->locals.fi >= stack.currentFrame- >locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
1534 RRETURN;
1535
1536 int c = *stack.currentFrame->args.subjectPtr++;
1537 switch (stack.currentFrame->locals.ctype) {
1538 case OP_NOT_NEWLINE:
1539 if (isNewline(c))
1540 RRETURN;
1541 break;
1542
1543 case OP_NOT_DIGIT:
1544 if (isASCIIDigit(c))
1545 RRETURN;
1546 break;
1547
1548 case OP_DIGIT:
1549 if (!isASCIIDigit(c))
1550 RRETURN;
1551 break;
1552
1553 case OP_NOT_WHITESPACE:
1554 if (isSpaceChar(c))
1555 RRETURN;
1556 break;
1557
1558 case OP_WHITESPACE:
1559 if (!isSpaceChar(c))
1560 RRETURN;
1561 break;
1562
1563 case OP_NOT_WORDCHAR:
1564 if (isWordChar(c))
1565 RRETURN;
1566 break;
1567
1568 case OP_WORDCHAR:
1569 if (!isWordChar(c))
1570 RRETURN;
1571 break;
1572
1573 default:
1574 ASSERT_NOT_REACHED();
1575 return matchError(JSRegExpErrorInternal, stack);
1576 }
1577 }
1578 /* Control never reaches here */
1579 }
1580
1581 /* If maximizing it is worth using inline code for speed, doing the type
1582 test once at the start (i.e. keep it out of the loop). */
1583
1584 else {
1585 stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
1586
1587 switch (stack.currentFrame->locals.ctype) {
1588 case OP_NOT_NEWLINE:
1589 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1590 if (stack.currentFrame->args.subjectPtr >= md.en dSubject || isNewline(*stack.currentFrame->args.subjectPtr))
1591 break;
1592 stack.currentFrame->args.subjectPtr++;
1593 }
1594 break;
1595
1596 case OP_NOT_DIGIT:
1597 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1598 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1599 break;
1600 int c = *stack.currentFrame->args.subjectPtr;
1601 if (isASCIIDigit(c))
1602 break;
1603 ++stack.currentFrame->args.subjectPtr;
1604 }
1605 break;
1606
1607 case OP_DIGIT:
1608 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1609 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1610 break;
1611 int c = *stack.currentFrame->args.subjectPtr;
1612 if (!isASCIIDigit(c))
1613 break;
1614 ++stack.currentFrame->args.subjectPtr;
1615 }
1616 break;
1617
1618 case OP_NOT_WHITESPACE:
1619 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1620 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1621 break;
1622 int c = *stack.currentFrame->args.subjectPtr;
1623 if (isSpaceChar(c))
1624 break;
1625 ++stack.currentFrame->args.subjectPtr;
1626 }
1627 break;
1628
1629 case OP_WHITESPACE:
1630 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1631 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1632 break;
1633 int c = *stack.currentFrame->args.subjectPtr;
1634 if (!isSpaceChar(c))
1635 break;
1636 ++stack.currentFrame->args.subjectPtr;
1637 }
1638 break;
1639
1640 case OP_NOT_WORDCHAR:
1641 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1642 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1643 break;
1644 int c = *stack.currentFrame->args.subjectPtr;
1645 if (isWordChar(c))
1646 break;
1647 ++stack.currentFrame->args.subjectPtr;
1648 }
1649 break;
1650
1651 case OP_WORDCHAR:
1652 for (int i = min; i < stack.currentFrame->locals.max ; i++) {
1653 if (stack.currentFrame->args.subjectPtr >= md.en dSubject)
1654 break;
1655 int c = *stack.currentFrame->args.subjectPtr;
1656 if (!isWordChar(c))
1657 break;
1658 ++stack.currentFrame->args.subjectPtr;
1659 }
1660 break;
1661
1662 default:
1663 ASSERT_NOT_REACHED();
1664 return matchError(JSRegExpErrorInternal, stack);
1665 }
1666
1667 /* stack.currentFrame->args.subjectPtr is now past the end o f the maximum run */
1668
1669 for (;;) {
1670 RECURSIVE_MATCH(52, stack.currentFrame->args.instruction Ptr, stack.currentFrame->args.bracketChain);
1671 if (isMatch)
1672 RRETURN;
1673 if (stack.currentFrame->args.subjectPtr-- == stack.curre ntFrame->locals.subjectPtrAtStartOfInstruction)
1674 break; /* Stop if tried at original pos */
1675 }
1676
1677 /* Get here if we can't make it match with any permitted rep etitions */
1678
1679 RRETURN;
1680 }
1681 /* Control never reaches here */
1682
1683 BEGIN_OPCODE(CRMINPLUS):
1684 BEGIN_OPCODE(CRMINQUERY):
1685 BEGIN_OPCODE(CRMINRANGE):
1686 BEGIN_OPCODE(CRMINSTAR):
1687 BEGIN_OPCODE(CRPLUS):
1688 BEGIN_OPCODE(CRQUERY):
1689 BEGIN_OPCODE(CRRANGE):
1690 BEGIN_OPCODE(CRSTAR):
1691 ASSERT_NOT_REACHED();
1692 return matchError(JSRegExpErrorInternal, stack);
1693
1694 #ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
1695 CAPTURING_BRACKET:
1696 #else
1697 default:
1698 #endif
1699 /* Opening capturing bracket. If there is space in the offset ve ctor, save
1700 the current subject position in the working slot at the top of the vector. We
1701 mustn't change the current values of the data slot, because the y may be set
1702 from a previous iteration of this group, and be referred to by a reference
1703 inside the group.
1704
1705 If the bracket fails to match, we need to restore this value an d also the
1706 values of the final offsets, in case they were set by a previou s iteration of
1707 the same bracket.
1708
1709 If there isn't enough space in the offset vector, treat this as if it were a
1710 non-capturing bracket. Don't worry about setting the flag for t he error case
1711 here; that is handled in the code for KET. */
1712
1713 ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
1714
1715 stack.currentFrame->locals.number = *stack.currentFrame->args.in structionPtr - OP_BRA;
1716
1717 /* For extended extraction brackets (large number), we have to f ish out the
1718 number from a dummy opcode at the start. */
1719
1720 if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
1721 stack.currentFrame->locals.number = get2ByteValue(stack.curr entFrame->args.instructionPtr + 2 + LINK_SIZE);
1722 stack.currentFrame->locals.offset = stack.currentFrame->locals.n umber << 1;
1723
1724 #ifdef DEBUG
1725 printf("start bracket %d subject=", stack.currentFrame->locals.n umber);
1726 pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
1727 printf("\n");
1728 #endif
1729
1730 if (stack.currentFrame->locals.offset < md.offsetMax) {
1731 stack.currentFrame->locals.saveOffset1 = md.offsetVector[sta ck.currentFrame->locals.offset];
1732 stack.currentFrame->locals.saveOffset2 = md.offsetVector[sta ck.currentFrame->locals.offset + 1];
1733 stack.currentFrame->locals.saveOffset3 = md.offsetVector[md. offsetEnd - stack.currentFrame->locals.number];
1734
1735 DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.sav eOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.sav eOffset3));
1736 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu mber] = stack.currentFrame->args.subjectPtr - md.startSubject;
1737
1738 do {
1739 RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame- >args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
1740 if (isMatch)
1741 RRETURN;
1742 stack.currentFrame->args.instructionPtr += getLinkValue( stack.currentFrame->args.instructionPtr + 1);
1743 } while (*stack.currentFrame->args.instructionPtr == OP_ALT) ;
1744
1745 DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.n umber));
1746
1747 md.offsetVector[stack.currentFrame->locals.offset] = stack.c urrentFrame->locals.saveOffset1;
1748 md.offsetVector[stack.currentFrame->locals.offset + 1] = sta ck.currentFrame->locals.saveOffset2;
1749 md.offsetVector[md.offsetEnd - stack.currentFrame->locals.nu mber] = stack.currentFrame->locals.saveOffset3;
1750
1751 RRETURN;
1752 }
1753
1754 /* Insufficient room for saving captured contents */
1755
1756 goto NON_CAPTURING_BRACKET;
1757 }
1758
1759 /* Do not stick any code in here without much thought; it is assumed
1760 that "continue" in the code above comes out to here to repeat the main
1761 loop. */
1762
1763 } /* End of main loop */
1764
1765 ASSERT_NOT_REACHED();
1766
1767 #ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
1768
1769 RRETURN_SWITCH:
1770 switch (stack.currentFrame->returnLocation) {
1771 case 0: goto RETURN;
1772 case 1: goto RRETURN_1;
1773 case 2: goto RRETURN_2;
1774 case 6: goto RRETURN_6;
1775 case 7: goto RRETURN_7;
1776 case 14: goto RRETURN_14;
1777 case 15: goto RRETURN_15;
1778 case 16: goto RRETURN_16;
1779 case 17: goto RRETURN_17;
1780 case 18: goto RRETURN_18;
1781 case 19: goto RRETURN_19;
1782 case 20: goto RRETURN_20;
1783 case 21: goto RRETURN_21;
1784 case 22: goto RRETURN_22;
1785 case 24: goto RRETURN_24;
1786 case 26: goto RRETURN_26;
1787 case 27: goto RRETURN_27;
1788 case 28: goto RRETURN_28;
1789 case 29: goto RRETURN_29;
1790 case 30: goto RRETURN_30;
1791 case 31: goto RRETURN_31;
1792 case 38: goto RRETURN_38;
1793 case 40: goto RRETURN_40;
1794 case 42: goto RRETURN_42;
1795 case 44: goto RRETURN_44;
1796 case 48: goto RRETURN_48;
1797 case 52: goto RRETURN_52;
1798 }
1799
1800 ASSERT_NOT_REACHED();
1801 return matchError(JSRegExpErrorInternal, stack);
1802
1803 #endif
1804
1805 RETURN:
1806 return isMatch;
1807 }
1808
1809
1810 /*************************************************
1811 * Execute a Regular Expression *
1812 *************************************************/
1813
1814 /* This function applies a compiled re to a subject string and picks out
1815 portions of the string if it matches. Two elements in the vector are set for
1816 each substring: the offsets to the start and end of the substring.
1817
1818 Arguments:
1819 re points to the compiled expression
1820 extra_data points to extra data or is NULL
1821 subject points to the subject string
1822 length length of subject string (may contain binary zeros)
1823 start_offset where to start in the subject string
1824 options option bits
1825 offsets points to a vector of ints to be filled in with offsets
1826 offsetcount the number of elements in the vector
1827
1828 Returns: > 0 => success; value is the number of elements filled in
1829 = 0 => success, but offsets is not big enough
1830 -1 => failed to match
1831 < -1 => some kind of unexpected problem
1832 */
1833
1834 static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endS ubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOpti mization, const UChar* originalSubjectStart)
1835 {
1836 // If first_byte is set, try scanning to the first instance of that byte
1837 // no need to try and match against any earlier part of the subject string.
1838 if (first_byte >= 0) {
1839 UChar first_char = first_byte;
1840 if (first_byte_caseless)
1841 while (subjectPtr < endSubject) {
1842 int c = *subjectPtr;
1843 if (c > 127)
1844 break;
1845 if (toLowerCase(c) == first_char)
1846 break;
1847 subjectPtr++;
1848 }
1849 else {
1850 while (subjectPtr < endSubject && *subjectPtr != first_char)
1851 subjectPtr++;
1852 }
1853 } else if (useMultiLineFirstCharOptimization) {
1854 /* Or to just after \n for a multiline match if possible */
1855 // I'm not sure why this != originalSubjectStart check is necessary -- e cs 11/18/07
1856 if (subjectPtr > originalSubjectStart) {
1857 while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
1858 subjectPtr++;
1859 }
1860 }
1861 }
1862
1863 static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* e ndSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByt e, const UChar*& reqBytePtr)
1864 {
1865 /* If req_byte is set, we know that that character must appear in the subjec t
1866 for the match to succeed. If the first character is set, req_byte must be
1867 later in the subject; otherwise the test starts at the match point. This
1868 optimization can save a huge amount of backtracking in patterns with nested
1869 unlimited repeats that aren't going to match. Writing separate code for
1870 cased/caseless versions makes it go faster, as does using an autoincrement
1871 and backing off on a match.
1872
1873 HOWEVER: when the subject string is very, very long, searching to its end c an
1874 take a long time, and give bad performance on quite ordinary patterns. This
1875 showed up when somebody was matching /^C/ on a 32-megabyte string... so we
1876 don't do this when the string is sufficiently long.
1877 */
1878
1879 if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
1880 const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
1881
1882 /* We don't need to repeat the search if we haven't yet reached the
1883 place we found it at last time. */
1884
1885 if (p > reqBytePtr) {
1886 if (req_byte_caseless) {
1887 while (p < endSubject) {
1888 int pp = *p++;
1889 if (pp == req_byte || pp == req_byte2) {
1890 p--;
1891 break;
1892 }
1893 }
1894 } else {
1895 while (p < endSubject) {
1896 if (*p++ == req_byte) {
1897 p--;
1898 break;
1899 }
1900 }
1901 }
1902
1903 /* If we can't find the required character, break the matching loop */
1904
1905 if (p >= endSubject)
1906 return true;
1907
1908 /* If we have found the required character, save the point where we
1909 found it, so that we don't search again next time round the loop if
1910 the start hasn't passed this character yet. */
1911
1912 reqBytePtr = p;
1913 }
1914 }
1915 return false;
1916 }
1917
1918 int jsRegExpExecute(const JSRegExp* re,
1919 const UChar* subject, int length, int start_offset, int* off sets,
1920 int offsetcount)
1921 {
1922 ASSERT(re);
1923 ASSERT(subject);
1924 ASSERT(offsetcount >= 0);
1925 ASSERT(offsets || offsetcount == 0);
1926
1927 MatchData matchBlock;
1928 matchBlock.startSubject = subject;
1929 matchBlock.endSubject = matchBlock.startSubject + length;
1930 const UChar* endSubject = matchBlock.endSubject;
1931
1932 matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
1933 matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
1934
1935 /* If the expression has got more back references than the offsets supplied can
1936 hold, we get a temporary chunk of working store to use during the matching.
1937 Otherwise, we can use the vector supplied, rounding down its size to a mult iple
1938 of 3. */
1939
1940 int ocount = offsetcount - (offsetcount % 3);
1941
1942 // FIXME: This is lame that we have to second-guess our caller here.
1943 // The API should change to either fail-hard when we don't have enough offse t space
1944 // or that we shouldn't ask our callers to pre-allocate in the first place.
1945 bool using_temporary_offsets = false;
1946 if (re->top_backref > 0 && re->top_backref >= ocount/3) {
1947 ocount = re->top_backref * 3 + 3;
1948 matchBlock.offsetVector = new int[ocount];
1949 if (!matchBlock.offsetVector)
1950 return JSRegExpErrorNoMemory;
1951 using_temporary_offsets = true;
1952 } else
1953 matchBlock.offsetVector = offsets;
1954
1955 matchBlock.offsetEnd = ocount;
1956 matchBlock.offsetMax = (2*ocount)/3;
1957 matchBlock.offsetOverflow = false;
1958
1959 /* Compute the minimum number of offsets that we need to reset each time. Do ing
1960 this makes a huge difference to execution time when there aren't many brack ets
1961 in the pattern. */
1962
1963 int resetcount = 2 + re->top_bracket * 2;
1964 if (resetcount > offsetcount)
1965 resetcount = ocount;
1966
1967 /* Reset the working variable associated with each extraction. These should
1968 never be used unless previously set, but they get saved and restored, and s o we
1969 initialize them to avoid reading uninitialized locations. */
1970
1971 if (matchBlock.offsetVector) {
1972 int* iptr = matchBlock.offsetVector + ocount;
1973 int* iend = iptr - resetcount/2 + 1;
1974 while (--iptr >= iend)
1975 *iptr = -1;
1976 }
1977
1978 /* Set up the first character to match, if available. The first_byte value i s
1979 never set for an anchored regular expression, but the anchoring may be forc ed
1980 at run time, so we have to test for anchoring. The first char may be unset for
1981 an unanchored pattern, of course. If there's no first char and the pattern was
1982 studied, there may be a bitmap of possible first characters. */
1983
1984 bool first_byte_caseless = false;
1985 int first_byte = -1;
1986 if (re->options & UseFirstByteOptimizationOption) {
1987 first_byte = re->first_byte & 255;
1988 if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
1989 first_byte = toLowerCase(first_byte);
1990 }
1991
1992 /* For anchored or unanchored matches, there may be a "last known required
1993 character" set. */
1994
1995 bool req_byte_caseless = false;
1996 int req_byte = -1;
1997 int req_byte2 = -1;
1998 if (re->options & UseRequiredByteOptimizationOption) {
1999 req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
2000 req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
2001 req_byte2 = flipCase(req_byte);
2002 }
2003
2004 /* Loop for handling unanchored repeated matching attempts; for anchored reg exs
2005 the loop runs just once. */
2006
2007 const UChar* startMatch = subject + start_offset;
2008 const UChar* reqBytePtr = startMatch - 1;
2009 bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByte OptimizationOption;
2010
2011 do {
2012 /* Reset the maximum number of extractions we might see. */
2013 if (matchBlock.offsetVector) {
2014 int* iptr = matchBlock.offsetVector;
2015 int* iend = iptr + resetcount;
2016 while (iptr < iend)
2017 *iptr++ = -1;
2018 }
2019
2020 tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_ caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_off set);
2021 if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_by te2, req_byte_caseless, first_byte >= 0, reqBytePtr))
2022 break;
2023
2024 /* When a match occurs, substrings will be set for all internal extracti ons;
2025 we just need to set up the whole thing as substring 0 before returning. If
2026 there were too many extractions, set the return code to zero. In the ca se
2027 where we had to get some local store to hold offsets for backreferences , copy
2028 those back references that we can. In this case there need not be overf low
2029 if certain parts of the pattern were not used. */
2030
2031 /* The code starts after the JSRegExp block and the capture name table. */
2032 const unsigned char* start_code = reinterpret_cast<const unsigned char*> (re + 1);
2033
2034 int returnCode = match(startMatch, start_code, 2, matchBlock);
2035
2036 /* When the result is no match, advance the pointer to the next characte r
2037 and continue. */
2038 if (returnCode == 0) {
2039 startMatch++;
2040 continue;
2041 }
2042
2043 if (returnCode != 1) {
2044 ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExp ErrorNoMemory);
2045 DPRINTF((">>>> error: returning %d\n", returnCode));
2046 return returnCode;
2047 }
2048
2049 /* We have a match! Copy the offset information from temporary store if
2050 necessary */
2051
2052 if (using_temporary_offsets) {
2053 if (offsetcount >= 4) {
2054 memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
2055 DPRINTF(("Copied offsets from temporary memory\n"));
2056 }
2057 if (matchBlock.endOffsetTop > offsetcount)
2058 matchBlock.offsetOverflow = true;
2059
2060 DPRINTF(("Freeing temporary memory\n"));
2061 delete [] matchBlock.offsetVector;
2062 }
2063
2064 returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2 ;
2065
2066 if (offsetcount < 2)
2067 returnCode = 0;
2068 else {
2069 offsets[0] = startMatch - matchBlock.startSubject;
2070 offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
2071 }
2072
2073 DPRINTF((">>>> returning %d\n", returnCode));
2074 return returnCode;
2075 } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
2076
2077 if (using_temporary_offsets) {
2078 DPRINTF(("Freeing temporary memory\n"));
2079 delete [] matchBlock.offsetVector;
2080 }
2081
2082 DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
2083 return JSRegExpErrorNoMatch;
2084 }
2085
2086 } } // namespace dart::jscre
OLDNEW

Powered by Google App Engine
This is Rietveld 408576698