| Index: runtime/third_party/jscre/pcre_exec.cpp
|
| ===================================================================
|
| --- runtime/third_party/jscre/pcre_exec.cpp (revision 44981)
|
| +++ runtime/third_party/jscre/pcre_exec.cpp (working copy)
|
| @@ -1,2086 +0,0 @@
|
| -/* This is JavaScriptCore's variant of the PCRE library. While this library
|
| -started out as a copy of PCRE, many of the features of PCRE have been
|
| -removed. This library now supports only the regular expression features
|
| -required by the JavaScript language specification, and has only the functions
|
| -needed by JavaScriptCore and the rest of WebKit.
|
| -
|
| - Originally written by Philip Hazel
|
| - Copyright (c) 1997-2006 University of Cambridge
|
| - Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
| - Copyright (C) 2007 Eric Seidel <eric@webkit.org>
|
| -
|
| ------------------------------------------------------------------------------
|
| -Redistribution and use in source and binary forms, with or without
|
| -modification, are permitted provided that the following conditions are met:
|
| -
|
| - * Redistributions of source code must retain the above copyright notice,
|
| - this list of conditions and the following disclaimer.
|
| -
|
| - * Redistributions in binary form must reproduce the above copyright
|
| - notice, this list of conditions and the following disclaimer in the
|
| - documentation and/or other materials provided with the distribution.
|
| -
|
| - * Neither the name of the University of Cambridge nor the names of its
|
| - contributors may be used to endorse or promote products derived from
|
| - this software without specific prior written permission.
|
| -
|
| -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
| -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
| -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
| -ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
| -LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
| -CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
| -SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
| -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
| -CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
| -ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
| -POSSIBILITY OF SUCH DAMAGE.
|
| ------------------------------------------------------------------------------
|
| -*/
|
| -
|
| -/* This module contains jsRegExpExecute(), the externally visible function
|
| -that does pattern matching using an NFA algorithm, following the rules from
|
| -the JavaScript specification. There are also some supporting functions. */
|
| -
|
| -#include "config.h"
|
| -
|
| -#include "pcre_internal.h"
|
| -
|
| -#include "ASCIICType.h"
|
| -
|
| -#include <ctype.h>
|
| -#include <limits.h>
|
| -#include <string.h> /* for memcpy */
|
| -
|
| -#ifdef __GNUC__
|
| -#define USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
|
| -//#define USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| -#endif
|
| -
|
| -/* Avoid warnings on Windows. */
|
| -#undef min
|
| -#undef max
|
| -
|
| -namespace dart { namespace jscre {
|
| -
|
| -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
|
| -typedef int ReturnLocation;
|
| -#else
|
| -typedef void* ReturnLocation;
|
| -#endif
|
| -
|
| -/* Structure for building a chain of data for holding the values of
|
| -the subject pointer at the start of each bracket, used to detect when
|
| -an empty string has been matched by a bracket to break infinite loops. */
|
| -struct BracketChainNode {
|
| - BracketChainNode* previousBracket;
|
| - const UChar* bracketStart;
|
| -};
|
| -
|
| -struct MatchFrame {
|
| - ReturnLocation returnLocation;
|
| - struct MatchFrame* previousFrame;
|
| -
|
| - /* Function arguments that may change */
|
| - struct {
|
| - const UChar* subjectPtr;
|
| - const unsigned char* instructionPtr;
|
| - int offsetTop;
|
| - BracketChainNode* bracketChain;
|
| - } args;
|
| -
|
| -
|
| - /* PCRE uses "fake" recursion built off of gotos, thus
|
| - stack-based local variables are not safe to use. Instead we have to
|
| - store local variables on the current MatchFrame. */
|
| - struct {
|
| - const unsigned char* data;
|
| - const unsigned char* startOfRepeatingBracket;
|
| - const UChar* subjectPtrAtStartOfInstruction; // Several instrutions stash away a subjectPtr here for later compare
|
| - const unsigned char* instructionPtrAtStartOfOnce;
|
| -
|
| - int repeatOthercase;
|
| -
|
| - int ctype;
|
| - int fc;
|
| - int fi;
|
| - int length;
|
| - int max;
|
| - int number;
|
| - int offset;
|
| - int saveOffset1;
|
| - int saveOffset2;
|
| - int saveOffset3;
|
| -
|
| - BracketChainNode bracketChainNode;
|
| - } locals;
|
| -};
|
| -
|
| -/* Structure for passing "static" information around between the functions
|
| -doing traditional NFA matching, so that they are thread-safe. */
|
| -
|
| -struct MatchData {
|
| - int* offsetVector; /* Offset vector */
|
| - int offsetEnd; /* One past the end */
|
| - int offsetMax; /* The maximum usable for return data */
|
| - bool offsetOverflow; /* Set if too many extractions */
|
| - const UChar* startSubject; /* Start of the subject string */
|
| - const UChar* endSubject; /* End of the subject string */
|
| - const UChar* endMatchPtr; /* Subject position at end match */
|
| - int endOffsetTop; /* Highwater mark at end of match */
|
| - bool multiline;
|
| - bool ignoreCase;
|
| -};
|
| -
|
| -/* The maximum remaining length of subject we are prepared to search for a
|
| -req_byte match. */
|
| -
|
| -#define REQ_BYTE_MAX 1000
|
| -
|
| -/* The below limit restricts the number of "recursive" match calls in order to
|
| -avoid spending exponential time on complex regular expressions. */
|
| -
|
| -static const unsigned matchLimit = 100000;
|
| -
|
| -#ifdef DEBUG
|
| -/*************************************************
|
| -* Debugging function to print chars *
|
| -*************************************************/
|
| -
|
| -/* Print a sequence of chars in printable format, stopping at the end of the
|
| -subject if the requested.
|
| -
|
| -Arguments:
|
| - p points to characters
|
| - length number to print
|
| - isSubject true if printing from within md.startSubject
|
| - md pointer to matching data block, if isSubject is true
|
| -*/
|
| -
|
| -static void pchars(const UChar* p, int length, bool isSubject, const MatchData& md)
|
| -{
|
| - if (isSubject && length > md.endSubject - p)
|
| - length = md.endSubject - p;
|
| - while (length-- > 0) {
|
| - int c;
|
| - if (isprint(c = *(p++)))
|
| - printf("%c", c);
|
| - else if (c < 256)
|
| - printf("\\x%02x", c);
|
| - else
|
| - printf("\\x{%x}", c);
|
| - }
|
| -}
|
| -#endif
|
| -
|
| -/*************************************************
|
| -* Match a back-reference *
|
| -*************************************************/
|
| -
|
| -/* If a back reference hasn't been set, the length that is passed is greater
|
| -than the number of characters left in the string, so the match fails.
|
| -
|
| -Arguments:
|
| - offset index into the offset vector
|
| - subjectPtr points into the subject
|
| - length length to be matched
|
| - md points to match data block
|
| -
|
| -Returns: true if matched
|
| -*/
|
| -
|
| -static bool matchRef(int offset, const UChar* subjectPtr, int length, const MatchData& md)
|
| -{
|
| - const UChar* p = md.startSubject + md.offsetVector[offset];
|
| -
|
| -#ifdef DEBUG
|
| - if (subjectPtr >= md.endSubject)
|
| - printf("matching subject <null>");
|
| - else {
|
| - printf("matching subject ");
|
| - pchars(subjectPtr, length, true, md);
|
| - }
|
| - printf(" against backref ");
|
| - pchars(p, length, false, md);
|
| - printf("\n");
|
| -#endif
|
| -
|
| - /* Always fail if not enough characters left */
|
| -
|
| - if (length > md.endSubject - subjectPtr)
|
| - return false;
|
| -
|
| - /* Separate the caselesss case for speed */
|
| -
|
| - if (md.ignoreCase) {
|
| - while (length-- > 0) {
|
| - UChar c = *p++;
|
| - int othercase = kjs_pcre_ucp_othercase(c);
|
| - UChar d = *subjectPtr++;
|
| - if (c != d && othercase != d)
|
| - return false;
|
| - }
|
| - }
|
| - else {
|
| - while (length-- > 0)
|
| - if (*p++ != *subjectPtr++)
|
| - return false;
|
| - }
|
| -
|
| - return true;
|
| -}
|
| -
|
| -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
|
| -
|
| -/* Use numbered labels and switch statement at the bottom of the match function. */
|
| -
|
| -#define RMATCH_WHERE(num) num
|
| -#define RRETURN_LABEL RRETURN_SWITCH
|
| -
|
| -#else
|
| -
|
| -/* Use GCC's computed goto extension. */
|
| -
|
| -/* For one test case this is more than 40% faster than the switch statement.
|
| -We could avoid the use of the num argument entirely by using local labels,
|
| -but using it for the GCC case as well as the non-GCC case allows us to share
|
| -a bit more code and notice if we use conflicting numbers.*/
|
| -
|
| -#define RMATCH_WHERE(num) &&RRETURN_##num
|
| -#define RRETURN_LABEL *stack.currentFrame->returnLocation
|
| -
|
| -#endif
|
| -
|
| -#define RECURSIVE_MATCH_COMMON(num) \
|
| - goto RECURSE;\
|
| - RRETURN_##num: \
|
| - stack.popCurrentFrame();
|
| -
|
| -#define RECURSIVE_MATCH(num, ra, rb) \
|
| - do { \
|
| - stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
|
| - RECURSIVE_MATCH_COMMON(num) \
|
| - } while (0)
|
| -
|
| -#define RECURSIVE_MATCH_STARTNG_NEW_GROUP(num, ra, rb) \
|
| - do { \
|
| - stack.pushNewFrame((ra), (rb), RMATCH_WHERE(num)); \
|
| - startNewGroup(stack.currentFrame); \
|
| - RECURSIVE_MATCH_COMMON(num) \
|
| - } while (0)
|
| -
|
| -#define RRETURN goto RRETURN_LABEL
|
| -
|
| -#define RRETURN_NO_MATCH do { isMatch = false; RRETURN; } while (0)
|
| -
|
| -/*************************************************
|
| -* Match from current position *
|
| -*************************************************/
|
| -
|
| -/* On entry instructionPtr points to the first opcode, and subjectPtr to the first character
|
| -in the subject string, while substringStart holds the value of subjectPtr at the start of the
|
| -last bracketed group - used for breaking infinite loops matching zero-length
|
| -strings. This function is called recursively in many circumstances. Whenever it
|
| -returns a negative (error) response, the outer match() call must also return the
|
| -same response.
|
| -
|
| -Arguments:
|
| - subjectPtr pointer in subject
|
| - instructionPtr position in code
|
| - offsetTop current top pointer
|
| - md pointer to "static" info for the match
|
| -
|
| -Returns: 1 if matched ) these values are >= 0
|
| - 0 if failed to match )
|
| - a negative error value if aborted by an error condition
|
| - (e.g. stopped by repeated call or recursion limit)
|
| -*/
|
| -
|
| -static const unsigned FRAMES_ON_STACK = 16;
|
| -
|
| -struct MatchStack {
|
| - MatchStack()
|
| - : framesEnd(frames + FRAMES_ON_STACK)
|
| - , currentFrame(frames)
|
| - , size(1) // match() creates accesses the first frame w/o calling pushNewFrame
|
| - {
|
| - ASSERT((sizeof(frames) / sizeof(frames[0])) == FRAMES_ON_STACK);
|
| - }
|
| -
|
| - MatchFrame frames[FRAMES_ON_STACK];
|
| - MatchFrame* framesEnd;
|
| - MatchFrame* currentFrame;
|
| - unsigned size;
|
| -
|
| - inline bool canUseStackBufferForNextFrame()
|
| - {
|
| - return size < FRAMES_ON_STACK;
|
| - }
|
| -
|
| - inline MatchFrame* allocateNextFrame()
|
| - {
|
| - if (canUseStackBufferForNextFrame())
|
| - return currentFrame + 1;
|
| - return new MatchFrame;
|
| - }
|
| -
|
| - inline void pushNewFrame(const unsigned char* instructionPtr, BracketChainNode* bracketChain, ReturnLocation returnLocation)
|
| - {
|
| - MatchFrame* newframe = allocateNextFrame();
|
| - newframe->previousFrame = currentFrame;
|
| -
|
| - newframe->args.subjectPtr = currentFrame->args.subjectPtr;
|
| - newframe->args.offsetTop = currentFrame->args.offsetTop;
|
| - newframe->args.instructionPtr = instructionPtr;
|
| - newframe->args.bracketChain = bracketChain;
|
| - newframe->returnLocation = returnLocation;
|
| - size++;
|
| -
|
| - currentFrame = newframe;
|
| - }
|
| -
|
| - inline void popCurrentFrame()
|
| - {
|
| - MatchFrame* oldFrame = currentFrame;
|
| - currentFrame = currentFrame->previousFrame;
|
| - if (size > FRAMES_ON_STACK)
|
| - delete oldFrame;
|
| - size--;
|
| - }
|
| -
|
| - void popAllFrames()
|
| - {
|
| - while (size)
|
| - popCurrentFrame();
|
| - }
|
| -};
|
| -
|
| -static int matchError(int errorCode, MatchStack& stack)
|
| -{
|
| - stack.popAllFrames();
|
| - return errorCode;
|
| -}
|
| -
|
| -/* Get the next UTF-8 character, not advancing the pointer, incrementing length
|
| - if there are extra bytes. This is called when we know we are in UTF-8 mode. */
|
| -
|
| -static inline void getUTF8CharAndIncrementLength(int& c, const unsigned char* subjectPtr, int& len)
|
| -{
|
| - c = *subjectPtr;
|
| - if ((c & 0xc0) == 0xc0) {
|
| - int gcaa = kjs_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
| - int gcss = 6 * gcaa;
|
| - c = (c & kjs_pcre_utf8_table3[gcaa]) << gcss;
|
| - for (int gcii = 1; gcii <= gcaa; gcii++) {
|
| - gcss -= 6;
|
| - c |= (subjectPtr[gcii] & 0x3f) << gcss;
|
| - }
|
| - len += gcaa;
|
| - }
|
| -}
|
| -
|
| -static inline void startNewGroup(MatchFrame* currentFrame)
|
| -{
|
| - /* At the start of a bracketed group, add the current subject pointer to the
|
| - stack of such pointers, to be re-instated at the end of the group when we hit
|
| - the closing ket. When match() is called in other circumstances, we don't add to
|
| - this stack. */
|
| -
|
| - currentFrame->locals.bracketChainNode.previousBracket = currentFrame->args.bracketChain;
|
| - currentFrame->locals.bracketChainNode.bracketStart = currentFrame->args.subjectPtr;
|
| - currentFrame->args.bracketChain = ¤tFrame->locals.bracketChainNode;
|
| -}
|
| -
|
| -// FIXME: "minimize" means "not greedy", we should invert the callers to ask for "greedy" to be less confusing
|
| -static inline void repeatInformationFromInstructionOffset(short instructionOffset, bool& minimize, int& minimumRepeats, int& maximumRepeats)
|
| -{
|
| - // Instruction offsets are based off of OP_CRSTAR, OP_STAR, OP_TYPESTAR, OP_NOTSTAR
|
| - static const char minimumRepeatsFromInstructionOffset[] = { 0, 0, 1, 1, 0, 0 };
|
| - static const int maximumRepeatsFromInstructionOffset[] = { INT_MAX, INT_MAX, INT_MAX, INT_MAX, 1, 1 };
|
| -
|
| - ASSERT(instructionOffset >= 0);
|
| - ASSERT(instructionOffset <= (OP_CRMINQUERY - OP_CRSTAR));
|
| -
|
| - minimize = (instructionOffset & 1); // this assumes ordering: Instruction, MinimizeInstruction, Instruction2, MinimizeInstruction2
|
| - minimumRepeats = minimumRepeatsFromInstructionOffset[instructionOffset];
|
| - maximumRepeats = maximumRepeatsFromInstructionOffset[instructionOffset];
|
| -}
|
| -
|
| -static int match(const UChar* subjectPtr, const unsigned char* instructionPtr, int offsetTop, MatchData& md)
|
| -{
|
| - bool isMatch = false;
|
| - int min;
|
| - bool minimize = false; /* Initialization not really needed, but some compilers think so. */
|
| - unsigned matchCount = 0;
|
| -
|
| - MatchStack stack;
|
| -
|
| - /* The opcode jump table. */
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| -#define EMIT_JUMP_TABLE_ENTRY(opcode) &&LABEL_OP_##opcode,
|
| - static void* opcodeJumpTable[256] = { FOR_EACH_OPCODE(EMIT_JUMP_TABLE_ENTRY) };
|
| -#undef EMIT_JUMP_TABLE_ENTRY
|
| -#endif
|
| -
|
| - /* One-time setup of the opcode jump table. */
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| - for (int i = 255; !opcodeJumpTable[i]; i--)
|
| - opcodeJumpTable[i] = &&CAPTURING_BRACKET;
|
| -#endif
|
| -
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
|
| - // Shark shows this as a hot line
|
| - // Using a static const here makes this line disappear, but makes later access hotter (not sure why)
|
| - stack.currentFrame->returnLocation = &&RETURN;
|
| -#else
|
| - stack.currentFrame->returnLocation = 0;
|
| -#endif
|
| - stack.currentFrame->args.subjectPtr = subjectPtr;
|
| - stack.currentFrame->args.instructionPtr = instructionPtr;
|
| - stack.currentFrame->args.offsetTop = offsetTop;
|
| - stack.currentFrame->args.bracketChain = 0;
|
| - startNewGroup(stack.currentFrame);
|
| -
|
| - /* This is where control jumps back to to effect "recursion" */
|
| -
|
| -RECURSE:
|
| - if (++matchCount > matchLimit)
|
| - return matchError(JSRegExpErrorHitLimit, stack);
|
| -
|
| - /* Now start processing the operations. */
|
| -
|
| -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| - while (true)
|
| -#endif
|
| - {
|
| -
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| -#define BEGIN_OPCODE(opcode) LABEL_OP_##opcode
|
| -#define NEXT_OPCODE goto *opcodeJumpTable[*stack.currentFrame->args.instructionPtr]
|
| -#else
|
| -#define BEGIN_OPCODE(opcode) case OP_##opcode
|
| -#define NEXT_OPCODE continue
|
| -#endif
|
| -
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| - NEXT_OPCODE;
|
| -#else
|
| - switch (*stack.currentFrame->args.instructionPtr)
|
| -#endif
|
| - {
|
| - /* Non-capturing bracket: optimized */
|
| -
|
| - BEGIN_OPCODE(BRA):
|
| - NON_CAPTURING_BRACKET:
|
| - DPRINTF(("start bracket 0\n"));
|
| - do {
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(2, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
|
| - } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
|
| - DPRINTF(("bracket 0 failed\n"));
|
| - RRETURN;
|
| -
|
| - /* Skip over large extraction number data if encountered. */
|
| -
|
| - BEGIN_OPCODE(BRANUMBER):
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - NEXT_OPCODE;
|
| -
|
| - /* End of the pattern. */
|
| -
|
| - BEGIN_OPCODE(END):
|
| - md.endMatchPtr = stack.currentFrame->args.subjectPtr; /* Record where we ended */
|
| - md.endOffsetTop = stack.currentFrame->args.offsetTop; /* and how many extracts were taken */
|
| - isMatch = true;
|
| - RRETURN;
|
| -
|
| - /* Assertion brackets. Check the alternative branches in turn - the
|
| - matching won't pass the KET for an assertion. If any one branch matches,
|
| - the assertion is true. Lookbehind assertions have an OP_REVERSE item at the
|
| - start of each branch to move the current point backwards, so the code at
|
| - this level is identical to the lookahead case. */
|
| -
|
| - BEGIN_OPCODE(ASSERT):
|
| - do {
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(6, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
|
| - if (isMatch)
|
| - break;
|
| - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
|
| - } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
|
| - if (*stack.currentFrame->args.instructionPtr == OP_KET)
|
| - RRETURN_NO_MATCH;
|
| -
|
| - /* Continue from after the assertion, updating the offsets high water
|
| - mark, since extracts may have been taken during the assertion. */
|
| -
|
| - advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
|
| - stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
|
| - stack.currentFrame->args.offsetTop = md.endOffsetTop;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Negative assertion: all branches must fail to match */
|
| -
|
| - BEGIN_OPCODE(ASSERT_NOT):
|
| - do {
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(7, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, NULL);
|
| - if (isMatch)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
|
| - } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
|
| -
|
| - stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
|
| - NEXT_OPCODE;
|
| -
|
| - /* An alternation is the end of a branch; scan along to find the end of the
|
| - bracketed group and go to there. */
|
| -
|
| - BEGIN_OPCODE(ALT):
|
| - advanceToEndOfBracket(stack.currentFrame->args.instructionPtr);
|
| - NEXT_OPCODE;
|
| -
|
| - /* BRAZERO and BRAMINZERO occur just before a bracket group, indicating
|
| - that it may occur zero times. It may repeat infinitely, or not at all -
|
| - i.e. it could be ()* or ()? in the pattern. Brackets with fixed upper
|
| - repeat limits are compiled as a number of copies, with the optional ones
|
| - preceded by BRAZERO or BRAMINZERO. */
|
| -
|
| - BEGIN_OPCODE(BRAZERO): {
|
| - stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(14, stack.currentFrame->locals.startOfRepeatingBracket, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
|
| - stack.currentFrame->args.instructionPtr = stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - BEGIN_OPCODE(BRAMINZERO): {
|
| - stack.currentFrame->locals.startOfRepeatingBracket = stack.currentFrame->args.instructionPtr + 1;
|
| - advanceToEndOfBracket(stack.currentFrame->locals.startOfRepeatingBracket);
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(15, stack.currentFrame->locals.startOfRepeatingBracket + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* End of a group, repeated or non-repeating. If we are at the end of
|
| - an assertion "group", stop matching and return 1, but record the
|
| - current high water mark for use by positive assertions. Do this also
|
| - for the "once" (not-backup up) groups. */
|
| -
|
| - BEGIN_OPCODE(KET):
|
| - BEGIN_OPCODE(KETRMIN):
|
| - BEGIN_OPCODE(KETRMAX):
|
| - stack.currentFrame->locals.instructionPtrAtStartOfOnce = stack.currentFrame->args.instructionPtr - getLinkValue(stack.currentFrame->args.instructionPtr + 1);
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.bracketChain->bracketStart;
|
| -
|
| - /* Back up the stack of bracket start pointers. */
|
| -
|
| - stack.currentFrame->args.bracketChain = stack.currentFrame->args.bracketChain->previousBracket;
|
| -
|
| - if (*stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT || *stack.currentFrame->locals.instructionPtrAtStartOfOnce == OP_ASSERT_NOT) {
|
| - md.endOffsetTop = stack.currentFrame->args.offsetTop;
|
| - isMatch = true;
|
| - RRETURN;
|
| - }
|
| -
|
| - /* In all other cases except a conditional group we have to check the
|
| - group number back at the start and if necessary complete handling an
|
| - extraction by setting the offsets and bumping the high water mark. */
|
| -
|
| - stack.currentFrame->locals.number = *stack.currentFrame->locals.instructionPtrAtStartOfOnce - OP_BRA;
|
| -
|
| - /* For extended extraction brackets (large number), we have to fish out
|
| - the number from a dummy opcode at the start. */
|
| -
|
| - if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
|
| - stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->locals.instructionPtrAtStartOfOnce + 2 + LINK_SIZE);
|
| - stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
|
| -
|
| -#ifdef DEBUG
|
| - printf("end bracket %d", stack.currentFrame->locals.number);
|
| - printf("\n");
|
| -#endif
|
| -
|
| - /* Test for a numbered group. This includes groups called as a result
|
| - of recursion. Note that whole-pattern recursion is coded as a recurse
|
| - into group 0, so it won't be picked up here. Instead, we catch it when
|
| - the OP_END is reached. */
|
| -
|
| - if (stack.currentFrame->locals.number > 0) {
|
| - if (stack.currentFrame->locals.offset >= md.offsetMax)
|
| - md.offsetOverflow = true;
|
| - else {
|
| - md.offsetVector[stack.currentFrame->locals.offset] =
|
| - md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
|
| - md.offsetVector[stack.currentFrame->locals.offset+1] = stack.currentFrame->args.subjectPtr - md.startSubject;
|
| - if (stack.currentFrame->args.offsetTop <= stack.currentFrame->locals.offset)
|
| - stack.currentFrame->args.offsetTop = stack.currentFrame->locals.offset + 2;
|
| - }
|
| - }
|
| -
|
| - /* For a non-repeating ket, just continue at this level. This also
|
| - happens for a repeating ket if no characters were matched in the group.
|
| - This is the forcible breaking of infinite loops as implemented in Perl
|
| - 5.005. If there is an options reset, it will get obeyed in the normal
|
| - course of events. */
|
| -
|
| - if (*stack.currentFrame->args.instructionPtr == OP_KET || stack.currentFrame->args.subjectPtr == stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
|
| - stack.currentFrame->args.instructionPtr += 1 + LINK_SIZE;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* The repeating kets try the rest of the pattern or restart from the
|
| - preceding bracket, in the appropriate order. */
|
| -
|
| - if (*stack.currentFrame->args.instructionPtr == OP_KETRMIN) {
|
| - RECURSIVE_MATCH(16, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(17, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - } else { /* OP_KETRMAX */
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(18, stack.currentFrame->locals.instructionPtrAtStartOfOnce, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - RECURSIVE_MATCH(19, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - }
|
| - RRETURN;
|
| -
|
| - /* Start of subject. */
|
| -
|
| - BEGIN_OPCODE(CIRC):
|
| - if (stack.currentFrame->args.subjectPtr != md.startSubject)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - /* After internal newline if multiline. */
|
| -
|
| - BEGIN_OPCODE(BOL):
|
| - if (stack.currentFrame->args.subjectPtr != md.startSubject && !isNewline(stack.currentFrame->args.subjectPtr[-1]))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - /* End of subject. */
|
| -
|
| - BEGIN_OPCODE(DOLL):
|
| - if (stack.currentFrame->args.subjectPtr < md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Before internal newline if multiline. */
|
| -
|
| - BEGIN_OPCODE(EOL):
|
| - if (stack.currentFrame->args.subjectPtr < md.endSubject && !isNewline(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Word boundary assertions */
|
| -
|
| - BEGIN_OPCODE(NOT_WORD_BOUNDARY):
|
| - BEGIN_OPCODE(WORD_BOUNDARY): {
|
| - bool currentCharIsWordChar = false;
|
| - bool previousCharIsWordChar = false;
|
| -
|
| - if (stack.currentFrame->args.subjectPtr > md.startSubject)
|
| - previousCharIsWordChar = isWordChar(stack.currentFrame->args.subjectPtr[-1]);
|
| - if (stack.currentFrame->args.subjectPtr < md.endSubject)
|
| - currentCharIsWordChar = isWordChar(*stack.currentFrame->args.subjectPtr);
|
| -
|
| - /* Now see if the situation is what we want */
|
| - bool wordBoundaryDesired = (*stack.currentFrame->args.instructionPtr++ == OP_WORD_BOUNDARY);
|
| - if (wordBoundaryDesired ? currentCharIsWordChar == previousCharIsWordChar : currentCharIsWordChar != previousCharIsWordChar)
|
| - RRETURN_NO_MATCH;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* Match a single character type; inline for speed */
|
| -
|
| - BEGIN_OPCODE(NOT_NEWLINE):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (isNewline(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(NOT_DIGIT):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(DIGIT):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(NOT_WHITESPACE):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (isSpaceChar(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(WHITESPACE):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (!isSpaceChar(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(NOT_WORDCHAR):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (isWordChar(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - BEGIN_OPCODE(WORDCHAR):
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (!isWordChar(*stack.currentFrame->args.subjectPtr++))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Match a back reference, possibly repeatedly. Look past the end of the
|
| - item to see if there is repeat information following. The code is similar
|
| - to that for character classes, but repeated for efficiency. Then obey
|
| - similar code to character type repeats - written out again for speed.
|
| - However, if the referenced string is the empty string, always treat
|
| - it as matched, any number of times (otherwise there could be infinite
|
| - loops). */
|
| -
|
| - BEGIN_OPCODE(REF):
|
| - stack.currentFrame->locals.offset = get2ByteValue(stack.currentFrame->args.instructionPtr + 1) << 1; /* Doubled ref number */
|
| - stack.currentFrame->args.instructionPtr += 3; /* Advance past item */
|
| -
|
| - /* If the reference is unset, set the length to be longer than the amount
|
| - of subject left; this ensures that every attempt at a match fails. We
|
| - can't just fail here, because of the possibility of quantifiers with zero
|
| - minima. */
|
| -
|
| - if (stack.currentFrame->locals.offset >= stack.currentFrame->args.offsetTop || md.offsetVector[stack.currentFrame->locals.offset] < 0)
|
| - stack.currentFrame->locals.length = 0;
|
| - else
|
| - stack.currentFrame->locals.length = md.offsetVector[stack.currentFrame->locals.offset+1] - md.offsetVector[stack.currentFrame->locals.offset];
|
| -
|
| - /* Set up for repetition, or handle the non-repeated case */
|
| -
|
| - switch (*stack.currentFrame->args.instructionPtr) {
|
| - case OP_CRSTAR:
|
| - case OP_CRMINSTAR:
|
| - case OP_CRPLUS:
|
| - case OP_CRMINPLUS:
|
| - case OP_CRQUERY:
|
| - case OP_CRMINQUERY:
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
|
| - break;
|
| -
|
| - case OP_CRRANGE:
|
| - case OP_CRMINRANGE:
|
| - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
|
| - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
|
| - if (stack.currentFrame->locals.max == 0)
|
| - stack.currentFrame->locals.max = INT_MAX;
|
| - stack.currentFrame->args.instructionPtr += 5;
|
| - break;
|
| -
|
| - default: /* No repeat follows */
|
| - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* If the length of the reference is zero, just continue with the
|
| - main loop. */
|
| -
|
| - if (stack.currentFrame->locals.length == 0)
|
| - NEXT_OPCODE;
|
| -
|
| - /* First, ensure the minimum number of matches are present. */
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
|
| - }
|
| -
|
| - /* If min = max, continue at the same level without recursion.
|
| - They are not both allowed to be zero. */
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - /* If minimizing, keep trying and advancing the pointer */
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(20, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || !matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
|
| - RRETURN;
|
| - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* If maximizing, find the longest string and work backwards */
|
| -
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (!matchRef(stack.currentFrame->locals.offset, stack.currentFrame->args.subjectPtr, stack.currentFrame->locals.length, md))
|
| - break;
|
| - stack.currentFrame->args.subjectPtr += stack.currentFrame->locals.length;
|
| - }
|
| - while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
|
| - RECURSIVE_MATCH(21, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - stack.currentFrame->args.subjectPtr -= stack.currentFrame->locals.length;
|
| - }
|
| - RRETURN_NO_MATCH;
|
| - }
|
| - /* Control never reaches here */
|
| -
|
| - /* Match a bit-mapped character class, possibly repeatedly. This op code is
|
| - used when all the characters in the class have values in the range 0-255,
|
| - and either the matching is caseful, or the characters are in the range
|
| - 0-127 when UTF-8 processing is enabled. The only difference between
|
| - OP_CLASS and OP_NCLASS occurs when a data character outside the range is
|
| - encountered.
|
| -
|
| - First, look past the end of the item to see if there is repeat information
|
| - following. Then obey similar code to character type repeats - written out
|
| - again for speed. */
|
| -
|
| - BEGIN_OPCODE(NCLASS):
|
| - BEGIN_OPCODE(CLASS):
|
| - stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1; /* Save for matching */
|
| - stack.currentFrame->args.instructionPtr += 33; /* Advance past the item */
|
| -
|
| - switch (*stack.currentFrame->args.instructionPtr) {
|
| - case OP_CRSTAR:
|
| - case OP_CRMINSTAR:
|
| - case OP_CRPLUS:
|
| - case OP_CRMINPLUS:
|
| - case OP_CRQUERY:
|
| - case OP_CRMINQUERY:
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
|
| - break;
|
| -
|
| - case OP_CRRANGE:
|
| - case OP_CRMINRANGE:
|
| - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
|
| - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
|
| - if (stack.currentFrame->locals.max == 0)
|
| - stack.currentFrame->locals.max = INT_MAX;
|
| - stack.currentFrame->args.instructionPtr += 5;
|
| - break;
|
| -
|
| - default: /* No repeat follows */
|
| - min = stack.currentFrame->locals.max = 1;
|
| - break;
|
| - }
|
| -
|
| - /* First, ensure the minimum number of matches are present. */
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - if (c > 255) {
|
| - if (stack.currentFrame->locals.data[-1] == OP_CLASS)
|
| - RRETURN_NO_MATCH;
|
| - } else {
|
| - if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
|
| - RRETURN_NO_MATCH;
|
| - }
|
| - }
|
| -
|
| - /* If max == min we can continue with the main loop without the
|
| - need to recurse. */
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - /* If minimizing, keep testing the rest of the expression and advancing
|
| - the pointer while it matches the class. */
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(22, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN;
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - if (c > 255) {
|
| - if (stack.currentFrame->locals.data[-1] == OP_CLASS)
|
| - RRETURN;
|
| - } else {
|
| - if ((stack.currentFrame->locals.data[c/8] & (1 << (c&7))) == 0)
|
| - RRETURN;
|
| - }
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| - /* If maximizing, find the longest possible run, then work backwards. */
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| -
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (c > 255) {
|
| - if (stack.currentFrame->locals.data[-1] == OP_CLASS)
|
| - break;
|
| - } else {
|
| - if (!(stack.currentFrame->locals.data[c / 8] & (1 << (c & 7))))
|
| - break;
|
| - }
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - for (;;) {
|
| - RECURSIVE_MATCH(24, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
|
| - break; /* Stop if tried at original pos */
|
| - }
|
| -
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| -
|
| - /* Match an extended character class. */
|
| -
|
| - BEGIN_OPCODE(XCLASS):
|
| - stack.currentFrame->locals.data = stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE; /* Save for matching */
|
| - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1); /* Advance past the item */
|
| -
|
| - switch (*stack.currentFrame->args.instructionPtr) {
|
| - case OP_CRSTAR:
|
| - case OP_CRMINSTAR:
|
| - case OP_CRPLUS:
|
| - case OP_CRMINPLUS:
|
| - case OP_CRQUERY:
|
| - case OP_CRMINQUERY:
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_CRSTAR, minimize, min, stack.currentFrame->locals.max);
|
| - break;
|
| -
|
| - case OP_CRRANGE:
|
| - case OP_CRMINRANGE:
|
| - minimize = (*stack.currentFrame->args.instructionPtr == OP_CRMINRANGE);
|
| - min = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 3);
|
| - if (stack.currentFrame->locals.max == 0)
|
| - stack.currentFrame->locals.max = INT_MAX;
|
| - stack.currentFrame->args.instructionPtr += 5;
|
| - break;
|
| -
|
| - default: /* No repeat follows */
|
| - min = stack.currentFrame->locals.max = 1;
|
| - }
|
| -
|
| - /* First, ensure the minimum number of matches are present. */
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
|
| - RRETURN_NO_MATCH;
|
| - }
|
| -
|
| - /* If max == min we can continue with the main loop without the
|
| - need to recurse. */
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - /* If minimizing, keep testing the rest of the expression and advancing
|
| - the pointer while it matches the class. */
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(26, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN;
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* If maximizing, find the longest possible run, then work backwards. */
|
| -
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (!kjs_pcre_xclass(c, stack.currentFrame->locals.data))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - for(;;) {
|
| - RECURSIVE_MATCH(27, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
|
| - break; /* Stop if tried at original pos */
|
| - }
|
| - RRETURN;
|
| - }
|
| -
|
| - /* Control never reaches here */
|
| -
|
| - /* Match a single character, casefully */
|
| -
|
| - BEGIN_OPCODE(CHAR):
|
| - stack.currentFrame->locals.length = 1;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
|
| - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - if (stack.currentFrame->locals.fc != *stack.currentFrame->args.subjectPtr++)
|
| - RRETURN_NO_MATCH;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Match a single character, caselessly */
|
| -
|
| - BEGIN_OPCODE(CHAR_IGNORING_CASE): {
|
| - stack.currentFrame->locals.length = 1;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
|
| - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - int dc = *stack.currentFrame->args.subjectPtr++;
|
| - if (stack.currentFrame->locals.fc != dc && kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) != dc)
|
| - RRETURN_NO_MATCH;
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* Match a single ASCII character. */
|
| -
|
| - BEGIN_OPCODE(ASCII_CHAR):
|
| - if (md.endSubject == stack.currentFrame->args.subjectPtr)
|
| - RRETURN_NO_MATCH;
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->args.instructionPtr[1])
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - stack.currentFrame->args.instructionPtr += 2;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Match one of two cases of an ASCII letter. */
|
| -
|
| - BEGIN_OPCODE(ASCII_LETTER_IGNORING_CASE):
|
| - if (md.endSubject == stack.currentFrame->args.subjectPtr)
|
| - RRETURN_NO_MATCH;
|
| - if ((*stack.currentFrame->args.subjectPtr | 0x20) != stack.currentFrame->args.instructionPtr[1])
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - stack.currentFrame->args.instructionPtr += 2;
|
| - NEXT_OPCODE;
|
| -
|
| - /* Match a single character repeatedly; different opcodes share code. */
|
| -
|
| - BEGIN_OPCODE(EXACT):
|
| - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = false;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATCHAR;
|
| -
|
| - BEGIN_OPCODE(UPTO):
|
| - BEGIN_OPCODE(MINUPTO):
|
| - min = 0;
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = *stack.currentFrame->args.instructionPtr == OP_MINUPTO;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATCHAR;
|
| -
|
| - BEGIN_OPCODE(STAR):
|
| - BEGIN_OPCODE(MINSTAR):
|
| - BEGIN_OPCODE(PLUS):
|
| - BEGIN_OPCODE(MINPLUS):
|
| - BEGIN_OPCODE(QUERY):
|
| - BEGIN_OPCODE(MINQUERY):
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_STAR, minimize, min, stack.currentFrame->locals.max);
|
| -
|
| - /* Common code for all repeated single-character matches. We can give
|
| - up quickly if there are fewer than the minimum number of characters left in
|
| - the subject. */
|
| -
|
| - REPEATCHAR:
|
| -
|
| - stack.currentFrame->locals.length = 1;
|
| - getUTF8CharAndIncrementLength(stack.currentFrame->locals.fc, stack.currentFrame->args.instructionPtr, stack.currentFrame->locals.length);
|
| - if (min * (stack.currentFrame->locals.fc > 0xFFFF ? 2 : 1) > md.endSubject - stack.currentFrame->args.subjectPtr)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr += stack.currentFrame->locals.length;
|
| -
|
| - if (stack.currentFrame->locals.fc <= 0xFFFF) {
|
| - int othercase;
|
| - othercase = md.ignoreCase ? kjs_pcre_ucp_othercase(stack.currentFrame->locals.fc) : -1;
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - if (minimize) {
|
| - stack.currentFrame->locals.repeatOthercase = othercase;
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(28, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN;
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.repeatOthercase)
|
| - RRETURN;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - /* Control never reaches here */
|
| - } else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc && *stack.currentFrame->args.subjectPtr != othercase)
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
|
| - RECURSIVE_MATCH(29, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - --stack.currentFrame->args.subjectPtr;
|
| - }
|
| - RRETURN_NO_MATCH;
|
| - }
|
| - /* Control never reaches here */
|
| - } else {
|
| - /* No case on surrogate pairs, so no need to bother with "othercase". */
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.subjectPtr += 2;
|
| - }
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(30, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN;
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
|
| - RRETURN;
|
| - stack.currentFrame->args.subjectPtr += 2;
|
| - }
|
| - /* Control never reaches here */
|
| - } else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr > md.endSubject - 2)
|
| - break;
|
| - if (*stack.currentFrame->args.subjectPtr != stack.currentFrame->locals.fc)
|
| - break;
|
| - stack.currentFrame->args.subjectPtr += 2;
|
| - }
|
| - while (stack.currentFrame->args.subjectPtr >= stack.currentFrame->locals.subjectPtrAtStartOfInstruction) {
|
| - RECURSIVE_MATCH(31, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - stack.currentFrame->args.subjectPtr -= 2;
|
| - }
|
| - RRETURN_NO_MATCH;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| - /* Control never reaches here */
|
| -
|
| - /* Match a negated single one-byte character. */
|
| -
|
| - BEGIN_OPCODE(NOT): {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->args.instructionPtr++;
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - if (md.ignoreCase) {
|
| - if (c < 128)
|
| - c = toLowerCase(c);
|
| - if (toLowerCase(*stack.currentFrame->args.instructionPtr++) == c)
|
| - RRETURN_NO_MATCH;
|
| - } else {
|
| - if (*stack.currentFrame->args.instructionPtr++ == c)
|
| - RRETURN_NO_MATCH;
|
| - }
|
| - NEXT_OPCODE;
|
| - }
|
| -
|
| - /* Match a negated single one-byte character repeatedly. This is almost a
|
| - repeat of the code for a repeated single character, but I haven't found a
|
| - nice way of commoning these up that doesn't require a test of the
|
| - positive/negative option for each character match. Maybe that wouldn't add
|
| - very much to the time taken, but character matching *is* what this is all
|
| - about... */
|
| -
|
| - BEGIN_OPCODE(NOTEXACT):
|
| - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = false;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATNOTCHAR;
|
| -
|
| - BEGIN_OPCODE(NOTUPTO):
|
| - BEGIN_OPCODE(NOTMINUPTO):
|
| - min = 0;
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = *stack.currentFrame->args.instructionPtr == OP_NOTMINUPTO;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATNOTCHAR;
|
| -
|
| - BEGIN_OPCODE(NOTSTAR):
|
| - BEGIN_OPCODE(NOTMINSTAR):
|
| - BEGIN_OPCODE(NOTPLUS):
|
| - BEGIN_OPCODE(NOTMINPLUS):
|
| - BEGIN_OPCODE(NOTQUERY):
|
| - BEGIN_OPCODE(NOTMINQUERY):
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_NOTSTAR, minimize, min, stack.currentFrame->locals.max);
|
| -
|
| - /* Common code for all repeated single-byte matches. We can give up quickly
|
| - if there are fewer than the minimum number of bytes left in the
|
| - subject. */
|
| -
|
| - REPEATNOTCHAR:
|
| - if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
|
| - RRETURN_NO_MATCH;
|
| - stack.currentFrame->locals.fc = *stack.currentFrame->args.instructionPtr++;
|
| -
|
| - /* The code is duplicated for the caseless and caseful cases, for speed,
|
| - since matching characters is likely to be quite common. First, ensure the
|
| - minimum number of matches are present. If min = max, continue at the same
|
| - level without recursing. Otherwise, if minimizing, keep trying the rest of
|
| - the expression and advancing one matching character if failing, up to the
|
| - maximum. Alternatively, if maximizing, find the maximum number of
|
| - characters and work backwards. */
|
| -
|
| - DPRINTF(("negative matching %c{%d,%d}\n", stack.currentFrame->locals.fc, min, stack.currentFrame->locals.max));
|
| -
|
| - if (md.ignoreCase) {
|
| - if (stack.currentFrame->locals.fc < 128)
|
| - stack.currentFrame->locals.fc = toLowerCase(stack.currentFrame->locals.fc);
|
| -
|
| - for (int i = 1; i <= min; i++) {
|
| - int d = *stack.currentFrame->args.subjectPtr++;
|
| - if (d < 128)
|
| - d = toLowerCase(d);
|
| - if (stack.currentFrame->locals.fc == d)
|
| - RRETURN_NO_MATCH;
|
| - }
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(38, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - int d = *stack.currentFrame->args.subjectPtr++;
|
| - if (d < 128)
|
| - d = toLowerCase(d);
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* Maximize case */
|
| -
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| -
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int d = *stack.currentFrame->args.subjectPtr;
|
| - if (d < 128)
|
| - d = toLowerCase(d);
|
| - if (stack.currentFrame->locals.fc == d)
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - for (;;) {
|
| - RECURSIVE_MATCH(40, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
|
| - break; /* Stop if tried at original pos */
|
| - }
|
| -
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* Caseful comparisons */
|
| -
|
| - else {
|
| - for (int i = 1; i <= min; i++) {
|
| - int d = *stack.currentFrame->args.subjectPtr++;
|
| - if (stack.currentFrame->locals.fc == d)
|
| - RRETURN_NO_MATCH;
|
| - }
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(42, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - int d = *stack.currentFrame->args.subjectPtr++;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject || stack.currentFrame->locals.fc == d)
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* Maximize case */
|
| -
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr;
|
| -
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int d = *stack.currentFrame->args.subjectPtr;
|
| - if (stack.currentFrame->locals.fc == d)
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - for (;;) {
|
| - RECURSIVE_MATCH(44, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
|
| - break; /* Stop if tried at original pos */
|
| - }
|
| -
|
| - RRETURN;
|
| - }
|
| - }
|
| - /* Control never reaches here */
|
| -
|
| - /* Match a single character type repeatedly; several different opcodes
|
| - share code. This is very similar to the code for single characters, but we
|
| - repeat it in the interests of efficiency. */
|
| -
|
| - BEGIN_OPCODE(TYPEEXACT):
|
| - min = stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = true;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATTYPE;
|
| -
|
| - BEGIN_OPCODE(TYPEUPTO):
|
| - BEGIN_OPCODE(TYPEMINUPTO):
|
| - min = 0;
|
| - stack.currentFrame->locals.max = get2ByteValue(stack.currentFrame->args.instructionPtr + 1);
|
| - minimize = *stack.currentFrame->args.instructionPtr == OP_TYPEMINUPTO;
|
| - stack.currentFrame->args.instructionPtr += 3;
|
| - goto REPEATTYPE;
|
| -
|
| - BEGIN_OPCODE(TYPESTAR):
|
| - BEGIN_OPCODE(TYPEMINSTAR):
|
| - BEGIN_OPCODE(TYPEPLUS):
|
| - BEGIN_OPCODE(TYPEMINPLUS):
|
| - BEGIN_OPCODE(TYPEQUERY):
|
| - BEGIN_OPCODE(TYPEMINQUERY):
|
| - repeatInformationFromInstructionOffset(*stack.currentFrame->args.instructionPtr++ - OP_TYPESTAR, minimize, min, stack.currentFrame->locals.max);
|
| -
|
| - /* Common code for all repeated single character type matches. Note that
|
| - in UTF-8 mode, '.' matches a character of any length, but for the other
|
| - character types, the valid characters are all one-byte long. */
|
| -
|
| - REPEATTYPE:
|
| - stack.currentFrame->locals.ctype = *stack.currentFrame->args.instructionPtr++; /* Code for the character type */
|
| -
|
| - /* First, ensure the minimum number of matches are present. Use inline
|
| - code for maximizing the speed, and do the type test once at the start
|
| - (i.e. keep it out of the loop). Also we can test that there are at least
|
| - the minimum number of characters before we start. */
|
| -
|
| - if (min > md.endSubject - stack.currentFrame->args.subjectPtr)
|
| - RRETURN_NO_MATCH;
|
| - if (min > 0) {
|
| - switch (stack.currentFrame->locals.ctype) {
|
| - case OP_NOT_NEWLINE:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (isNewline(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_DIGIT:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (isASCIIDigit(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_DIGIT:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (!isASCIIDigit(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_WHITESPACE:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (isSpaceChar(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_WHITESPACE:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (!isSpaceChar(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_WORDCHAR:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (isWordChar(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_WORDCHAR:
|
| - for (int i = 1; i <= min; i++) {
|
| - if (!isWordChar(*stack.currentFrame->args.subjectPtr))
|
| - RRETURN_NO_MATCH;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - default:
|
| - ASSERT_NOT_REACHED();
|
| - return matchError(JSRegExpErrorInternal, stack);
|
| - } /* End switch(stack.currentFrame->locals.ctype) */
|
| - }
|
| -
|
| - /* If min = max, continue at the same level without recursing */
|
| -
|
| - if (min == stack.currentFrame->locals.max)
|
| - NEXT_OPCODE;
|
| -
|
| - /* If minimizing, we have to test the rest of the pattern before each
|
| - subsequent match. */
|
| -
|
| - if (minimize) {
|
| - for (stack.currentFrame->locals.fi = min;; stack.currentFrame->locals.fi++) {
|
| - RECURSIVE_MATCH(48, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->locals.fi >= stack.currentFrame->locals.max || stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - RRETURN;
|
| -
|
| - int c = *stack.currentFrame->args.subjectPtr++;
|
| - switch (stack.currentFrame->locals.ctype) {
|
| - case OP_NOT_NEWLINE:
|
| - if (isNewline(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_NOT_DIGIT:
|
| - if (isASCIIDigit(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_DIGIT:
|
| - if (!isASCIIDigit(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_NOT_WHITESPACE:
|
| - if (isSpaceChar(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_WHITESPACE:
|
| - if (!isSpaceChar(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_NOT_WORDCHAR:
|
| - if (isWordChar(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - case OP_WORDCHAR:
|
| - if (!isWordChar(c))
|
| - RRETURN;
|
| - break;
|
| -
|
| - default:
|
| - ASSERT_NOT_REACHED();
|
| - return matchError(JSRegExpErrorInternal, stack);
|
| - }
|
| - }
|
| - /* Control never reaches here */
|
| - }
|
| -
|
| - /* If maximizing it is worth using inline code for speed, doing the type
|
| - test once at the start (i.e. keep it out of the loop). */
|
| -
|
| - else {
|
| - stack.currentFrame->locals.subjectPtrAtStartOfInstruction = stack.currentFrame->args.subjectPtr; /* Remember where we started */
|
| -
|
| - switch (stack.currentFrame->locals.ctype) {
|
| - case OP_NOT_NEWLINE:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject || isNewline(*stack.currentFrame->args.subjectPtr))
|
| - break;
|
| - stack.currentFrame->args.subjectPtr++;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_DIGIT:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (isASCIIDigit(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_DIGIT:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (!isASCIIDigit(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_WHITESPACE:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (isSpaceChar(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_WHITESPACE:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (!isSpaceChar(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_NOT_WORDCHAR:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (isWordChar(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - case OP_WORDCHAR:
|
| - for (int i = min; i < stack.currentFrame->locals.max; i++) {
|
| - if (stack.currentFrame->args.subjectPtr >= md.endSubject)
|
| - break;
|
| - int c = *stack.currentFrame->args.subjectPtr;
|
| - if (!isWordChar(c))
|
| - break;
|
| - ++stack.currentFrame->args.subjectPtr;
|
| - }
|
| - break;
|
| -
|
| - default:
|
| - ASSERT_NOT_REACHED();
|
| - return matchError(JSRegExpErrorInternal, stack);
|
| - }
|
| -
|
| - /* stack.currentFrame->args.subjectPtr is now past the end of the maximum run */
|
| -
|
| - for (;;) {
|
| - RECURSIVE_MATCH(52, stack.currentFrame->args.instructionPtr, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - if (stack.currentFrame->args.subjectPtr-- == stack.currentFrame->locals.subjectPtrAtStartOfInstruction)
|
| - break; /* Stop if tried at original pos */
|
| - }
|
| -
|
| - /* Get here if we can't make it match with any permitted repetitions */
|
| -
|
| - RRETURN;
|
| - }
|
| - /* Control never reaches here */
|
| -
|
| - BEGIN_OPCODE(CRMINPLUS):
|
| - BEGIN_OPCODE(CRMINQUERY):
|
| - BEGIN_OPCODE(CRMINRANGE):
|
| - BEGIN_OPCODE(CRMINSTAR):
|
| - BEGIN_OPCODE(CRPLUS):
|
| - BEGIN_OPCODE(CRQUERY):
|
| - BEGIN_OPCODE(CRRANGE):
|
| - BEGIN_OPCODE(CRSTAR):
|
| - ASSERT_NOT_REACHED();
|
| - return matchError(JSRegExpErrorInternal, stack);
|
| -
|
| -#ifdef USE_COMPUTED_GOTO_FOR_MATCH_OPCODE_LOOP
|
| - CAPTURING_BRACKET:
|
| -#else
|
| - default:
|
| -#endif
|
| - /* Opening capturing bracket. If there is space in the offset vector, save
|
| - the current subject position in the working slot at the top of the vector. We
|
| - mustn't change the current values of the data slot, because they may be set
|
| - from a previous iteration of this group, and be referred to by a reference
|
| - inside the group.
|
| -
|
| - If the bracket fails to match, we need to restore this value and also the
|
| - values of the final offsets, in case they were set by a previous iteration of
|
| - the same bracket.
|
| -
|
| - If there isn't enough space in the offset vector, treat this as if it were a
|
| - non-capturing bracket. Don't worry about setting the flag for the error case
|
| - here; that is handled in the code for KET. */
|
| -
|
| - ASSERT(*stack.currentFrame->args.instructionPtr > OP_BRA);
|
| -
|
| - stack.currentFrame->locals.number = *stack.currentFrame->args.instructionPtr - OP_BRA;
|
| -
|
| - /* For extended extraction brackets (large number), we have to fish out the
|
| - number from a dummy opcode at the start. */
|
| -
|
| - if (stack.currentFrame->locals.number > EXTRACT_BASIC_MAX)
|
| - stack.currentFrame->locals.number = get2ByteValue(stack.currentFrame->args.instructionPtr + 2 + LINK_SIZE);
|
| - stack.currentFrame->locals.offset = stack.currentFrame->locals.number << 1;
|
| -
|
| -#ifdef DEBUG
|
| - printf("start bracket %d subject=", stack.currentFrame->locals.number);
|
| - pchars(stack.currentFrame->args.subjectPtr, 16, true, md);
|
| - printf("\n");
|
| -#endif
|
| -
|
| - if (stack.currentFrame->locals.offset < md.offsetMax) {
|
| - stack.currentFrame->locals.saveOffset1 = md.offsetVector[stack.currentFrame->locals.offset];
|
| - stack.currentFrame->locals.saveOffset2 = md.offsetVector[stack.currentFrame->locals.offset + 1];
|
| - stack.currentFrame->locals.saveOffset3 = md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number];
|
| -
|
| - DPRINTF(("saving %d %d %d\n", stack.currentFrame->locals.saveOffset1, stack.currentFrame->locals.saveOffset2, stack.currentFrame->locals.saveOffset3));
|
| - md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->args.subjectPtr - md.startSubject;
|
| -
|
| - do {
|
| - RECURSIVE_MATCH_STARTNG_NEW_GROUP(1, stack.currentFrame->args.instructionPtr + 1 + LINK_SIZE, stack.currentFrame->args.bracketChain);
|
| - if (isMatch)
|
| - RRETURN;
|
| - stack.currentFrame->args.instructionPtr += getLinkValue(stack.currentFrame->args.instructionPtr + 1);
|
| - } while (*stack.currentFrame->args.instructionPtr == OP_ALT);
|
| -
|
| - DPRINTF(("bracket %d failed\n", stack.currentFrame->locals.number));
|
| -
|
| - md.offsetVector[stack.currentFrame->locals.offset] = stack.currentFrame->locals.saveOffset1;
|
| - md.offsetVector[stack.currentFrame->locals.offset + 1] = stack.currentFrame->locals.saveOffset2;
|
| - md.offsetVector[md.offsetEnd - stack.currentFrame->locals.number] = stack.currentFrame->locals.saveOffset3;
|
| -
|
| - RRETURN;
|
| - }
|
| -
|
| - /* Insufficient room for saving captured contents */
|
| -
|
| - goto NON_CAPTURING_BRACKET;
|
| - }
|
| -
|
| - /* Do not stick any code in here without much thought; it is assumed
|
| - that "continue" in the code above comes out to here to repeat the main
|
| - loop. */
|
| -
|
| - } /* End of main loop */
|
| -
|
| - ASSERT_NOT_REACHED();
|
| -
|
| -#ifndef USE_COMPUTED_GOTO_FOR_MATCH_RECURSION
|
| -
|
| -RRETURN_SWITCH:
|
| - switch (stack.currentFrame->returnLocation) {
|
| - case 0: goto RETURN;
|
| - case 1: goto RRETURN_1;
|
| - case 2: goto RRETURN_2;
|
| - case 6: goto RRETURN_6;
|
| - case 7: goto RRETURN_7;
|
| - case 14: goto RRETURN_14;
|
| - case 15: goto RRETURN_15;
|
| - case 16: goto RRETURN_16;
|
| - case 17: goto RRETURN_17;
|
| - case 18: goto RRETURN_18;
|
| - case 19: goto RRETURN_19;
|
| - case 20: goto RRETURN_20;
|
| - case 21: goto RRETURN_21;
|
| - case 22: goto RRETURN_22;
|
| - case 24: goto RRETURN_24;
|
| - case 26: goto RRETURN_26;
|
| - case 27: goto RRETURN_27;
|
| - case 28: goto RRETURN_28;
|
| - case 29: goto RRETURN_29;
|
| - case 30: goto RRETURN_30;
|
| - case 31: goto RRETURN_31;
|
| - case 38: goto RRETURN_38;
|
| - case 40: goto RRETURN_40;
|
| - case 42: goto RRETURN_42;
|
| - case 44: goto RRETURN_44;
|
| - case 48: goto RRETURN_48;
|
| - case 52: goto RRETURN_52;
|
| - }
|
| -
|
| - ASSERT_NOT_REACHED();
|
| - return matchError(JSRegExpErrorInternal, stack);
|
| -
|
| -#endif
|
| -
|
| -RETURN:
|
| - return isMatch;
|
| -}
|
| -
|
| -
|
| -/*************************************************
|
| -* Execute a Regular Expression *
|
| -*************************************************/
|
| -
|
| -/* This function applies a compiled re to a subject string and picks out
|
| -portions of the string if it matches. Two elements in the vector are set for
|
| -each substring: the offsets to the start and end of the substring.
|
| -
|
| -Arguments:
|
| - re points to the compiled expression
|
| - extra_data points to extra data or is NULL
|
| - subject points to the subject string
|
| - length length of subject string (may contain binary zeros)
|
| - start_offset where to start in the subject string
|
| - options option bits
|
| - offsets points to a vector of ints to be filled in with offsets
|
| - offsetcount the number of elements in the vector
|
| -
|
| -Returns: > 0 => success; value is the number of elements filled in
|
| - = 0 => success, but offsets is not big enough
|
| - -1 => failed to match
|
| - < -1 => some kind of unexpected problem
|
| -*/
|
| -
|
| -static void tryFirstByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int first_byte, bool first_byte_caseless, bool useMultiLineFirstCharOptimization, const UChar* originalSubjectStart)
|
| -{
|
| - // If first_byte is set, try scanning to the first instance of that byte
|
| - // no need to try and match against any earlier part of the subject string.
|
| - if (first_byte >= 0) {
|
| - UChar first_char = first_byte;
|
| - if (first_byte_caseless)
|
| - while (subjectPtr < endSubject) {
|
| - int c = *subjectPtr;
|
| - if (c > 127)
|
| - break;
|
| - if (toLowerCase(c) == first_char)
|
| - break;
|
| - subjectPtr++;
|
| - }
|
| - else {
|
| - while (subjectPtr < endSubject && *subjectPtr != first_char)
|
| - subjectPtr++;
|
| - }
|
| - } else if (useMultiLineFirstCharOptimization) {
|
| - /* Or to just after \n for a multiline match if possible */
|
| - // I'm not sure why this != originalSubjectStart check is necessary -- ecs 11/18/07
|
| - if (subjectPtr > originalSubjectStart) {
|
| - while (subjectPtr < endSubject && !isNewline(subjectPtr[-1]))
|
| - subjectPtr++;
|
| - }
|
| - }
|
| -}
|
| -
|
| -static bool tryRequiredByteOptimization(const UChar*& subjectPtr, const UChar* endSubject, int req_byte, int req_byte2, bool req_byte_caseless, bool hasFirstByte, const UChar*& reqBytePtr)
|
| -{
|
| - /* If req_byte is set, we know that that character must appear in the subject
|
| - for the match to succeed. If the first character is set, req_byte must be
|
| - later in the subject; otherwise the test starts at the match point. This
|
| - optimization can save a huge amount of backtracking in patterns with nested
|
| - unlimited repeats that aren't going to match. Writing separate code for
|
| - cased/caseless versions makes it go faster, as does using an autoincrement
|
| - and backing off on a match.
|
| -
|
| - HOWEVER: when the subject string is very, very long, searching to its end can
|
| - take a long time, and give bad performance on quite ordinary patterns. This
|
| - showed up when somebody was matching /^C/ on a 32-megabyte string... so we
|
| - don't do this when the string is sufficiently long.
|
| - */
|
| -
|
| - if (req_byte >= 0 && endSubject - subjectPtr < REQ_BYTE_MAX) {
|
| - const UChar* p = subjectPtr + (hasFirstByte ? 1 : 0);
|
| -
|
| - /* We don't need to repeat the search if we haven't yet reached the
|
| - place we found it at last time. */
|
| -
|
| - if (p > reqBytePtr) {
|
| - if (req_byte_caseless) {
|
| - while (p < endSubject) {
|
| - int pp = *p++;
|
| - if (pp == req_byte || pp == req_byte2) {
|
| - p--;
|
| - break;
|
| - }
|
| - }
|
| - } else {
|
| - while (p < endSubject) {
|
| - if (*p++ == req_byte) {
|
| - p--;
|
| - break;
|
| - }
|
| - }
|
| - }
|
| -
|
| - /* If we can't find the required character, break the matching loop */
|
| -
|
| - if (p >= endSubject)
|
| - return true;
|
| -
|
| - /* If we have found the required character, save the point where we
|
| - found it, so that we don't search again next time round the loop if
|
| - the start hasn't passed this character yet. */
|
| -
|
| - reqBytePtr = p;
|
| - }
|
| - }
|
| - return false;
|
| -}
|
| -
|
| -int jsRegExpExecute(const JSRegExp* re,
|
| - const UChar* subject, int length, int start_offset, int* offsets,
|
| - int offsetcount)
|
| -{
|
| - ASSERT(re);
|
| - ASSERT(subject);
|
| - ASSERT(offsetcount >= 0);
|
| - ASSERT(offsets || offsetcount == 0);
|
| -
|
| - MatchData matchBlock;
|
| - matchBlock.startSubject = subject;
|
| - matchBlock.endSubject = matchBlock.startSubject + length;
|
| - const UChar* endSubject = matchBlock.endSubject;
|
| -
|
| - matchBlock.multiline = (re->options & MatchAcrossMultipleLinesOption);
|
| - matchBlock.ignoreCase = (re->options & IgnoreCaseOption);
|
| -
|
| - /* If the expression has got more back references than the offsets supplied can
|
| - hold, we get a temporary chunk of working store to use during the matching.
|
| - Otherwise, we can use the vector supplied, rounding down its size to a multiple
|
| - of 3. */
|
| -
|
| - int ocount = offsetcount - (offsetcount % 3);
|
| -
|
| - // FIXME: This is lame that we have to second-guess our caller here.
|
| - // The API should change to either fail-hard when we don't have enough offset space
|
| - // or that we shouldn't ask our callers to pre-allocate in the first place.
|
| - bool using_temporary_offsets = false;
|
| - if (re->top_backref > 0 && re->top_backref >= ocount/3) {
|
| - ocount = re->top_backref * 3 + 3;
|
| - matchBlock.offsetVector = new int[ocount];
|
| - if (!matchBlock.offsetVector)
|
| - return JSRegExpErrorNoMemory;
|
| - using_temporary_offsets = true;
|
| - } else
|
| - matchBlock.offsetVector = offsets;
|
| -
|
| - matchBlock.offsetEnd = ocount;
|
| - matchBlock.offsetMax = (2*ocount)/3;
|
| - matchBlock.offsetOverflow = false;
|
| -
|
| - /* Compute the minimum number of offsets that we need to reset each time. Doing
|
| - this makes a huge difference to execution time when there aren't many brackets
|
| - in the pattern. */
|
| -
|
| - int resetcount = 2 + re->top_bracket * 2;
|
| - if (resetcount > offsetcount)
|
| - resetcount = ocount;
|
| -
|
| - /* Reset the working variable associated with each extraction. These should
|
| - never be used unless previously set, but they get saved and restored, and so we
|
| - initialize them to avoid reading uninitialized locations. */
|
| -
|
| - if (matchBlock.offsetVector) {
|
| - int* iptr = matchBlock.offsetVector + ocount;
|
| - int* iend = iptr - resetcount/2 + 1;
|
| - while (--iptr >= iend)
|
| - *iptr = -1;
|
| - }
|
| -
|
| - /* Set up the first character to match, if available. The first_byte value is
|
| - never set for an anchored regular expression, but the anchoring may be forced
|
| - at run time, so we have to test for anchoring. The first char may be unset for
|
| - an unanchored pattern, of course. If there's no first char and the pattern was
|
| - studied, there may be a bitmap of possible first characters. */
|
| -
|
| - bool first_byte_caseless = false;
|
| - int first_byte = -1;
|
| - if (re->options & UseFirstByteOptimizationOption) {
|
| - first_byte = re->first_byte & 255;
|
| - if ((first_byte_caseless = (re->first_byte & REQ_IGNORE_CASE)))
|
| - first_byte = toLowerCase(first_byte);
|
| - }
|
| -
|
| - /* For anchored or unanchored matches, there may be a "last known required
|
| - character" set. */
|
| -
|
| - bool req_byte_caseless = false;
|
| - int req_byte = -1;
|
| - int req_byte2 = -1;
|
| - if (re->options & UseRequiredByteOptimizationOption) {
|
| - req_byte = re->req_byte & 255; // FIXME: This optimization could be made to work for UTF16 chars as well...
|
| - req_byte_caseless = (re->req_byte & REQ_IGNORE_CASE);
|
| - req_byte2 = flipCase(req_byte);
|
| - }
|
| -
|
| - /* Loop for handling unanchored repeated matching attempts; for anchored regexs
|
| - the loop runs just once. */
|
| -
|
| - const UChar* startMatch = subject + start_offset;
|
| - const UChar* reqBytePtr = startMatch - 1;
|
| - bool useMultiLineFirstCharOptimization = re->options & UseMultiLineFirstByteOptimizationOption;
|
| -
|
| - do {
|
| - /* Reset the maximum number of extractions we might see. */
|
| - if (matchBlock.offsetVector) {
|
| - int* iptr = matchBlock.offsetVector;
|
| - int* iend = iptr + resetcount;
|
| - while (iptr < iend)
|
| - *iptr++ = -1;
|
| - }
|
| -
|
| - tryFirstByteOptimization(startMatch, endSubject, first_byte, first_byte_caseless, useMultiLineFirstCharOptimization, matchBlock.startSubject + start_offset);
|
| - if (tryRequiredByteOptimization(startMatch, endSubject, req_byte, req_byte2, req_byte_caseless, first_byte >= 0, reqBytePtr))
|
| - break;
|
| -
|
| - /* When a match occurs, substrings will be set for all internal extractions;
|
| - we just need to set up the whole thing as substring 0 before returning. If
|
| - there were too many extractions, set the return code to zero. In the case
|
| - where we had to get some local store to hold offsets for backreferences, copy
|
| - those back references that we can. In this case there need not be overflow
|
| - if certain parts of the pattern were not used. */
|
| -
|
| - /* The code starts after the JSRegExp block and the capture name table. */
|
| - const unsigned char* start_code = reinterpret_cast<const unsigned char*>(re + 1);
|
| -
|
| - int returnCode = match(startMatch, start_code, 2, matchBlock);
|
| -
|
| - /* When the result is no match, advance the pointer to the next character
|
| - and continue. */
|
| - if (returnCode == 0) {
|
| - startMatch++;
|
| - continue;
|
| - }
|
| -
|
| - if (returnCode != 1) {
|
| - ASSERT(returnCode == JSRegExpErrorHitLimit || returnCode == JSRegExpErrorNoMemory);
|
| - DPRINTF((">>>> error: returning %d\n", returnCode));
|
| - return returnCode;
|
| - }
|
| -
|
| - /* We have a match! Copy the offset information from temporary store if
|
| - necessary */
|
| -
|
| - if (using_temporary_offsets) {
|
| - if (offsetcount >= 4) {
|
| - memcpy(offsets + 2, matchBlock.offsetVector + 2, (offsetcount - 2) * sizeof(int));
|
| - DPRINTF(("Copied offsets from temporary memory\n"));
|
| - }
|
| - if (matchBlock.endOffsetTop > offsetcount)
|
| - matchBlock.offsetOverflow = true;
|
| -
|
| - DPRINTF(("Freeing temporary memory\n"));
|
| - delete [] matchBlock.offsetVector;
|
| - }
|
| -
|
| - returnCode = matchBlock.offsetOverflow ? 0 : matchBlock.endOffsetTop / 2;
|
| -
|
| - if (offsetcount < 2)
|
| - returnCode = 0;
|
| - else {
|
| - offsets[0] = startMatch - matchBlock.startSubject;
|
| - offsets[1] = matchBlock.endMatchPtr - matchBlock.startSubject;
|
| - }
|
| -
|
| - DPRINTF((">>>> returning %d\n", returnCode));
|
| - return returnCode;
|
| - } while (!(re->options & IsAnchoredOption) && startMatch <= endSubject);
|
| -
|
| - if (using_temporary_offsets) {
|
| - DPRINTF(("Freeing temporary memory\n"));
|
| - delete [] matchBlock.offsetVector;
|
| - }
|
| -
|
| - DPRINTF((">>>> returning PCRE_ERROR_NOMATCH\n"));
|
| - return JSRegExpErrorNoMatch;
|
| -}
|
| -
|
| -} } // namespace dart::jscre
|
|
|