runtime/third_party/jscre/pcre_internal.h - Issue 1071713003: - Remove JSCRE from the runtime.

Unified Diff: runtime/third_party/jscre/pcre_internal.h

Issue 1071713003: - Remove JSCRE from the runtime. (Closed) Base URL: http://dart.googlecode.com/svn/branches/bleeding_edge/dart/

Patch Set: Created 5 years, 8 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: runtime/third_party/jscre/pcre_internal.h

===================================================================

--- runtime/third_party/jscre/pcre_internal.h (revision 44981)

+++ runtime/third_party/jscre/pcre_internal.h (working copy)

@@ -1,428 +0,0 @@

-/* This is JavaScriptCore's variant of the PCRE library. While this library

-started out as a copy of PCRE, many of the features of PCRE have been

-removed. This library now supports only the regular expression features

-required by the JavaScript language specification, and has only the functions

-needed by JavaScriptCore and the rest of WebKit.

- Originally written by Philip Hazel

------------------------------------------------------------------------------

-Redistribution and use in source and binary forms, with or without

-modification, are permitted provided that the following conditions are met:

- * Redistributions of source code must retain the above copyright notice,

- this list of conditions and the following disclaimer.

- * Redistributions in binary form must reproduce the above copyright

- notice, this list of conditions and the following disclaimer in the

- documentation and/or other materials provided with the distribution.

- * Neither the name of the University of Cambridge nor the names of its

- contributors may be used to endorse or promote products derived from

- this software without specific prior written permission.

-THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"

-AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE

-IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE

-ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE

-LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR

-CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF

-SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS

-INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN

-CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)

-ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE

-POSSIBILITY OF SUCH DAMAGE.

------------------------------------------------------------------------------

-*/

-/* This header contains definitions that are shared between the different

-modules, but which are not relevant to the exported API. This includes some

-functions whose names all begin with "_pcre_". */

-#ifndef THIRD_PARTY_JSCRE_PCRE_INTERNAL_H_

-#define THIRD_PARTY_JSCRE_PCRE_INTERNAL_H_

-#if defined(_WIN32)

-typedef unsigned __int32 uint32_t;

-#else

-#include <inttypes.h>

-#include <stdint.h>

-#endif

-#include <stdarg.h>

-#include <stddef.h>

-#include <stdlib.h>

-#include <string.h>

-#include <sys/types.h>

-/* Bit definitions for entries in the pcre_ctypes table. */

-#define ctype_space 0x01

-#define ctype_xdigit 0x08

-#define ctype_word 0x10 /* alphameric or '_' */

-/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set

-of bits for a class map. Some classes are built by combining these tables. */

-#define cbit_space 0 /* \s */

-#define cbit_digit 32 /* \d */

-#define cbit_word 64 /* \w */

-#define cbit_length 96 /* Length of the cbits table */

-/* Offsets of the various tables from the base tables pointer, and

-total length. */

-#define lcc_offset 0

-#define fcc_offset 128

-#define cbits_offset 256

-#define ctypes_offset (cbits_offset + cbit_length)

-#define tables_length (ctypes_offset + 128)

-#ifndef DFTABLES

-// TODO(xxx): Hook this up to something that checks assertions.

-#define ASSERT(x) if (!(x)) abort()

-#define ASSERT_NOT_REACHED() abort()

-#ifdef WIN32

-#pragma warning(disable: 4232)

-#pragma warning(disable: 4244)

-#endif

-#include "./pcre.h"

-/* The value of LINK_SIZE determines the number of bytes used to store links as

-offsets within the compiled regex. The default is 2, which allows for compiled

-patterns up to 64K long. */

-#define LINK_SIZE 2

-/* Define DEBUG to get debugging output on stdout. */

-#if 0

-#define DEBUG

-#endif

-/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef

-inline, and there are *still* stupid compilers about that don't like indented

-pre-processor statements, or at least there were when I first wrote this. After

-all, it had only been about 10 years then... */

-#ifdef DEBUG

-#define DPRINTF(p) printf p

-#else

-#define DPRINTF(p) /*nothing*/

-#endif

-namespace dart { namespace jscre {

-/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored

-in big-endian order) by default. These are used, for example, to link from the

-start of a subpattern to its alternatives and its end. The use of 2 bytes per

-offset limits the size of the compiled regex to around 64K, which is big enough

-for almost everybody. However, I received a request for an even bigger limit.

-For this reason, and also to make the code easier to maintain, the storing and

-loading of offsets from the byte string is now handled by the functions that are

-defined here. */

-/* PCRE uses some other 2-byte quantities that do not change when the size of

-offsets changes. There are used for repeat counts and for other things such as

-capturing parenthesis numbers in back references. */

-static inline void put2ByteValue(unsigned char* opcodePtr, int value) {

- ASSERT(value >= 0 && value <= 0xFFFF);

- opcodePtr[0] = value >> 8;

- opcodePtr[1] = value;

-static inline int get2ByteValue(const unsigned char* opcodePtr) {

- return (opcodePtr[0] << 8) | opcodePtr[1];

-static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr,

- int value) {

- put2ByteValue(opcodePtr, value);

- opcodePtr += 2;

-static inline void putLinkValueAllowZero(unsigned char* opcodePtr,

- int value) {

- put2ByteValue(opcodePtr, value);

-static inline int getLinkValueAllowZero(const unsigned char* opcodePtr) {

- return get2ByteValue(opcodePtr);

-#define MAX_PATTERN_SIZE (1 << 16)

-static inline void putLinkValue(unsigned char* opcodePtr, int value) {

- ASSERT(value);

- putLinkValueAllowZero(opcodePtr, value);

-static inline int getLinkValue(const unsigned char* opcodePtr) {

- int value = getLinkValueAllowZero(opcodePtr);

- ASSERT(value);

- return value;

-static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr,

- int value) {

- putLinkValue(opcodePtr, value);

- opcodePtr += LINK_SIZE;

-static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr,

- int value) {

- putLinkValueAllowZero(opcodePtr, value);

- opcodePtr += LINK_SIZE;

-// FIXME: These are really more of a "compiled regexp state"

-// than "regexp options"

-enum RegExpOptions {

- UseFirstByteOptimizationOption = 0x40000000, /* first_byte is set */

- UseRequiredByteOptimizationOption = 0x20000000, /* req_byte is set */

- UseMultiLineFirstByteOptimizationOption = 0x10000000,

- /* start after \n for multiline */

- IsAnchoredOption = 0x02000000, /* can't use partial with this regex */

- IgnoreCaseOption = 0x00000001,

- MatchAcrossMultipleLinesOption = 0x00000002

-};

-/* Flags added to firstbyte or reqbyte; a "non-literal" item is either a

-variable-length repeat, or a anything other than literal characters. */

-#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */

-#define REQ_VARY 0x0200 /* reqbyte followed non-literal item */

-/* Miscellaneous definitions */

-/* Flag bits and data types for the extended class (OP_XCLASS) for classes that

-contain UTF-8 characters with values greater than 255. */

-#define XCL_NOT 0x01 /* Flag: this is a negative class */

-#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */

-#define XCL_END 0 /* Marks end of individual items */

-#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */

-#define XCL_RANGE 2 /* A range (two multibyte chars) follows */

-/* These are escaped items that aren't just an encoding of a particular data

-value such as \n. They must have non-zero values, as check_escape() returns

-their negation. Also, they must appear in the same order as in the opcode

-definitions below, up to ESC_w. The final one must be

-ESC_REF as subsequent values are used for \1, \2, \3, etc. There is are two

-tests in the code for an escape > ESC_b and <= ESC_w to

-detect the types that may be repeated. These are the types that consume

-characters. If any new escapes are put in between that don't consume a

-character, that code will have to change. */

-enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF };

-/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets

-that extract substrings. Starting from 1 (i.e. after OP_END), the values up to

-OP_EOD must correspond in order to the list of escapes immediately above.

-Note that whenever this list is updated, the two macro definitions that follow

-must also be updated to match. */

-#define FOR_EACH_OPCODE(macro) \

- macro(END) \

- \

- macro(NOT_WORD_BOUNDARY) \

- macro(WORD_BOUNDARY) \

- macro(NOT_DIGIT) \

- macro(DIGIT) \

- macro(NOT_WHITESPACE) \

- macro(WHITESPACE) \

- macro(NOT_WORDCHAR) \

- macro(WORDCHAR) \

- \

- macro(NOT_NEWLINE) \

- \

- macro(CIRC) \

- macro(DOLL) \

- macro(BOL) \

- macro(EOL) \

- macro(CHAR) \

- macro(CHAR_IGNORING_CASE) \

- macro(ASCII_CHAR) \

- macro(ASCII_LETTER_IGNORING_CASE) \

- macro(NOT) \

- \

- macro(STAR) \

- macro(MINSTAR) \

- macro(PLUS) \

- macro(MINPLUS) \

- macro(QUERY) \

- macro(MINQUERY) \

- macro(UPTO) \

- macro(MINUPTO) \

- macro(EXACT) \

- \

- macro(NOTSTAR) \

- macro(NOTMINSTAR) \

- macro(NOTPLUS) \

- macro(NOTMINPLUS) \

- macro(NOTQUERY) \

- macro(NOTMINQUERY) \

- macro(NOTUPTO) \

- macro(NOTMINUPTO) \

- macro(NOTEXACT) \

- \

- macro(TYPESTAR) \

- macro(TYPEMINSTAR) \

- macro(TYPEPLUS) \

- macro(TYPEMINPLUS) \

- macro(TYPEQUERY) \

- macro(TYPEMINQUERY) \

- macro(TYPEUPTO) \

- macro(TYPEMINUPTO) \

- macro(TYPEEXACT) \

- \

- macro(CRSTAR) \

- macro(CRMINSTAR) \

- macro(CRPLUS) \

- macro(CRMINPLUS) \

- macro(CRQUERY) \

- macro(CRMINQUERY) \

- macro(CRRANGE) \

- macro(CRMINRANGE) \

- \

- macro(CLASS) \

- macro(NCLASS) \

- macro(XCLASS) \

- \

- macro(REF) \

- \

- macro(ALT) \

- macro(KET) \

- macro(KETRMAX) \

- macro(KETRMIN) \

- \

- macro(ASSERT) \

- macro(ASSERT_NOT) \

- \

- macro(BRAZERO) \

- macro(BRAMINZERO) \

- macro(BRANUMBER) \

- macro(BRA)

-#define OPCODE_ENUM_VALUE(opcode) OP_##opcode,

-enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };

-/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and

-study.c that all opcodes are less than 128 in value. This makes handling UTF-8

-character sequences easier. */

-/* The highest extraction number before we have to start using additional

-bytes. (Originally PCRE didn't have support for extraction counts higher than

-this number.) The value is limited by the number of opcodes left after OP_BRA,

-i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional

-opcodes. */

-/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above

-are in conflict! */

-#define EXTRACT_BASIC_MAX 100

-/* The index of names and the

-code vector run on as long as necessary after the end. We store an explicit

-offset to the name table so that if a regex is compiled on one host, saved, and

-then run on another where the size of pointers is different, all might still

-be well. For the case of compiled-on-4 and run-on-8, we include an extra

-pointer that is always NULL.

-*/

-struct JSRegExp {

- uint32_t options;

- uint16_t top_bracket;

- uint16_t top_backref;

- uint16_t first_byte;

- uint16_t req_byte;

-};

-/* Internal shared data tables. These are tables that are used by more than one

- of the exported public functions. They have to be "external" in the C sense,

- but are not part of the PCRE public API. The data for these tables is in the

- pcre_tables.c module. */

-#define kjs_pcre_utf8_table1_size 6

-extern const int kjs_pcre_utf8_table1[6];

-extern const int kjs_pcre_utf8_table2[6];

-extern const int kjs_pcre_utf8_table3[6];

-extern const unsigned char kjs_pcre_utf8_table4[0x40];

-extern const unsigned char kjs_pcre_default_tables[tables_length];

-static inline unsigned char toLowerCase(unsigned char c) {

- static const unsigned char* lowerCaseChars =

- kjs_pcre_default_tables + lcc_offset;

- return lowerCaseChars[c];

-static inline unsigned char flipCase(unsigned char c) {

- static const unsigned char* flippedCaseChars =

- kjs_pcre_default_tables + fcc_offset;

- return flippedCaseChars[c];

-static inline unsigned char classBitmapForChar(unsigned char c) {

- static const unsigned char* charClassBitmaps =

- kjs_pcre_default_tables + cbits_offset;

- return charClassBitmaps[c];

-static inline unsigned char charTypeForChar(unsigned char c) {

- const unsigned char* charTypeMap = kjs_pcre_default_tables + ctypes_offset;

- return charTypeMap[c];

-static inline bool isWordChar(UChar c) {

- return c < 128 && (charTypeForChar(c) & ctype_word);

-static inline bool isSpaceChar(UChar c) {

- return (c < 128 && (charTypeForChar(c) & ctype_space));

-static inline bool isNewline(UChar nl) {

- return (nl == 0xA || nl == 0xD || nl == 0x2028 || nl == 0x2029);

-static inline bool isBracketStartOpcode(unsigned char opcode) {

- if (opcode >= OP_BRA)

- return true;

- switch (opcode) {

- case OP_ASSERT:

- case OP_ASSERT_NOT:

- return true;

- default:

- return false;

- }

-static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr) {

- ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);

- do

- opcodePtr += getLinkValue(opcodePtr + 1);

- while (*opcodePtr == OP_ALT);

-/* Internal shared functions. These are functions that are used in more

-that one of the source files. They have to have external linkage, but

-but are not part of the public API and so not exported from the library. */

-extern int kjs_pcre_ucp_othercase(unsigned);

-extern bool kjs_pcre_xclass(int, const unsigned char*);

-} } // namespace dart::jscre

-#endif

-#endif // THIRD_PARTY_JSCRE_PCRE_INTERNAL_H_

« no previous file with comments | « runtime/third_party/jscre/pcre_exec.cpp ('k') | runtime/third_party/jscre/pcre_tables.cpp » ('j') | runtime/vm/object.h » ('J')