| OLD | NEW |
| 1 // | 1 // |
| 2 // Copyright (C) 2002-2012 International Business Machines Corporation | 2 // Copyright (C) 2002-2013 International Business Machines Corporation |
| 3 // and others. All rights reserved. | 3 // and others. All rights reserved. |
| 4 // | 4 // |
| 5 // file: regeximp.h | 5 // file: regeximp.h |
| 6 // | 6 // |
| 7 // ICU Regular Expressions, | 7 // ICU Regular Expressions, |
| 8 // Definitions of constant values used in the compiled form of | 8 // Definitions of constant values used in the compiled form of |
| 9 // a regular expression pattern. | 9 // a regular expression pattern. |
| 10 // | 10 // |
| 11 | 11 |
| 12 #ifndef _REGEXIMP_H | 12 #ifndef _REGEXIMP_H |
| 13 #define _REGEXIMP_H | 13 #define _REGEXIMP_H |
| 14 | 14 |
| 15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
| 16 #include "unicode/uobject.h" | 16 #include "unicode/uobject.h" |
| 17 #include "unicode/uniset.h" | 17 #include "unicode/uniset.h" |
| 18 #include "unicode/utext.h" | 18 #include "unicode/utext.h" |
| 19 | 19 |
| 20 #include "cmemory.h" | 20 #include "cmemory.h" |
| 21 #include "ucase.h" | 21 #include "ucase.h" |
| 22 | 22 |
| 23 U_NAMESPACE_BEGIN | 23 U_NAMESPACE_BEGIN |
| 24 | 24 |
| 25 // For debugging, define REGEX_DEBUG | 25 // For debugging, define REGEX_DEBUG |
| 26 // To define with configure, | 26 // To define with configure, |
| 27 // ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_
DEBUG" | 27 // CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release
Linux |
| 28 | 28 |
| 29 #ifdef REGEX_DEBUG | 29 #ifdef REGEX_DEBUG |
| 30 // | 30 // |
| 31 // debugging options. Enable one or more of the three #defines immediately fol
lowing | 31 // debugging options. Enable one or more of the three #defines immediately fol
lowing |
| 32 // | 32 // |
| 33 | 33 |
| 34 //#define REGEX_SCAN_DEBUG | 34 //#define REGEX_SCAN_DEBUG |
| 35 #define REGEX_DUMP_DEBUG | 35 #define REGEX_DUMP_DEBUG |
| 36 #define REGEX_RUN_DEBUG | 36 #define REGEX_RUN_DEBUG |
| 37 | 37 |
| 38 // End of #defines intended to be directly set. | 38 // End of #defines intended to be directly set. |
| 39 | 39 |
| 40 #include <stdio.h> | 40 #include <stdio.h> |
| 41 #endif | 41 #endif |
| 42 | 42 |
| 43 #ifdef REGEX_SCAN_DEBUG | 43 #ifdef REGEX_SCAN_DEBUG |
| 44 #define REGEX_SCAN_DEBUG_PRINTF(a) printf a | 44 #define REGEX_SCAN_DEBUG_PRINTF(a) printf a |
| 45 #else | 45 #else |
| 46 #define REGEX_SCAN_DEBUG_PRINTF(a) | 46 #define REGEX_SCAN_DEBUG_PRINTF(a) |
| 47 #endif | 47 #endif |
| 48 | 48 |
| 49 #ifdef REGEX_DUMP_DEBUG | |
| 50 #define REGEX_DUMP_DEBUG_PRINTF(a) printf a | |
| 51 #else | |
| 52 #define REGEX_DUMP_DEBUG_PRINTF(a) | |
| 53 #endif | |
| 54 | |
| 55 #ifdef REGEX_RUN_DEBUG | |
| 56 #define REGEX_RUN_DEBUG_PRINTF(a) printf a | |
| 57 #define REGEX_DUMP_DEBUG_PRINTF(a) printf a | |
| 58 #else | |
| 59 #define REGEX_RUN_DEBUG_PRINTF(a) | |
| 60 #endif | |
| 61 | |
| 62 | 49 |
| 63 // | 50 // |
| 64 // Opcode types In the compiled form of the regexp, these are the type, or
opcodes, | 51 // Opcode types In the compiled form of the regexp, these are the type, or
opcodes, |
| 65 // of the entries. | 52 // of the entries. |
| 66 // | 53 // |
| 67 enum { | 54 enum { |
| 68 URX_RESERVED_OP = 0, // For multi-operand ops, most non-first words. | 55 URX_RESERVED_OP = 0, // For multi-operand ops, most non-first words. |
| 69 URX_RESERVED_OP_N = 255, // For multi-operand ops, negative operand values
. | 56 URX_RESERVED_OP_N = 255, // For multi-operand ops, negative operand values
. |
| 70 URX_BACKTRACK = 1, // Force a backtrack, as if a match test had fail
ed. | 57 URX_BACKTRACK = 1, // Force a backtrack, as if a match test had fail
ed. |
| 71 URX_END = 2, | 58 URX_END = 2, |
| (...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 247 "LOOP_C", \ | 234 "LOOP_C", \ |
| 248 "LOOP_DOT_I", \ | 235 "LOOP_DOT_I", \ |
| 249 "BACKSLASH_BU", \ | 236 "BACKSLASH_BU", \ |
| 250 "DOLLAR_D", \ | 237 "DOLLAR_D", \ |
| 251 "DOLLAR_MD" | 238 "DOLLAR_MD" |
| 252 | 239 |
| 253 | 240 |
| 254 // | 241 // |
| 255 // Convenience macros for assembling and disassembling a compiled operation. | 242 // Convenience macros for assembling and disassembling a compiled operation. |
| 256 // | 243 // |
| 257 int32_t URX_BUILD(int32_t val, int32_t type); | 244 #define URX_BUILD(type, val) (int32_t)((type << 24) | (val)) |
| 258 #define URX_TYPE(x) ((uint32_t)(x) >> 24) | 245 #define URX_TYPE(x) ((uint32_t)(x) >> 24) |
| 259 #define URX_VAL(x) ((x) & 0xffffff) | 246 #define URX_VAL(x) ((x) & 0xffffff) |
| 260 | 247 |
| 261 | 248 |
| 262 // | 249 // |
| 263 // Access to Unicode Sets composite character properties | 250 // Access to Unicode Sets composite character properties |
| 264 // The sets are accessed by the match engine for things like \w (word charac
ter) | 251 // The sets are accessed by the match engine for things like \w (word charac
ter) |
| 265 // | 252 // |
| 266 enum { | 253 enum { |
| 267 URX_ISWORD_SET = 1, | 254 URX_ISWORD_SET = 1, |
| (...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 366 // Case folded UText Iterator helper class. | 353 // Case folded UText Iterator helper class. |
| 367 // Wraps a UText, provides a case-folded enumeration over its contents. | 354 // Wraps a UText, provides a case-folded enumeration over its contents. |
| 368 // Used in implementing case insensitive matching constructs. | 355 // Used in implementing case insensitive matching constructs. |
| 369 // Implementation in rematch.cpp | 356 // Implementation in rematch.cpp |
| 370 | 357 |
| 371 class CaseFoldingUTextIterator: public UMemory { | 358 class CaseFoldingUTextIterator: public UMemory { |
| 372 public: | 359 public: |
| 373 CaseFoldingUTextIterator(UText &text); | 360 CaseFoldingUTextIterator(UText &text); |
| 374 ~CaseFoldingUTextIterator(); | 361 ~CaseFoldingUTextIterator(); |
| 375 | 362 |
| 376 UChar32 next(); // Next case folded character | 363 UChar32 next(); // Next case folded character |
| 377 | 364 |
| 378 UBool inExpansion(); // True if last char returned from next() and
the | 365 UBool inExpansion(); // True if last char returned from next() and
the |
| 379 // next to be returned both originated from a
string | 366 // next to be returned both originated from a
string |
| 380 // folding of the same code point from the or
iginal UText. | 367 // folding of the same code point from the or
iginal UText. |
| 381 private: | 368 private: |
| 382 UText &fUText; | 369 UText &fUText; |
| 383 const UCaseProps *fcsp; | 370 const UCaseProps *fcsp; |
| 384 const UChar *fFoldChars; | 371 const UChar *fFoldChars; |
| 385 int32_t fFoldLength; | 372 int32_t fFoldLength; |
| 386 int32_t fFoldIndex; | 373 int32_t fFoldIndex; |
| 387 | 374 |
| 388 }; | 375 }; |
| 389 | 376 |
| 390 | 377 |
| 391 // Case folded UChar * string iterator. | 378 // Case folded UChar * string iterator. |
| 392 // Wraps a UChar *, provides a case-folded enumeration over its contents. | 379 // Wraps a UChar *, provides a case-folded enumeration over its contents. |
| 393 // Used in implementing case insensitive matching constructs. | 380 // Used in implementing case insensitive matching constructs. |
| 394 // Implementation in rematch.cpp | 381 // Implementation in rematch.cpp |
| 395 | 382 |
| 396 class CaseFoldingUCharIterator: public UMemory { | 383 class CaseFoldingUCharIterator: public UMemory { |
| 397 public: | 384 public: |
| 398 CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limi
t); | 385 CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limi
t); |
| 399 ~CaseFoldingUCharIterator(); | 386 ~CaseFoldingUCharIterator(); |
| 400 | 387 |
| 401 UChar32 next(); // Next case folded character | 388 UChar32 next(); // Next case folded character |
| 402 | 389 |
| 403 UBool inExpansion(); // True if last char returned from next() and
the | 390 UBool inExpansion(); // True if last char returned from next() and
the |
| 404 // next to be returned both originated from a
string | 391 // next to be returned both originated from a
string |
| 405 // folding of the same code point from the or
iginal UChar string. | 392 // folding of the same code point from the or
iginal UChar string. |
| 406 | 393 |
| 407 int64_t getIndex(); // Return the current input buffer index. | 394 int64_t getIndex(); // Return the current input buffer index. |
| 408 | 395 |
| 409 private: | 396 private: |
| 410 const UChar *fChars; | 397 const UChar *fChars; |
| 411 int64_t fIndex; | 398 int64_t fIndex; |
| 412 int64_t fLimit; | 399 int64_t fLimit; |
| 413 const UCaseProps *fcsp; | 400 const UCaseProps *fcsp; |
| 414 const UChar *fFoldChars; | 401 const UChar *fFoldChars; |
| 415 int32_t fFoldLength; | 402 int32_t fFoldLength; |
| 416 int32_t fFoldIndex; | 403 int32_t fFoldIndex; |
| 417 | 404 |
| 418 }; | 405 }; |
| 419 | 406 |
| 420 U_NAMESPACE_END | 407 U_NAMESPACE_END |
| 421 #endif | 408 #endif |
| 422 | 409 |
| OLD | NEW |