OLD | NEW |
1 // | 1 // |
2 // Copyright (C) 2002-2012 International Business Machines Corporation | 2 // Copyright (C) 2002-2013 International Business Machines Corporation |
3 // and others. All rights reserved. | 3 // and others. All rights reserved. |
4 // | 4 // |
5 // file: regeximp.h | 5 // file: regeximp.h |
6 // | 6 // |
7 // ICU Regular Expressions, | 7 // ICU Regular Expressions, |
8 // Definitions of constant values used in the compiled form of | 8 // Definitions of constant values used in the compiled form of |
9 // a regular expression pattern. | 9 // a regular expression pattern. |
10 // | 10 // |
11 | 11 |
12 #ifndef _REGEXIMP_H | 12 #ifndef _REGEXIMP_H |
13 #define _REGEXIMP_H | 13 #define _REGEXIMP_H |
14 | 14 |
15 #include "unicode/utypes.h" | 15 #include "unicode/utypes.h" |
16 #include "unicode/uobject.h" | 16 #include "unicode/uobject.h" |
17 #include "unicode/uniset.h" | 17 #include "unicode/uniset.h" |
18 #include "unicode/utext.h" | 18 #include "unicode/utext.h" |
19 | 19 |
20 #include "cmemory.h" | 20 #include "cmemory.h" |
21 #include "ucase.h" | 21 #include "ucase.h" |
22 | 22 |
23 U_NAMESPACE_BEGIN | 23 U_NAMESPACE_BEGIN |
24 | 24 |
25 // For debugging, define REGEX_DEBUG | 25 // For debugging, define REGEX_DEBUG |
26 // To define with configure, | 26 // To define with configure, |
27 // ./runConfigureICU --enable-debug --disable-release Linux CPPFLAGS="-DREGEX_
DEBUG" | 27 // CPPFLAGS="-DREGEX_DEBUG" ./runConfigureICU --enable-debug --disable-release
Linux |
28 | 28 |
29 #ifdef REGEX_DEBUG | 29 #ifdef REGEX_DEBUG |
30 // | 30 // |
31 // debugging options. Enable one or more of the three #defines immediately fol
lowing | 31 // debugging options. Enable one or more of the three #defines immediately fol
lowing |
32 // | 32 // |
33 | 33 |
34 //#define REGEX_SCAN_DEBUG | 34 //#define REGEX_SCAN_DEBUG |
35 #define REGEX_DUMP_DEBUG | 35 #define REGEX_DUMP_DEBUG |
36 #define REGEX_RUN_DEBUG | 36 #define REGEX_RUN_DEBUG |
37 | 37 |
38 // End of #defines intended to be directly set. | 38 // End of #defines intended to be directly set. |
39 | 39 |
40 #include <stdio.h> | 40 #include <stdio.h> |
41 #endif | 41 #endif |
42 | 42 |
43 #ifdef REGEX_SCAN_DEBUG | 43 #ifdef REGEX_SCAN_DEBUG |
44 #define REGEX_SCAN_DEBUG_PRINTF(a) printf a | 44 #define REGEX_SCAN_DEBUG_PRINTF(a) printf a |
45 #else | 45 #else |
46 #define REGEX_SCAN_DEBUG_PRINTF(a) | 46 #define REGEX_SCAN_DEBUG_PRINTF(a) |
47 #endif | 47 #endif |
48 | 48 |
49 #ifdef REGEX_DUMP_DEBUG | |
50 #define REGEX_DUMP_DEBUG_PRINTF(a) printf a | |
51 #else | |
52 #define REGEX_DUMP_DEBUG_PRINTF(a) | |
53 #endif | |
54 | |
55 #ifdef REGEX_RUN_DEBUG | |
56 #define REGEX_RUN_DEBUG_PRINTF(a) printf a | |
57 #define REGEX_DUMP_DEBUG_PRINTF(a) printf a | |
58 #else | |
59 #define REGEX_RUN_DEBUG_PRINTF(a) | |
60 #endif | |
61 | |
62 | 49 |
63 // | 50 // |
64 // Opcode types: in the compiled form of the regexp, these are the types, or opcodes, | 51 // Opcode types: in the compiled form of the regexp, these are the types, or opcodes, |
65 // of the entries. | 52 // of the entries. |
66 // | 53 // |
67 enum { | 54 enum { |
68 URX_RESERVED_OP = 0, // For multi-operand ops, most non-first words. | 55 URX_RESERVED_OP = 0, // For multi-operand ops, most non-first words. |
69 URX_RESERVED_OP_N = 255, // For multi-operand ops, negative operand values
. | 56 URX_RESERVED_OP_N = 255, // For multi-operand ops, negative operand values
. |
70 URX_BACKTRACK = 1, // Force a backtrack, as if a match test had fail
ed. | 57 URX_BACKTRACK = 1, // Force a backtrack, as if a match test had fail
ed. |
71 URX_END = 2, | 58 URX_END = 2, |
(...skipping 175 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
247 "LOOP_C", \ | 234 "LOOP_C", \ |
248 "LOOP_DOT_I", \ | 235 "LOOP_DOT_I", \ |
249 "BACKSLASH_BU", \ | 236 "BACKSLASH_BU", \ |
250 "DOLLAR_D", \ | 237 "DOLLAR_D", \ |
251 "DOLLAR_MD" | 238 "DOLLAR_MD" |
252 | 239 |
253 | 240 |
254 // | 241 // |
255 // Convenience macros for assembling and disassembling a compiled operation. | 242 // Convenience macros for assembling and disassembling a compiled operation. |
256 // | 243 // |
257 int32_t URX_BUILD(int32_t val, int32_t type); | 244 #define URX_BUILD(type, val) (int32_t)((type << 24) | (val)) |
258 #define URX_TYPE(x) ((uint32_t)(x) >> 24) | 245 #define URX_TYPE(x) ((uint32_t)(x) >> 24) |
259 #define URX_VAL(x) ((x) & 0xffffff) | 246 #define URX_VAL(x) ((x) & 0xffffff) |
260 | 247 |
261 | 248 |
262 // | 249 // |
263 // Access to Unicode Sets composite character properties | 250 // Access to Unicode Sets composite character properties |
264 // The sets are accessed by the match engine for things like \w (word character) | 251 // The sets are accessed by the match engine for things like \w (word character) |
265 // | 252 // |
266 enum { | 253 enum { |
267 URX_ISWORD_SET = 1, | 254 URX_ISWORD_SET = 1, |
(...skipping 98 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
366 // Case folded UText Iterator helper class. | 353 // Case folded UText Iterator helper class. |
367 // Wraps a UText, provides a case-folded enumeration over its contents. | 354 // Wraps a UText, provides a case-folded enumeration over its contents. |
368 // Used in implementing case insensitive matching constructs. | 355 // Used in implementing case insensitive matching constructs. |
369 // Implementation in rematch.cpp | 356 // Implementation in rematch.cpp |
370 | 357 |
371 class CaseFoldingUTextIterator: public UMemory { | 358 class CaseFoldingUTextIterator: public UMemory { |
372 public: | 359 public: |
373 CaseFoldingUTextIterator(UText &text); | 360 CaseFoldingUTextIterator(UText &text); |
374 ~CaseFoldingUTextIterator(); | 361 ~CaseFoldingUTextIterator(); |
375 | 362 |
376 UChar32 next(); // Next case folded character | 363 UChar32 next(); // Next case folded character |
377 | 364 |
378 UBool inExpansion(); // True if last char returned from next() and
the | 365 UBool inExpansion(); // True if last char returned from next() and
the |
379 // next to be returned both originated from a
string | 366 // next to be returned both originated from a
string |
380 // folding of the same code point from the original UText. | 367 // folding of the same code point from the original UText. |
381 private: | 368 private: |
382 UText &fUText; | 369 UText &fUText; |
383 const UCaseProps *fcsp; | 370 const UCaseProps *fcsp; |
384 const UChar *fFoldChars; | 371 const UChar *fFoldChars; |
385 int32_t fFoldLength; | 372 int32_t fFoldLength; |
386 int32_t fFoldIndex; | 373 int32_t fFoldIndex; |
387 | 374 |
388 }; | 375 }; |
389 | 376 |
390 | 377 |
391 // Case folded UChar * string iterator. | 378 // Case folded UChar * string iterator. |
392 // Wraps a UChar *, provides a case-folded enumeration over its contents. | 379 // Wraps a UChar *, provides a case-folded enumeration over its contents. |
393 // Used in implementing case insensitive matching constructs. | 380 // Used in implementing case insensitive matching constructs. |
394 // Implementation in rematch.cpp | 381 // Implementation in rematch.cpp |
395 | 382 |
396 class CaseFoldingUCharIterator: public UMemory { | 383 class CaseFoldingUCharIterator: public UMemory { |
397 public: | 384 public: |
398 CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limi
t); | 385 CaseFoldingUCharIterator(const UChar *chars, int64_t start, int64_t limi
t); |
399 ~CaseFoldingUCharIterator(); | 386 ~CaseFoldingUCharIterator(); |
400 | 387 |
401 UChar32 next(); // Next case folded character | 388 UChar32 next(); // Next case folded character |
402 | 389 |
403 UBool inExpansion(); // True if last char returned from next() and
the | 390 UBool inExpansion(); // True if last char returned from next() and
the |
404 // next to be returned both originated from a
string | 391 // next to be returned both originated from a
string |
405 // folding of the same code point from the original input string. | 392 // folding of the same code point from the original input string. |
406 | 393 |
407 int64_t getIndex(); // Return the current input buffer index. | 394 int64_t getIndex(); // Return the current input buffer index. |
408 | 395 |
409 private: | 396 private: |
410 const UChar *fChars; | 397 const UChar *fChars; |
411 int64_t fIndex; | 398 int64_t fIndex; |
412 int64_t fLimit; | 399 int64_t fLimit; |
413 const UCaseProps *fcsp; | 400 const UCaseProps *fcsp; |
414 const UChar *fFoldChars; | 401 const UChar *fFoldChars; |
415 int32_t fFoldLength; | 402 int32_t fFoldLength; |
416 int32_t fFoldIndex; | 403 int32_t fFoldIndex; |
417 | 404 |
418 }; | 405 }; |
419 | 406 |
420 U_NAMESPACE_END | 407 U_NAMESPACE_END |
421 #endif | 408 #endif |
422 | 409 |
OLD | NEW |