OLD | NEW |
1 /* | 1 /* |
2 ********************************************************************** | 2 ********************************************************************** |
3 * Copyright (C) 2002-2013, International Business Machines | 3 * Copyright (C) 2002-2014, International Business Machines |
4 * Corporation and others. All Rights Reserved. | 4 * Corporation and others. All Rights Reserved. |
5 ********************************************************************** | 5 ********************************************************************** |
6 * file name: regex.h | 6 * file name: regex.h |
7 * encoding: US-ASCII | 7 * encoding: US-ASCII |
8 * indentation:4 | 8 * indentation:4 |
9 * | 9 * |
10 * created on: 2002oct22 | 10 * created on: 2002oct22 |
11 * created by: Andy Heninger | 11 * created by: Andy Heninger |
12 * | 12 * |
13 * ICU Regular Expressions, API for C++ | 13 * ICU Regular Expressions, API for C++ |
(...skipping 47 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
61 class RegexCImpl; | 61 class RegexCImpl; |
62 class RegexMatcher; | 62 class RegexMatcher; |
63 class RegexPattern; | 63 class RegexPattern; |
64 struct REStackFrame; | 64 struct REStackFrame; |
65 class RuleBasedBreakIterator; | 65 class RuleBasedBreakIterator; |
66 class UnicodeSet; | 66 class UnicodeSet; |
67 class UVector; | 67 class UVector; |
68 class UVector32; | 68 class UVector32; |
69 class UVector64; | 69 class UVector64; |
70 | 70 |
71 #ifndef U_HIDE_INTERNAL_API | |
72 /** | |
73 * RBBIPatternDump Debug function, displays the compiled form of a pattern. | |
74 * @internal | |
75 */ | |
76 #ifdef REGEX_DEBUG | |
77 U_INTERNAL void U_EXPORT2 | |
78 RegexPatternDump(const RegexPattern *pat); | |
79 #else | |
80 #undef RegexPatternDump | |
81 #define RegexPatternDump(pat) | |
82 #endif | |
83 #endif /* U_HIDE_INTERNAL_API */ | |
84 | |
85 | |
86 | 71 |
87 /** | 72 /** |
88 * Class <code>RegexPattern</code> represents a compiled regular expression. It includes | 73 * Class <code>RegexPattern</code> represents a compiled regular expression. It includes |
89 * factory methods for creating a RegexPattern object from the source (string) form | 74 * factory methods for creating a RegexPattern object from the source (string) form |
90 * of a regular expression, methods for creating RegexMatchers that allow the pattern | 75 * of a regular expression, methods for creating RegexMatchers that allow the pattern |
91 * to be applied to input text, and a few convenience methods for simple common | 76 * to be applied to input text, and a few convenience methods for simple common |
92 * uses of regular expressions. | 77 * uses of regular expressions. |
93 * | 78 * |
94 * <p>Class RegexPattern is not intended to be subclassed.</p> | 79 * <p>Class RegexPattern is not intended to be subclassed.</p> |
95 * | 80 * |
96 * @stable ICU 2.4 | 81 * @stable ICU 2.4 |
97 */ | 82 */ |
98 class U_I18N_API RegexPattern: public UObject { | 83 class U_I18N_API RegexPattern U_FINAL : public UObject { |
99 public: | 84 public: |
100 | 85 |
101 /** | 86 /** |
102 * default constructor. Create a RegexPattern object that refers to no actual | 87 * default constructor. Create a RegexPattern object that refers to no actual |
103 * pattern. Not normally needed; RegexPattern objects are usually | 88 * pattern. Not normally needed; RegexPattern objects are usually |
104 * created using the factory method <code>compile()</code>. | 89 * created using the factory method <code>compile()</code>. |
105 * | 90 * |
106 * @stable ICU 2.4 | 91 * @stable ICU 2.4 |
107 */ | 92 */ |
108 RegexPattern(); | 93 RegexPattern(); |
(...skipping 497 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
606 | 591 |
607 friend class RegexCompile; | 592 friend class RegexCompile; |
608 friend class RegexMatcher; | 593 friend class RegexMatcher; |
609 friend class RegexCImpl; | 594 friend class RegexCImpl; |
610 | 595 |
611 // | 596 // |
612 // Implementation Methods | 597 // Implementation Methods |
613 // | 598 // |
614 void init(); // Common initialization, for use by construc
tors. | 599 void init(); // Common initialization, for use by construc
tors. |
615 void zap(); // Common cleanup | 600 void zap(); // Common cleanup |
616 #ifdef REGEX_DEBUG | 601 |
617 void dumpOp(int32_t index) const; | 602 void dumpOp(int32_t index) const; |
618 friend void U_EXPORT2 RegexPatternDump(const RegexPattern *); | |
619 #endif | |
620 | 603 |
| 604 public: |
| 605 #ifndef U_HIDE_INTERNAL_API |
| 606 /** |
| 607 * Dump a compiled pattern. Internal debug function. |
| 608 * @internal |
| 609 */ |
| 610 void dumpPattern() const; |
| 611 #endif /* U_HIDE_INTERNAL_API */ |
621 }; | 612 }; |
622 | 613 |
623 | 614 |
624 | 615 |
625 /** | 616 /** |
626 * class RegexMatcher bundles together a regular expression pattern and | 617 * class RegexMatcher bundles together a regular expression pattern and |
627 * input text to which the expression can be applied. It includes methods | 618 * input text to which the expression can be applied. It includes methods |
628 * for testing for matches, and for find and replace operations. | 619 * for testing for matches, and for find and replace operations. |
629 * | 620 * |
630 * <p>Class RegexMatcher is not intended to be subclassed.</p> | 621 * <p>Class RegexMatcher is not intended to be subclassed.</p> |
631 * | 622 * |
632 * @stable ICU 2.4 | 623 * @stable ICU 2.4 |
633 */ | 624 */ |
634 class U_I18N_API RegexMatcher: public UObject { | 625 class U_I18N_API RegexMatcher U_FINAL : public UObject { |
635 public: | 626 public: |
636 | 627 |
637 /** | 628 /** |
638 * Construct a RegexMatcher for a regular expression. | 629 * Construct a RegexMatcher for a regular expression. |
639 * This is a convenience method that avoids the need to explicitly create | 630 * This is a convenience method that avoids the need to explicitly create |
640 * a RegexPattern object. Note that if several RegexMatchers need to be | 631 * a RegexPattern object. Note that if several RegexMatchers need to be |
641 * created for the same expression, it will be more efficient to | 632 * created for the same expression, it will be more efficient to |
642 * separately create and cache a RegexPattern object, and use | 633 * separately create and cache a RegexPattern object, and use |
643 * its matcher() method to create the RegexMatcher objects. | 634 * its matcher() method to create the RegexMatcher objects. |
644 * | 635 * |
(...skipping 159 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
804 * <p>Note that if the input string is changed by the application, | 795 * <p>Note that if the input string is changed by the application, |
805 * use find(startPos, status) instead of find(), because the saved starting | 796 * use find(startPos, status) instead of find(), because the saved starting |
806 * position may not be valid with the altered input string.</p> | 797 * position may not be valid with the altered input string.</p> |
807 * @return TRUE if a match is found. | 798 * @return TRUE if a match is found. |
808 * @stable ICU 2.4 | 799 * @stable ICU 2.4 |
809 */ | 800 */ |
810 virtual UBool find(); | 801 virtual UBool find(); |
811 | 802 |
812 | 803 |
813 /** | 804 /** |
| 805 * Find the next pattern match in the input string. |
| 806 * The find begins searching the input at the location following the end of |
| 807 * the previous match, or at the start of the string if there is no previous match. |
| 808 * If a match is found, <code>start(), end()</code> and <code>group()</code> |
| 809 * will provide more information regarding the match. |
| 810 * <p>Note that if the input string is changed by the application, |
| 811 * use find(startPos, status) instead of find(), because the saved starting |
| 812 * position may not be valid with the altered input string.</p> |
| 813 * @param status A reference to a UErrorCode to receive any errors. |
| 814 * @return TRUE if a match is found. |
| 815 * @internal |
| 816 */ |
| 817 virtual UBool find(UErrorCode &status); |
| 818 |
| 819 /** |
814 * Resets this RegexMatcher and then attempts to find the next substring of the | 820 * Resets this RegexMatcher and then attempts to find the next substring of the |
815 * input string that matches the pattern, starting at the specified index. | 821 * input string that matches the pattern, starting at the specified index. |
816 * | 822 * |
817 * @param start The (native) index in the input string to begin the search. | 823 * @param start The (native) index in the input string to begin the search. |
818 * @param status A reference to a UErrorCode to receive any errors. | 824 * @param status A reference to a UErrorCode to receive any errors. |
819 * @return TRUE if a match is found. | 825 * @return TRUE if a match is found. |
820 * @stable ICU 2.4 | 826 * @stable ICU 2.4 |
821 */ | 827 */ |
822 virtual UBool find(int64_t start, UErrorCode &status); | 828 virtual UBool find(int64_t start, UErrorCode &status); |
823 | 829 |
(...skipping 922 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1746 // MatchAt This is the internal interface to the match engine itself. | 1752 // MatchAt This is the internal interface to the match engine itself. |
1747 // Match status comes back in matcher member variables. | 1753 // Match status comes back in matcher member variables. |
1748 // | 1754 // |
1749 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &stat
us); | 1755 void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &stat
us); |
1750 inline void backTrack(int64_t &inputIdx, int32_t &patIdx); | 1756 inline void backTrack(int64_t &inputIdx, int32_t &patIdx); |
1751 UBool isWordBoundary(int64_t pos); // perform Perl-li
ke \b test | 1757 UBool isWordBoundary(int64_t pos); // perform Perl-li
ke \b test |
1752 UBool isUWordBoundary(int64_t pos); // perform RBBI ba
sed \b test | 1758 UBool isUWordBoundary(int64_t pos); // perform RBBI ba
sed \b test |
1753 REStackFrame *resetStack(); | 1759 REStackFrame *resetStack(); |
1754 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorC
ode &status); | 1760 inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorC
ode &status); |
1755 void IncrementTime(UErrorCode &status); | 1761 void IncrementTime(UErrorCode &status); |
1756 UBool ReportFindProgress(int64_t matchIndex, UErrorCode &stat
us); | 1762 |
| 1763 // Call user find callback function, if set. Return TRUE if operation should
be interrupted. |
| 1764 inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &s
tatus); |
1757 | 1765 |
1758 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &
status) const; | 1766 int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &
status) const; |
1759 | 1767 |
1760 UBool findUsingChunk(); | 1768 UBool findUsingChunk(UErrorCode &status); |
1761 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode
&status); | 1769 void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode
&status); |
1762 UBool isChunkWordBoundary(int32_t pos); | 1770 UBool isChunkWordBoundary(int32_t pos); |
1763 | 1771 |
1764 const RegexPattern *fPattern; | 1772 const RegexPattern *fPattern; |
1765 RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the
pattern, and | 1773 RegexPattern *fPatternOwned; // Non-NULL if this matcher owns the
pattern, and |
1766 // should delete it when through. | 1774 // should delete it when through. |
1767 | 1775 |
1768 const UnicodeString *fInput; // The string being matched. Only use
d for input() | 1776 const UnicodeString *fInput; // The string being matched. Only use
d for input() |
1769 UText *fInputText; // The text being matched. Is never N
ULL. | 1777 UText *fInputText; // The text being matched. Is never N
ULL. |
1770 UText *fAltInputText; // A shallow copy of the text being m
atched. | 1778 UText *fAltInputText; // A shallow copy of the text being m
atched. |
(...skipping 69 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1840 | 1848 |
1841 UErrorCode fDeferredStatus; // Save error state that cannot be im
mediately | 1849 UErrorCode fDeferredStatus; // Save error state that cannot be im
mediately |
1842 // reported, or that permanently di
sables this matcher. | 1850 // reported, or that permanently di
sables this matcher. |
1843 | 1851 |
1844 RuleBasedBreakIterator *fWordBreakItr; | 1852 RuleBasedBreakIterator *fWordBreakItr; |
1845 }; | 1853 }; |
1846 | 1854 |
1847 U_NAMESPACE_END | 1855 U_NAMESPACE_END |
1848 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS | 1856 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS |
1849 #endif | 1857 #endif |
OLD | NEW |