OLD | NEW |
| (Empty) |
1 /* | |
2 * Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved. | |
3 ********************************************************************** | |
4 * Date Name Description | |
5 * 07/18/01 aliu Creation. | |
6 ********************************************************************** | |
7 */ | |
8 #ifndef UNIMATCH_H | |
9 #define UNIMATCH_H | |
10 | |
11 #include "unicode/utypes.h" | |
12 | |
13 /** | |
14 * \file | |
15 * \brief C++ API: Unicode Matcher | |
16 */ | |
17 | |
18 | |
19 U_NAMESPACE_BEGIN | |
20 | |
// Forward declarations: this header only names these types in reference
// parameters and reference return types, so their definitions are not needed.
class Replaceable;
class UnicodeString;
class UnicodeSet;
24 | |
/**
 * Constants returned by <code>UnicodeMatcher::matches()</code>
 * indicating the degree of match.
 * @stable ICU 2.4
 */
enum UMatchDegree {
    /**
     * Constant returned by <code>matches()</code> indicating a
     * mismatch between the text and this matcher.  The text contains
     * a character which does not match, or the text does not contain
     * all desired characters for a non-incremental match.
     * @stable ICU 2.4
     */
    U_MISMATCH,

    /**
     * Constant returned by <code>matches()</code> indicating a
     * partial match between the text and this matcher.  This value is
     * only returned for incremental match operations.  All characters
     * of the text match, but more characters are required for a
     * complete match.  Alternatively, for variable-length matchers,
     * all characters of the text match, and if more characters were
     * supplied at limit, they might also match.
     * @stable ICU 2.4
     */
    U_PARTIAL_MATCH,

    /**
     * Constant returned by <code>matches()</code> indicating a
     * complete match between the text and this matcher.  For an
     * incremental variable-length match, this value is returned if
     * the given text matches, and it is known that additional
     * characters would not alter the extent of the match.
     * @stable ICU 2.4
     */
    U_MATCH
};
62 | |
63 /** | |
64 * <code>UnicodeMatcher</code> defines a protocol for objects that can | |
65 * match a range of characters in a Replaceable string. | |
66 * @stable ICU 2.4 | |
67 */ | |
68 class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an int
erface/mixin class */ { | |
69 | |
70 public: | |
71 /** | |
72 * Destructor. | |
73 * @stable ICU 2.4 | |
74 */ | |
75 virtual ~UnicodeMatcher(); | |
76 | |
77 /** | |
78 * Return a UMatchDegree value indicating the degree of match for | |
79 * the given text at the given offset. Zero, one, or more | |
80 * characters may be matched. | |
81 * | |
82 * Matching in the forward direction is indicated by limit > | |
83 * offset. Characters from offset forwards to limit-1 will be | |
84 * considered for matching. | |
85 * | |
86 * Matching in the reverse direction is indicated by limit < | |
87 * offset. Characters from offset backwards to limit+1 will be | |
88 * considered for matching. | |
89 * | |
90 * If limit == offset then the only match possible is a zero | |
91 * character match (which subclasses may implement if desired). | |
92 * | |
93 * As a side effect, advance the offset parameter to the limit of | |
94 * the matched substring. In the forward direction, this will be | |
95 * the index of the last matched character plus one. In the | |
96 * reverse direction, this will be the index of the last matched | |
97 * character minus one. | |
98 * | |
99 * <p>Note: This method is not const because some classes may | |
100 * modify their state as the result of a match. | |
101 * | |
102 * @param text the text to be matched | |
103 * @param offset on input, the index into text at which to begin | |
104 * matching. On output, the limit of the matched text. The | |
105 * number of matched characters is the output value of offset | |
106 * minus the input value. Offset should always point to the | |
107 * HIGH SURROGATE (leading code unit) of a pair of surrogates, | |
108 * both on entry and upon return. | |
109 * @param limit the limit index of text to be matched. Greater | |
110 * than offset for a forward direction match, less than offset for | |
111 * a backward direction match. The last character to be | |
112 * considered for matching will be text.charAt(limit-1) in the | |
113 * forward direction or text.charAt(limit+1) in the backward | |
114 * direction. | |
115 * @param incremental if TRUE, then assume further characters may | |
116 * be inserted at limit and check for partial matching. Otherwise | |
117 * assume the text as given is complete. | |
118 * @return a match degree value indicating a full match, a partial | |
119 * match, or a mismatch. If incremental is FALSE then | |
120 * U_PARTIAL_MATCH should never be returned. | |
121 * @stable ICU 2.4 | |
122 */ | |
123 virtual UMatchDegree matches(const Replaceable& text, | |
124 int32_t& offset, | |
125 int32_t limit, | |
126 UBool incremental) = 0; | |
127 | |
128 /** | |
129 * Returns a string representation of this matcher. If the result of | |
130 * calling this function is passed to the appropriate parser, it | |
131 * will produce another matcher that is equal to this one. | |
132 * @param result the string to receive the pattern. Previous | |
133 * contents will be deleted. | |
134 * @param escapeUnprintable if TRUE then convert unprintable | |
135 * character to their hex escape representations, \\uxxxx or | |
136 * \\Uxxxxxxxx. Unprintable characters are those other than | |
137 * U+000A, U+0020..U+007E. | |
138 * @stable ICU 2.4 | |
139 */ | |
140 virtual UnicodeString& toPattern(UnicodeString& result, | |
141 UBool escapeUnprintable = FALSE) const = 0; | |
142 | |
143 /** | |
144 * Returns TRUE if this matcher will match a character c, where c | |
145 * & 0xFF == v, at offset, in the forward direction (with limit > | |
146 * offset). This is used by <tt>RuleBasedTransliterator</tt> for | |
147 * indexing. | |
148 * @stable ICU 2.4 | |
149 */ | |
150 virtual UBool matchesIndexValue(uint8_t v) const = 0; | |
151 | |
152 /** | |
153 * Union the set of all characters that may be matched by this object | |
154 * into the given set. | |
155 * @param toUnionTo the set into which to union the source characters | |
156 * @stable ICU 2.4 | |
157 */ | |
158 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0; | |
159 }; | |
160 | |
161 U_NAMESPACE_END | |
162 | |
163 #endif | |
OLD | NEW |