OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * | 3 * |
4 * Copyright (C) 2002-2012, International Business Machines | 4 * Copyright (C) 2002-2014, International Business Machines |
5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
6 * | 6 * |
7 ******************************************************************************* | 7 ******************************************************************************* |
8 * file name: uset.h | 8 * file name: uset.h |
9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
11 * indentation:4 | 11 * indentation:4 |
12 * | 12 * |
13 * created on: 2002mar07 | 13 * created on: 2002mar07 |
14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
(...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
91 */ | 91 */ |
92 USET_ADD_CASE_MAPPINGS = 4 | 92 USET_ADD_CASE_MAPPINGS = 4 |
93 }; | 93 }; |
94 | 94 |
95 /** | 95 /** |
96 * Argument values for whether span() and similar functions continue while | 96 * Argument values for whether span() and similar functions continue while |
97 * the current character is contained vs. not contained in the set. | 97 * the current character is contained vs. not contained in the set. |
98 * | 98 * |
99 * The functionality is straightforward for sets with only single code points, | 99 * The functionality is straightforward for sets with only single code points, |
100 * without strings (which is the common case): | 100 * without strings (which is the common case): |
101 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE | 101 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. |
102 * work the same. | 102 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. |
103 * - span() and spanBack() partition any string the same way when | 103 * - span() and spanBack() partition any string the same way when |
104 * alternating between span(USET_SPAN_NOT_CONTAINED) and | 104 * alternating between span(USET_SPAN_NOT_CONTAINED) and |
105 * span(either "contained" condition). | 105 * span(either "contained" condition). |
106 * - Using a complemented (inverted) set and the opposite span conditions | 106 * - Using a complemented (inverted) set and the opposite span conditions |
107 * yields the same results. | 107 * yields the same results. |
108 * | 108 * |
109 * When a set contains multi-code point strings, then these statements may not | 109 * When a set contains multi-code point strings, then these statements may not |
110 * be true, depending on the strings in the set (for example, whether they | 110 * be true, depending on the strings in the set (for example, whether they |
111 * overlap with each other) and the string that is processed. | 111 * overlap with each other) and the string that is processed. |
112 * For a set with strings: | 112 * For a set with strings: |
(...skipping 29 matching lines...) Expand all Loading... |
142 * never in the middle of a surrogate pair. | 142 * never in the middle of a surrogate pair. |
143 * Illegal UTF-8 sequences are treated like U+FFFD. | 143 * Illegal UTF-8 sequences are treated like U+FFFD. |
144 * When processing UTF-8 strings, malformed set strings | 144 * When processing UTF-8 strings, malformed set strings |
145 * (strings with unpaired surrogates which cannot be converted to UTF-8) | 145 * (strings with unpaired surrogates which cannot be converted to UTF-8) |
146 * are ignored. | 146 * are ignored. |
147 * | 147 * |
148 * @stable ICU 3.8 | 148 * @stable ICU 3.8 |
149 */ | 149 */ |
150 typedef enum USetSpanCondition { | 150 typedef enum USetSpanCondition { |
151 /** | 151 /** |
152 * Continue a span() while there is no set element at the current position. | 152 * Continues a span() while there is no set element at the current position. |
| 153 * Increments by one code point at a time. |
153 * Stops before the first set element (character or string). | 154 * Stops before the first set element (character or string). |
154 * (For code points only, this is like while contains(current)==FALSE). | 155 * (For code points only, this is like while contains(current)==FALSE). |
155 * | 156 * |
156 * When span() returns, the substring between where it started and the position | 157 * When span() returns, the substring between where it started and the position |
157 * it returned consists only of characters that are not in the set, | 158 * it returned consists only of characters that are not in the set, |
158 * and none of its strings overlap with the span. | 159 * and none of its strings overlap with the span. |
159 * | 160 * |
160 * @stable ICU 3.8 | 161 * @stable ICU 3.8 |
161 */ | 162 */ |
162 USET_SPAN_NOT_CONTAINED = 0, | 163 USET_SPAN_NOT_CONTAINED = 0, |
163 /** | 164 /** |
164 * Continue a span() while there is a set element at the current position. | 165 * Spans the longest substring that is a concatenation of set elements (characters or strings). |
165 * (For characters only, this is like while contains(current)==TRUE). | 166 * (For characters only, this is like while contains(current)==TRUE). |
166 * | 167 * |
167 * When span() returns, the substring between where it started and the position | 168 * When span() returns, the substring between where it started and the position |
168 * it returned consists only of set elements (characters or strings) that are in the set. | 169 * it returned consists only of set elements (characters or strings) that are in the set. |
169 * | 170 * |
170 * If a set contains strings, then the span will be the longest substring | 171 * If a set contains strings, then the span will be the longest substring for which there |
171 * matching any of the possible concatenations of set elements (characters or strings). | 172 * exists at least one non-overlapping concatenation of set elements (characters or strings). |
172 * (There must be a single, non-overlapping concatenation of characters or strings.) | 173 * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. |
173 * This is equivalent to a POSIX regular expression for (OR of each set element)*. | 174 * (Java/ICU/Perl regex stops at the first match of an OR.) |
174 * | 175 * |
175 * @stable ICU 3.8 | 176 * @stable ICU 3.8 |
176 */ | 177 */ |
177 USET_SPAN_CONTAINED = 1, | 178 USET_SPAN_CONTAINED = 1, |
178 /** | 179 /** |
179 * Continue a span() while there is a set element at the current position. | 180 * Continues a span() while there is a set element at the current position. |
| 181 * Increments by the longest matching element at each position. |
180 * (For characters only, this is like while contains(current)==TRUE). | 182 * (For characters only, this is like while contains(current)==TRUE). |
181 * | 183 * |
182 * When span() returns, the substring between where it started and the position | 184 * When span() returns, the substring between where it started and the position |
183 * it returned consists only of set elements (characters or strings) that are in the set. | 185 * it returned consists only of set elements (characters or strings) that are in the set. |
184 * | 186 * |
185 * If a set only contains single characters, then this is the same | 187 * If a set only contains single characters, then this is the same |
186 * as USET_SPAN_CONTAINED. | 188 * as USET_SPAN_CONTAINED. |
187 * | 189 * |
188 * If a set contains strings, then the span will be the longest substring | 190 * If a set contains strings, then the span will be the longest substring |
189 * with a match at each position with the longest single set element (character or string). | 191 * with a match at each position with the longest single set element (character or string). |
(...skipping 925 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
1115 * @param pEnd pointer to variable to receive last character in range, | 1117 * @param pEnd pointer to variable to receive last character in range, |
1116 * inclusive | 1118 * inclusive |
1117 * @return true if rangeIndex is valid, otherwise false | 1119 * @return true if rangeIndex is valid, otherwise false |
1118 * @stable ICU 2.4 | 1120 * @stable ICU 2.4 |
1119 */ | 1121 */ |
1120 U_STABLE UBool U_EXPORT2 | 1122 U_STABLE UBool U_EXPORT2 |
1121 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, | 1123 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, |
1122 UChar32* pStart, UChar32* pEnd); | 1124 UChar32* pStart, UChar32* pEnd); |
1123 | 1125 |
1124 #endif | 1126 #endif |
OLD | NEW |