| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * | 3 * |
| 4 * Copyright (C) 2002-2012, International Business Machines | 4 * Copyright (C) 2002-2014, International Business Machines |
| 5 * Corporation and others. All Rights Reserved. | 5 * Corporation and others. All Rights Reserved. |
| 6 * | 6 * |
| 7 ******************************************************************************* | 7 ******************************************************************************* |
| 8 * file name: uset.h | 8 * file name: uset.h |
| 9 * encoding: US-ASCII | 9 * encoding: US-ASCII |
| 10 * tab size: 8 (not used) | 10 * tab size: 8 (not used) |
| 11 * indentation:4 | 11 * indentation:4 |
| 12 * | 12 * |
| 13 * created on: 2002mar07 | 13 * created on: 2002mar07 |
| 14 * created by: Markus W. Scherer | 14 * created by: Markus W. Scherer |
| (...skipping 76 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 91 */ | 91 */ |
| 92 USET_ADD_CASE_MAPPINGS = 4 | 92 USET_ADD_CASE_MAPPINGS = 4 |
| 93 }; | 93 }; |
| 94 | 94 |
| 95 /** | 95 /** |
| 96 * Argument values for whether span() and similar functions continue while | 96 * Argument values for whether span() and similar functions continue while |
| 97 * the current character is contained vs. not contained in the set. | 97 * the current character is contained vs. not contained in the set. |
| 98 * | 98 * |
| 99 * The functionality is straightforward for sets with only single code points, | 99 * The functionality is straightforward for sets with only single code points, |
| 100 * without strings (which is the common case): | 100 * without strings (which is the common case): |
| 101 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE | 101 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same. |
| 102 * work the same. | 102 * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED. |
| 103 * - span() and spanBack() partition any string the same way when | 103 * - span() and spanBack() partition any string the same way when |
| 104 * alternating between span(USET_SPAN_NOT_CONTAINED) and | 104 * alternating between span(USET_SPAN_NOT_CONTAINED) and |
| 105 * span(either "contained" condition). | 105 * span(either "contained" condition). |
| 106 * - Using a complemented (inverted) set and the opposite span conditions | 106 * - Using a complemented (inverted) set and the opposite span conditions |
| 107 * yields the same results. | 107 * yields the same results. |
| 108 * | 108 * |
| 109 * When a set contains multi-code point strings, then these statements may not | 109 * When a set contains multi-code point strings, then these statements may not |
| 110 * be true, depending on the strings in the set (for example, whether they | 110 * be true, depending on the strings in the set (for example, whether they |
| 111 * overlap with each other) and the string that is processed. | 111 * overlap with each other) and the string that is processed. |
| 112 * For a set with strings: | 112 * For a set with strings: |
| (...skipping 29 matching lines...) Expand all Loading... |
| 142 * never in the middle of a surrogate pair. | 142 * never in the middle of a surrogate pair. |
| 143 * Illegal UTF-8 sequences are treated like U+FFFD. | 143 * Illegal UTF-8 sequences are treated like U+FFFD. |
| 144 * When processing UTF-8 strings, malformed set strings | 144 * When processing UTF-8 strings, malformed set strings |
| 145 * (strings with unpaired surrogates which cannot be converted to UTF-8) | 145 * (strings with unpaired surrogates which cannot be converted to UTF-8) |
| 146 * are ignored. | 146 * are ignored. |
| 147 * | 147 * |
| 148 * @stable ICU 3.8 | 148 * @stable ICU 3.8 |
| 149 */ | 149 */ |
| 150 typedef enum USetSpanCondition { | 150 typedef enum USetSpanCondition { |
| 151 /** | 151 /** |
| 152 * Continue a span() while there is no set element at the current position. | 152 * Continues a span() while there is no set element at the current position. |
| 153 * Increments by one code point at a time. |
| 153 * Stops before the first set element (character or string). | 154 * Stops before the first set element (character or string). |
| 154 * (For code points only, this is like while contains(current)==FALSE). | 155 * (For code points only, this is like while contains(current)==FALSE). |
| 155 * | 156 * |
| 156 * When span() returns, the substring between where it started and the position | 157 * When span() returns, the substring between where it started and the position |
| 157 * it returned consists only of characters that are not in the set, | 158 * it returned consists only of characters that are not in the set, |
| 158 * and none of its strings overlap with the span. | 159 * and none of its strings overlap with the span. |
| 159 * | 160 * |
| 160 * @stable ICU 3.8 | 161 * @stable ICU 3.8 |
| 161 */ | 162 */ |
| 162 USET_SPAN_NOT_CONTAINED = 0, | 163 USET_SPAN_NOT_CONTAINED = 0, |
| 163 /** | 164 /** |
| 164 * Continue a span() while there is a set element at the current position. | 165 * Spans the longest substring that is a concatenation of set elements (characters or strings). |
| 165 * (For characters only, this is like while contains(current)==TRUE). | 166 * (For characters only, this is like while contains(current)==TRUE). |
| 166 * | 167 * |
| 167 * When span() returns, the substring between where it started and the position | 168 * When span() returns, the substring between where it started and the position |
| 168 * it returned consists only of set elements (characters or strings) that are in the set. | 169 * it returned consists only of set elements (characters or strings) that are in the set. |
| 169 * | 170 * |
| 170 * If a set contains strings, then the span will be the longest substring | 171 * If a set contains strings, then the span will be the longest substring for which there |
| 171 * matching any of the possible concatenations of set elements (characters or strings). | 172 * exists at least one non-overlapping concatenation of set elements (characters or strings). |
| 172 * (There must be a single, non-overlapping concatenation of characters or strings.) | 173 * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>. |
| 173 * This is equivalent to a POSIX regular expression for (OR of each set element)*. | 174 * (Java/ICU/Perl regex stops at the first match of an OR.) |
| 174 * | 175 * |
| 175 * @stable ICU 3.8 | 176 * @stable ICU 3.8 |
| 176 */ | 177 */ |
| 177 USET_SPAN_CONTAINED = 1, | 178 USET_SPAN_CONTAINED = 1, |
| 178 /** | 179 /** |
| 179 * Continue a span() while there is a set element at the current position. | 180 * Continues a span() while there is a set element at the current position. |
| 181 * Increments by the longest matching element at each position. |
| 180 * (For characters only, this is like while contains(current)==TRUE). | 182 * (For characters only, this is like while contains(current)==TRUE). |
| 181 * | 183 * |
| 182 * When span() returns, the substring between where it started and the position | 184 * When span() returns, the substring between where it started and the position |
| 183 * it returned consists only of set elements (characters or strings) that are in the set. | 185 * it returned consists only of set elements (characters or strings) that are in the set. |
| 184 * | 186 * |
| 185 * If a set only contains single characters, then this is the same | 187 * If a set only contains single characters, then this is the same |
| 186 * as USET_SPAN_CONTAINED. | 188 * as USET_SPAN_CONTAINED. |
| 187 * | 189 * |
| 188 * If a set contains strings, then the span will be the longest substring | 190 * If a set contains strings, then the span will be the longest substring |
| 189 * with a match at each position with the longest single set element (character or string). | 191 * with a match at each position with the longest single set element (character or string). |
| (...skipping 925 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 1115 * @param pEnd pointer to variable to receive last character in range, | 1117 * @param pEnd pointer to variable to receive last character in range, |
| 1116 * inclusive | 1118 * inclusive |
| 1117 * @return true if rangeIndex is valid, otherwise false | 1119 * @return true if rangeIndex is valid, otherwise false |
| 1118 * @stable ICU 2.4 | 1120 * @stable ICU 2.4 |
| 1119 */ | 1121 */ |
| 1120 U_STABLE UBool U_EXPORT2 | 1122 U_STABLE UBool U_EXPORT2 |
| 1121 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, | 1123 uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex, |
| 1122 UChar32* pStart, UChar32* pEnd); | 1124 UChar32* pStart, UChar32* pEnd); |
| 1123 | 1125 |
| 1124 #endif | 1126 #endif |
| OLD | NEW |