OLD | NEW |
1 /* | 1 /* |
2 ******************************************************************************* | 2 ******************************************************************************* |
3 * Copyright (C) 2014, International Business Machines Corporation and | 3 * Copyright (C) 2014-2015, International Business Machines Corporation and |
4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
5 ******************************************************************************* | 5 ******************************************************************************* |
6 */ | 6 */ |
7 | 7 |
8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION | 9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
10 | 10 |
11 #include "cmemory.h" | 11 #include "cmemory.h" |
12 | 12 |
13 #include "unicode/filteredbrk.h" | 13 #include "unicode/filteredbrk.h" |
(...skipping 22 matching lines...) Expand all Loading... |
36 } | 36 } |
37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", | 37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", |
38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); | 38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); |
39 } | 39 } |
40 | 40 |
41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) | 41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) |
42 #else | 42 #else |
43 #define FB_TRACE(m,s,b,d) | 43 #define FB_TRACE(m,s,b,d) |
44 #endif | 44 #endif |
45 | 45 |
| 46 /** |
| 47 * Used with sortedInsert() |
| 48 */ |
46 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { | 49 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { |
47 const UnicodeString &a = *(const UnicodeString*)t1.pointer; | 50 const UnicodeString &a = *(const UnicodeString*)t1.pointer; |
48 const UnicodeString &b = *(const UnicodeString*)t2.pointer; | 51 const UnicodeString &b = *(const UnicodeString*)t2.pointer; |
49 return a.compare(b); | 52 return a.compare(b); |
50 } | 53 } |
51 | 54 |
52 /** | 55 /** |
53 * A UVector which implements a set of strings. | 56 * A UVector which implements a set of strings. |
54 */ | 57 */ |
55 class U_I18N_API UStringSet : public UVector { | 58 class U_COMMON_API UStringSet : public UVector { |
56 public: | 59 public: |
57 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, | 60 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, |
58 uhash_compareUnicodeString, | 61 uhash_compareUnicodeString, |
59 1, | 62 1, |
60 status) {} | 63 status) {} |
61 virtual ~UStringSet(); | 64 virtual ~UStringSet(); |
62 /** | 65 /** |
   * Is this UnicodeString contained?
64 */ | 67 */ |
65 inline UBool contains(const UnicodeString& s) { | 68 inline UBool contains(const UnicodeString& s) { |
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
110 if(U_FAILURE(status)) return false; | 113 if(U_FAILURE(status)) return false; |
111 return removeElement((void*) &s); | 114 return removeElement((void*) &s); |
112 } | 115 } |
113 }; | 116 }; |
114 | 117 |
115 /** | 118 /** |
116 * Virtual, won't be inlined | 119 * Virtual, won't be inlined |
117 */ | 120 */ |
118 UStringSet::~UStringSet() {} | 121 UStringSet::~UStringSet() {} |
119 | 122 |
| 123 /* ----------------------------------------------------------- */ |
120 | 124 |
| 125 |
| 126 /* Filtered Break constants */ |
121 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
d trie | 127 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
d trie |
122 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. | 128 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. |
123 static const int32_t kSuppressInReverse = (1<<0); | 129 static const int32_t kSuppressInReverse = (1<<0); |
124 static const int32_t kAddToForward = (1<<1); | 130 static const int32_t kAddToForward = (1<<1); |
125 static const UChar kFULLSTOP = 0x002E; // '.' | 131 static const UChar kFULLSTOP = 0x002E; // '.' |
126 | 132 |
| 133 /** |
| 134 * Shared data for SimpleFilteredSentenceBreakIterator |
| 135 */ |
| 136 class SimpleFilteredSentenceBreakData : public UMemory { |
| 137 public: |
| 138 SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) |
| 139 : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) {
} |
| 140 SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } |
| 141 SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; r
eturn 0; } |
| 142 virtual ~SimpleFilteredSentenceBreakData(); |
| 143 |
| 144 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." |
| 145 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. |
| 146 int32_t refcount; |
| 147 }; |
| 148 |
| 149 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} |
| 150 |
| 151 /** |
| 152 * Concrete implementation |
| 153 */ |
127 class SimpleFilteredSentenceBreakIterator : public BreakIterator { | 154 class SimpleFilteredSentenceBreakIterator : public BreakIterator { |
128 public: | 155 public: |
129 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards
, UCharsTrie *backwards, UErrorCode &status); | 156 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards
, UCharsTrie *backwards, UErrorCode &status); |
130 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator&
other); | 157 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator&
other); |
131 virtual ~SimpleFilteredSentenceBreakIterator(); | 158 virtual ~SimpleFilteredSentenceBreakIterator(); |
132 private: | 159 private: |
| 160 SimpleFilteredSentenceBreakData *fData; |
133 LocalPointer<BreakIterator> fDelegate; | 161 LocalPointer<BreakIterator> fDelegate; |
134 LocalUTextPointer fText; | 162 LocalUTextPointer fText; |
135 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. | |
136 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." | |
137 | 163 |
138 /* -- subclass interface -- */ | 164 /* -- subclass interface -- */ |
139 public: | 165 public: |
140 /* -- cloning and other subclass stuff -- */ | 166 /* -- cloning and other subclass stuff -- */ |
141 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, | 167 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, |
142 int32_t &/*BufferSize*/, | 168 int32_t &/*BufferSize*/, |
143 UErrorCode &status) { | 169 UErrorCode &status) { |
144 // for now - always deep clone | 170 // for now - always deep clone |
145 status = U_SAFECLONE_ALLOCATED_WARNING; | 171 status = U_SAFECLONE_ALLOCATED_WARNING; |
146 return clone(); | 172 return clone(); |
147 } | 173 } |
148 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr
eakIterator(*this); } | 174 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr
eakIterator(*this); } |
149 virtual UClassID getDynamicClassID(void) const { return NULL; } | 175 virtual UClassID getDynamicClassID(void) const { return NULL; } |
150 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t
rue; return false; } | 176 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t
rue; return false; } |
151 | 177 |
152 /* -- text modifying -- */ | 178 /* -- text modifying -- */ |
153 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex
t,status); } | 179 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex
t,status); } |
154 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD
elegate->refreshInputText(input,status); return *this; } | 180 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD
elegate->refreshInputText(input,status); return *this; } |
155 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } | 181 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } |
156 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } | 182 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } |
157 | 183 |
158 /* -- other functions that are just delegated -- */ | 184 /* -- other functions that are just delegated -- */ |
159 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel
egate->getUText(fillIn,status); } | 185 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel
egate->getUText(fillIn,status); } |
160 virtual CharacterIterator& getText(void) const { return fDelegate->getText();
} | 186 virtual CharacterIterator& getText(void) const { return fDelegate->getText();
} |
161 | 187 |
162 /* -- ITERATION -- */ | 188 /* -- ITERATION -- */ |
163 virtual int32_t first(void) { return fDelegate->first(); } | 189 virtual int32_t first(void); |
164 virtual int32_t preceding(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | 190 virtual int32_t preceding(int32_t offset); |
165 virtual int32_t previous(void) { /* TODO: not implemented */ return UBRK_DONE;
} | 191 virtual int32_t previous(void); |
166 virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset
); } | 192 virtual UBool isBoundary(int32_t offset); |
167 virtual int32_t current(void) const { return fDelegate->current(); } | 193 virtual int32_t current(void) const { return fDelegate->current(); } // we kee
p the delegate current, so this should be correct. |
168 | 194 |
169 virtual int32_t next(void); | 195 virtual int32_t next(void); |
170 | 196 |
171 virtual int32_t next(int32_t /*n*/) { /* TODO: not implemented */ return UBRK_
DONE; } | 197 virtual int32_t next(int32_t n); |
172 virtual int32_t following(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | 198 virtual int32_t following(int32_t offset); |
173 virtual int32_t last(void) { return fDelegate->last(); } | 199 virtual int32_t last(void); |
174 | 200 |
| 201 private: |
| 202 /** |
| 203 * Given that the fDelegate has already given its "initial" answer, |
| 204 * find the NEXT actual (non-excepted) break. |
| 205 * @param n initial position from delegate |
| 206 * @return new break position or UBRK_DONE |
| 207 */ |
| 208 int32_t internalNext(int32_t n); |
| 209 /** |
| 210 * Given that the fDelegate has already given its "initial" answer, |
| 211 * find the PREV actual (non-excepted) break. |
| 212 * @param n initial position from delegate |
| 213 * @return new break position or UBRK_DONE |
| 214 */ |
| 215 int32_t internalPrev(int32_t n); |
| 216 /** |
| 217 * set up the UText with the value of the fDelegate. |
| 218 * Call this before calling breakExceptionAt. |
| 219 * May be able to avoid excess calls |
| 220 */ |
| 221 void resetState(UErrorCode &status); |
| 222 /** |
| 223 * Is there a match (exception) at this spot? |
| 224 */ |
| 225 enum EFBMatchResult { kNoExceptionHere, kExceptionHere }; |
| 226 /** |
| 227 * Determine if there is an exception at this spot |
| 228 * @param n spot to check |
| 229 * @return kNoExceptionHere or kExceptionHere |
| 230 **/ |
| 231 enum EFBMatchResult breakExceptionAt(int32_t n); |
175 }; | 232 }; |
176 | 233 |
177 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S
impleFilteredSentenceBreakIterator& other) | 234 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S
impleFilteredSentenceBreakIterator& other) |
178 : BreakIterator(other), fDelegate(other.fDelegate->clone()) | 235 : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate-
>clone()) |
179 { | 236 { |
180 /* | |
181 TODO: not able to clone Tries. Should be a refcounted hidden master instead. | |
182 if(other.fBackwardsTrie.isValid()) { | |
183 fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone()); | |
184 } | |
185 if(other.fForwardsPartialTrie.isValid()) { | |
186 fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone()); | |
187 } | |
188 */ | |
189 } | 237 } |
190 | 238 |
191 | 239 |
192 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
: | 240 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
: |
193 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC
_ACTUAL_LOCALE,status)), | 241 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC
_ACTUAL_LOCALE,status)), |
194 fDelegate(adopt), | 242 fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), |
195 fBackwardsTrie(backwards), | 243 fDelegate(adopt) |
196 fForwardsPartialTrie(forwards) | |
197 { | 244 { |
198 // all set.. | 245 // all set.. |
199 } | 246 } |
200 | 247 |
201 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {} | 248 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { |
| 249 fData = fData->decr(); |
| 250 } |
202 | 251 |
203 int32_t SimpleFilteredSentenceBreakIterator::next() { | 252 void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) { |
204 int32_t n = fDelegate->next(); | |
205 if(n == UBRK_DONE || // at end or | |
206 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions | |
207 return n; | |
208 } | |
209 // OK, do we need to break here? | |
210 UErrorCode status = U_ZERO_ERROR; | |
211 // refresh text | |
212 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); | 253 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); |
213 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); | 254 } |
214 do { // outer loop runs once per underlying break (from fDelegate). | 255 |
| 256 SimpleFilteredSentenceBreakIterator::EFBMatchResult |
| 257 SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { |
| 258 int64_t bestPosn = -1; |
| 259 int32_t bestValue = -1; |
215 // loops while 'n' points to an exception. | 260 // loops while 'n' points to an exception. |
216 utext_setNativeIndex(fText.getAlias(), n); // from n.. | 261 utext_setNativeIndex(fText.getAlias(), n); // from n.. |
217 fBackwardsTrie->reset(); | 262 fData->fBackwardsTrie->reset(); |
218 UChar32 uch; | 263 UChar32 uch; |
| 264 |
219 //if(debug2) u_printf(" n@ %d\n", n); | 265 //if(debug2) u_printf(" n@ %d\n", n); |
220 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown
") | 266 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown
") |
221 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk
ip a class of chars here?? | 267 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk
ip a class of chars here?? |
222 // TODO only do this the 1st time? | 268 // TODO only do this the 1st time? |
223 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); | 269 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); |
224 } else { | 270 } else { |
225 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); | 271 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); |
226 uch = utext_next32(fText.getAlias()); | 272 uch = utext_next32(fText.getAlias()); |
227 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); | 273 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); |
228 } | 274 } |
| 275 |
229 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; | 276 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; |
230 | 277 |
231 int32_t bestPosn = -1; | |
232 int32_t bestValue = -1; | |
233 | |
234 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to
consume backwards and.. | 278 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to
consume backwards and.. |
235 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// mor
e in the trie | 279 USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch)))
{// more in the trie |
236 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far | 280 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far |
237 bestPosn = utext_getNativeIndex(fText.getAlias()); | 281 bestPosn = utext_getNativeIndex(fText.getAlias()); |
238 bestValue = fBackwardsTrie->getValue(); | 282 bestValue = fData->fBackwardsTrie->getValue(); |
239 } | 283 } |
240 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN
ativeIndex(fText.getAlias())); | 284 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN
ativeIndex(fText.getAlias())); |
241 } | 285 } |
242 | 286 |
243 if(USTRINGTRIE_MATCHES(r)) { // exact match? | 287 if(USTRINGTRIE_MATCHES(r)) { // exact match? |
244 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 288 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
245 bestValue = fBackwardsTrie->getValue(); | 289 bestValue = fData->fBackwardsTrie->getValue(); |
246 bestPosn = utext_getNativeIndex(fText.getAlias()); | 290 bestPosn = utext_getNativeIndex(fText.getAlias()); |
247 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 291 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
248 } | 292 } |
249 | 293 |
250 if(bestPosn>=0) { | 294 if(bestPosn>=0) { |
251 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 295 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
252 | 296 |
253 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? | 297 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? |
254 //int32_t bestValue = fBackwardsTrie->getValue(); | 298 //int32_t bestValue = fBackwardsTrie->getValue(); |
255 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC
har)uch, r, bestValue); | 299 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC
har)uch, r, bestValue); |
256 | 300 |
257 if(bestValue == kMATCH) { // exact match! | 301 if(bestValue == kMATCH) { // exact match! |
258 //if(debug2) u_printf(" exact backward match\n"); | 302 //if(debug2) u_printf(" exact backward match\n"); |
259 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. | 303 return kExceptionHere; // See if the next is another exception. |
260 if(n==UBRK_DONE) return n; | |
261 continue; // See if the next is another exception. | |
262 } else if(bestValue == kPARTIAL | 304 } else if(bestValue == kPARTIAL |
263 && fForwardsPartialTrie.isValid()) { // make sure there's a forw
ard trie | 305 && fData->fForwardsPartialTrie.isValid()) { // make sure there's
a forward trie |
264 //if(debug2) u_printf(" partial backward match\n"); | 306 //if(debug2) u_printf(" partial backward match\n"); |
265 // We matched the "Ph." in "Ph.D." - now we need to run everything throu
gh the forwards trie | 307 // We matched the "Ph." in "Ph.D." - now we need to run everything throu
gh the forwards trie |
266 // to see if it matches something going forward. | 308 // to see if it matches something going forward. |
267 fForwardsPartialTrie->reset(); | 309 fData->fForwardsPartialTrie->reset(); |
268 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; | 310 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; |
269 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .
. | 311 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .
. |
270 //if(debug2) u_printf("Retrying at %d\n", bestPosn); | 312 //if(debug2) u_printf("Retrying at %d\n", bestPosn); |
271 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && | 313 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && |
272 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(u
ch))) { | 314 USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCode
Point(uch))) { |
273 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute
xt_getNativeIndex(fText.getAlias())); | 315 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute
xt_getNativeIndex(fText.getAlias())); |
274 } | 316 } |
275 if(USTRINGTRIE_MATCHES(rfwd)) { | 317 if(USTRINGTRIE_MATCHES(rfwd)) { |
276 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); | 318 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); |
277 // only full matches here, nothing to check | 319 // only full matches here, nothing to check |
278 // skip the next: | 320 // skip the next: |
279 n = fDelegate->next(); | 321 return kExceptionHere; |
280 if(n==UBRK_DONE) return n; | |
281 continue; | |
282 } else { | 322 } else { |
283 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); | 323 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); |
284 // no match (no exception) -return the 'underlying' break | 324 // no match (no exception) -return the 'underlying' break |
285 return n; | 325 return kNoExceptionHere; |
286 } | 326 } |
287 } else { | 327 } else { |
288 return n; // internal error and/or no forwards trie | 328 return kNoExceptionHere; // internal error and/or no forwards trie |
289 } | 329 } |
290 } else { | 330 } else { |
291 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n
o best match | 331 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n
o best match |
292 return n; // No match - so exit. Not an exception. | 332 return kNoExceptionHere; // No match - so exit. Not an exception. |
293 } | 333 } |
294 } while(n != UBRK_DONE); | 334 } |
| 335 |
| 336 // the workhorse single next. |
| 337 int32_t |
| 338 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { |
| 339 if(n == UBRK_DONE || // at end or |
| 340 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exce
ptions |
| 341 return n; |
| 342 } |
| 343 // OK, do we need to break here? |
| 344 UErrorCode status = U_ZERO_ERROR; |
| 345 // refresh text |
| 346 resetState(status); |
| 347 if(U_FAILURE(status)) return UBRK_DONE; // bail out |
| 348 int64_t utextLen = utext_nativeLength(fText.getAlias()); |
| 349 |
| 350 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); |
| 351 while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlyi
ng break (from fDelegate). |
| 352 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); |
| 353 |
| 354 switch(m) { |
| 355 case kExceptionHere: |
| 356 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. |
| 357 continue; |
| 358 |
| 359 default: |
| 360 case kNoExceptionHere: |
| 361 return n; |
| 362 } |
| 363 } |
295 return n; | 364 return n; |
296 } | 365 } |
297 | 366 |
| 367 int32_t |
| 368 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { |
| 369 if(n == 0 || n == UBRK_DONE || // at end or |
| 370 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exce
ptions |
| 371 return n; |
| 372 } |
| 373 // OK, do we need to break here? |
| 374 UErrorCode status = U_ZERO_ERROR; |
| 375 // refresh text |
| 376 resetState(status); |
| 377 if(U_FAILURE(status)) return UBRK_DONE; // bail out |
| 378 |
| 379 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); |
| 380 while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying brea
k (from fDelegate). |
| 381 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); |
| 382 |
| 383 switch(m) { |
| 384 case kExceptionHere: |
| 385 n = fDelegate->previous(); // skip this one. Find the next lowerlevel brea
k. |
| 386 continue; |
| 387 |
| 388 default: |
| 389 case kNoExceptionHere: |
| 390 return n; |
| 391 } |
| 392 } |
| 393 return n; |
| 394 } |
| 395 |
| 396 |
| 397 int32_t |
| 398 SimpleFilteredSentenceBreakIterator::next() { |
| 399 return internalNext(fDelegate->next()); |
| 400 } |
| 401 |
| 402 int32_t |
| 403 SimpleFilteredSentenceBreakIterator::first(void) { |
| 404 return internalNext(fDelegate->first()); |
| 405 } |
| 406 |
| 407 int32_t |
| 408 SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) { |
| 409 return internalPrev(fDelegate->preceding(offset)); |
| 410 } |
| 411 |
| 412 int32_t |
| 413 SimpleFilteredSentenceBreakIterator::previous(void) { |
| 414 return internalPrev(fDelegate->previous()); |
| 415 } |
| 416 |
| 417 UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { |
| 418 if(!fDelegate->isBoundary(offset)) return false; // no break to suppress |
| 419 |
| 420 UErrorCode status = U_ZERO_ERROR; |
| 421 resetState(status); |
| 422 |
| 423 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offse
t); |
| 424 |
| 425 switch(m) { |
| 426 case kExceptionHere: |
| 427 return false; |
| 428 default: |
| 429 case kNoExceptionHere: |
| 430 return true; |
| 431 } |
| 432 } |
| 433 |
| 434 int32_t |
| 435 SimpleFilteredSentenceBreakIterator::next(int32_t offset) { |
| 436 return internalNext(fDelegate->next(offset)); |
| 437 } |
| 438 |
| 439 int32_t |
| 440 SimpleFilteredSentenceBreakIterator::following(int32_t offset) { |
| 441 return internalNext(fDelegate->following(offset)); |
| 442 } |
| 443 |
| 444 int32_t |
| 445 SimpleFilteredSentenceBreakIterator::last(void) { |
| 446 // Don't suppress a break opportunity at the end of text. |
| 447 return fDelegate->last(); |
| 448 } |
| 449 |
| 450 |
298 /** | 451 /** |
299 * Concrete implementation of builder class. | 452 * Concrete implementation of builder class. |
300 */ | 453 */ |
301 class U_I18N_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIterat
orBuilder { | 454 class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIter
atorBuilder { |
302 public: | 455 public: |
303 virtual ~SimpleFilteredBreakIteratorBuilder(); | 456 virtual ~SimpleFilteredBreakIteratorBuilder(); |
304 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu
s); | 457 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu
s); |
305 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); | 458 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); |
306 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s
tatus); | 459 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s
tatus); |
307 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode&
status); | 460 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode&
status); |
308 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st
atus); | 461 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st
atus); |
309 private: | 462 private: |
310 UStringSet fSet; | 463 UStringSet fSet; |
311 }; | 464 }; |
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
367 * on MSVC2012 for some reason. | 520 * on MSVC2012 for some reason. |
368 */ | 521 */ |
369 static inline UnicodeString* newUnicodeStringArray(size_t count) { | 522 static inline UnicodeString* newUnicodeStringArray(size_t count) { |
370 return new UnicodeString[count ? count : 1]; | 523 return new UnicodeString[count ? count : 1]; |
371 } | 524 } |
372 | 525 |
373 BreakIterator * | 526 BreakIterator * |
374 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
rorCode& status) { | 527 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
rorCode& status) { |
375 LocalPointer<BreakIterator> adopt(adoptBreakIterator); | 528 LocalPointer<BreakIterator> adopt(adoptBreakIterator); |
376 | 529 |
| 530 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status)
; |
| 531 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status
); |
377 if(U_FAILURE(status)) { | 532 if(U_FAILURE(status)) { |
378 return NULL; | 533 return NULL; |
379 } | 534 } |
380 | 535 |
381 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status)); | |
382 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status)); | |
383 | |
384 int32_t revCount = 0; | 536 int32_t revCount = 0; |
385 int32_t fwdCount = 0; | 537 int32_t fwdCount = 0; |
386 | 538 |
387 int32_t subCount = fSet.size(); | 539 int32_t subCount = fSet.size(); |
388 | 540 |
389 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); | 541 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); |
390 | 542 |
391 LocalArray<UnicodeString> ustrs(ustrs_ptr); | 543 LocalArray<UnicodeString> ustrs(ustrs_ptr); |
392 | 544 |
393 LocalMemory<int> partials; | 545 LocalMemory<int> partials; |
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
496 | 648 |
497 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { | 649 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { |
498 } | 650 } |
499 | 651 |
500 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { | 652 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { |
501 } | 653 } |
502 | 654 |
503 FilteredBreakIteratorBuilder * | 655 FilteredBreakIteratorBuilder * |
504 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
atus) { | 656 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
atus) { |
505 if(U_FAILURE(status)) return NULL; | 657 if(U_FAILURE(status)) return NULL; |
506 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(where, status)); | 658 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(where, status), status); |
507 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | 659 return (U_SUCCESS(status))? ret.orphan(): NULL; |
508 return ret.orphan(); | |
509 } | 660 } |
510 | 661 |
511 FilteredBreakIteratorBuilder * | 662 FilteredBreakIteratorBuilder * |
512 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { | 663 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { |
513 if(U_FAILURE(status)) return NULL; | 664 if(U_FAILURE(status)) return NULL; |
514 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(status)); | 665 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(status), status); |
515 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | 666 return (U_SUCCESS(status))? ret.orphan(): NULL; |
516 return ret.orphan(); | |
517 } | 667 } |
518 | 668 |
519 U_NAMESPACE_END | 669 U_NAMESPACE_END |
520 | 670 |
#endif //#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
OLD | NEW |