| OLD | NEW |
| 1 /* | 1 /* |
| 2 ******************************************************************************* | 2 ******************************************************************************* |
| 3 * Copyright (C) 2014, International Business Machines Corporation and | 3 * Copyright (C) 2014-2015, International Business Machines Corporation and |
| 4 * others. All Rights Reserved. | 4 * others. All Rights Reserved. |
| 5 ******************************************************************************* | 5 ******************************************************************************* |
| 6 */ | 6 */ |
| 7 | 7 |
| 8 #include "unicode/utypes.h" | 8 #include "unicode/utypes.h" |
| 9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION | 9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION |
| 10 | 10 |
| 11 #include "cmemory.h" | 11 #include "cmemory.h" |
| 12 | 12 |
| 13 #include "unicode/filteredbrk.h" | 13 #include "unicode/filteredbrk.h" |
| (...skipping 22 matching lines...) Expand all Loading... |
| 36 } | 36 } |
| 37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", | 37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", |
| 38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); | 38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); |
| 39 } | 39 } |
| 40 | 40 |
| 41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) | 41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) |
| 42 #else | 42 #else |
| 43 #define FB_TRACE(m,s,b,d) | 43 #define FB_TRACE(m,s,b,d) |
| 44 #endif | 44 #endif |
| 45 | 45 |
| 46 /** |
| 47 * Used with sortedInsert() |
| 48 */ |
| 46 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { | 49 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { |
| 47 const UnicodeString &a = *(const UnicodeString*)t1.pointer; | 50 const UnicodeString &a = *(const UnicodeString*)t1.pointer; |
| 48 const UnicodeString &b = *(const UnicodeString*)t2.pointer; | 51 const UnicodeString &b = *(const UnicodeString*)t2.pointer; |
| 49 return a.compare(b); | 52 return a.compare(b); |
| 50 } | 53 } |
| 51 | 54 |
| 52 /** | 55 /** |
| 53 * A UVector which implements a set of strings. | 56 * A UVector which implements a set of strings. |
| 54 */ | 57 */ |
| 55 class U_I18N_API UStringSet : public UVector { | 58 class U_COMMON_API UStringSet : public UVector { |
| 56 public: | 59 public: |
| 57 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, | 60 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, |
| 58 uhash_compareUnicodeString, | 61 uhash_compareUnicodeString, |
| 59 1, | 62 1, |
| 60 status) {} | 63 status) {} |
| 61 virtual ~UStringSet(); | 64 virtual ~UStringSet(); |
| 62 /** | 65 /** |
| 63 * Is this UnicodeSet contained? | 66 * Is this UnicodeSet contained? |
| 64 */ | 67 */ |
| 65 inline UBool contains(const UnicodeString& s) { | 68 inline UBool contains(const UnicodeString& s) { |
| (...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 110 if(U_FAILURE(status)) return false; | 113 if(U_FAILURE(status)) return false; |
| 111 return removeElement((void*) &s); | 114 return removeElement((void*) &s); |
| 112 } | 115 } |
| 113 }; | 116 }; |
| 114 | 117 |
| 115 /** | 118 /** |
| 116 * Virtual, won't be inlined | 119 * Virtual, won't be inlined |
| 117 */ | 120 */ |
| 118 UStringSet::~UStringSet() {} | 121 UStringSet::~UStringSet() {} |
| 119 | 122 |
| 123 /* ----------------------------------------------------------- */ |
| 120 | 124 |
| 125 |
| 126 /* Filtered Break constants */ |
| 121 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
d trie | 127 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
d trie |
| 122 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. | 128 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. |
| 123 static const int32_t kSuppressInReverse = (1<<0); | 129 static const int32_t kSuppressInReverse = (1<<0); |
| 124 static const int32_t kAddToForward = (1<<1); | 130 static const int32_t kAddToForward = (1<<1); |
| 125 static const UChar kFULLSTOP = 0x002E; // '.' | 131 static const UChar kFULLSTOP = 0x002E; // '.' |
| 126 | 132 |
| 133 /** |
| 134 * Shared data for SimpleFilteredSentenceBreakIterator |
| 135 */ |
| 136 class SimpleFilteredSentenceBreakData : public UMemory { |
| 137 public: |
| 138 SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards ) |
| 139 : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) {
} |
| 140 SimpleFilteredSentenceBreakData *incr() { refcount++; return this; } |
| 141 SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; r
eturn 0; } |
| 142 virtual ~SimpleFilteredSentenceBreakData(); |
| 143 |
| 144 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." |
| 145 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. |
| 146 int32_t refcount; |
| 147 }; |
| 148 |
| 149 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {} |
| 150 |
| 151 /** |
| 152 * Concrete implementation |
| 153 */ |
| 127 class SimpleFilteredSentenceBreakIterator : public BreakIterator { | 154 class SimpleFilteredSentenceBreakIterator : public BreakIterator { |
| 128 public: | 155 public: |
| 129 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards
, UCharsTrie *backwards, UErrorCode &status); | 156 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards
, UCharsTrie *backwards, UErrorCode &status); |
| 130 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator&
other); | 157 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator&
other); |
| 131 virtual ~SimpleFilteredSentenceBreakIterator(); | 158 virtual ~SimpleFilteredSentenceBreakIterator(); |
| 132 private: | 159 private: |
| 160 SimpleFilteredSentenceBreakData *fData; |
| 133 LocalPointer<BreakIterator> fDelegate; | 161 LocalPointer<BreakIterator> fDelegate; |
| 134 LocalUTextPointer fText; | 162 LocalUTextPointer fText; |
| 135 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. | |
| 136 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." | |
| 137 | 163 |
| 138 /* -- subclass interface -- */ | 164 /* -- subclass interface -- */ |
| 139 public: | 165 public: |
| 140 /* -- cloning and other subclass stuff -- */ | 166 /* -- cloning and other subclass stuff -- */ |
| 141 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, | 167 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, |
| 142 int32_t &/*BufferSize*/, | 168 int32_t &/*BufferSize*/, |
| 143 UErrorCode &status) { | 169 UErrorCode &status) { |
| 144 // for now - always deep clone | 170 // for now - always deep clone |
| 145 status = U_SAFECLONE_ALLOCATED_WARNING; | 171 status = U_SAFECLONE_ALLOCATED_WARNING; |
| 146 return clone(); | 172 return clone(); |
| 147 } | 173 } |
| 148 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr
eakIterator(*this); } | 174 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr
eakIterator(*this); } |
| 149 virtual UClassID getDynamicClassID(void) const { return NULL; } | 175 virtual UClassID getDynamicClassID(void) const { return NULL; } |
| 150 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t
rue; return false; } | 176 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t
rue; return false; } |
| 151 | 177 |
| 152 /* -- text modifying -- */ | 178 /* -- text modifying -- */ |
| 153 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex
t,status); } | 179 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex
t,status); } |
| 154 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD
elegate->refreshInputText(input,status); return *this; } | 180 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD
elegate->refreshInputText(input,status); return *this; } |
| 155 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } | 181 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } |
| 156 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } | 182 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } |
| 157 | 183 |
| 158 /* -- other functions that are just delegated -- */ | 184 /* -- other functions that are just delegated -- */ |
| 159 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel
egate->getUText(fillIn,status); } | 185 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel
egate->getUText(fillIn,status); } |
| 160 virtual CharacterIterator& getText(void) const { return fDelegate->getText();
} | 186 virtual CharacterIterator& getText(void) const { return fDelegate->getText();
} |
| 161 | 187 |
| 162 /* -- ITERATION -- */ | 188 /* -- ITERATION -- */ |
| 163 virtual int32_t first(void) { return fDelegate->first(); } | 189 virtual int32_t first(void); |
| 164 virtual int32_t preceding(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | 190 virtual int32_t preceding(int32_t offset); |
| 165 virtual int32_t previous(void) { /* TODO: not implemented */ return UBRK_DONE;
} | 191 virtual int32_t previous(void); |
| 166 virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset
); } | 192 virtual UBool isBoundary(int32_t offset); |
| 167 virtual int32_t current(void) const { return fDelegate->current(); } | 193 virtual int32_t current(void) const { return fDelegate->current(); } // we kee
p the delegate current, so this should be correct. |
| 168 | 194 |
| 169 virtual int32_t next(void); | 195 virtual int32_t next(void); |
| 170 | 196 |
| 171 virtual int32_t next(int32_t /*n*/) { /* TODO: not implemented */ return UBRK_
DONE; } | 197 virtual int32_t next(int32_t n); |
| 172 virtual int32_t following(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | 198 virtual int32_t following(int32_t offset); |
| 173 virtual int32_t last(void) { return fDelegate->last(); } | 199 virtual int32_t last(void); |
| 174 | 200 |
| 201 private: |
| 202 /** |
| 203 * Given that the fDelegate has already given its "initial" answer, |
| 204 * find the NEXT actual (non-excepted) break. |
| 205 * @param n initial position from delegate |
| 206 * @return new break position or UBRK_DONE |
| 207 */ |
| 208 int32_t internalNext(int32_t n); |
| 209 /** |
| 210 * Given that the fDelegate has already given its "initial" answer, |
| 211 * find the PREV actual (non-excepted) break. |
| 212 * @param n initial position from delegate |
| 213 * @return new break position or UBRK_DONE |
| 214 */ |
| 215 int32_t internalPrev(int32_t n); |
| 216 /** |
| 217 * set up the UText with the value of the fDelegate. |
| 218 * Call this before calling breakExceptionAt. |
| 219 * May be able to avoid excess calls |
| 220 */ |
| 221 void resetState(UErrorCode &status); |
| 222 /** |
| 223 * Is there a match (exception) at this spot? |
| 224 */ |
| 225 enum EFBMatchResult { kNoExceptionHere, kExceptionHere }; |
| 226 /** |
| 227 * Determine if there is an exception at this spot |
| 228 * @param n spot to check |
| 229 * @return kNoExceptionHere or kExceptionHere |
| 230 **/ |
| 231 enum EFBMatchResult breakExceptionAt(int32_t n); |
| 175 }; | 232 }; |
| 176 | 233 |
| 177 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S
impleFilteredSentenceBreakIterator& other) | 234 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S
impleFilteredSentenceBreakIterator& other) |
| 178 : BreakIterator(other), fDelegate(other.fDelegate->clone()) | 235 : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate-
>clone()) |
| 179 { | 236 { |
| 180 /* | |
| 181 TODO: not able to clone Tries. Should be a refcounted hidden master instead. | |
| 182 if(other.fBackwardsTrie.isValid()) { | |
| 183 fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone()); | |
| 184 } | |
| 185 if(other.fForwardsPartialTrie.isValid()) { | |
| 186 fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone()); | |
| 187 } | |
| 188 */ | |
| 189 } | 237 } |
| 190 | 238 |
| 191 | 239 |
| 192 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
: | 240 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
: |
| 193 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC
_ACTUAL_LOCALE,status)), | 241 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC
_ACTUAL_LOCALE,status)), |
| 194 fDelegate(adopt), | 242 fData(new SimpleFilteredSentenceBreakData(forwards, backwards)), |
| 195 fBackwardsTrie(backwards), | 243 fDelegate(adopt) |
| 196 fForwardsPartialTrie(forwards) | |
| 197 { | 244 { |
| 198 // all set.. | 245 // all set.. |
| 199 } | 246 } |
| 200 | 247 |
| 201 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {} | 248 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() { |
| 249 fData = fData->decr(); |
| 250 } |
| 202 | 251 |
| 203 int32_t SimpleFilteredSentenceBreakIterator::next() { | 252 void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) { |
| 204 int32_t n = fDelegate->next(); | |
| 205 if(n == UBRK_DONE || // at end or | |
| 206 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions | |
| 207 return n; | |
| 208 } | |
| 209 // OK, do we need to break here? | |
| 210 UErrorCode status = U_ZERO_ERROR; | |
| 211 // refresh text | |
| 212 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); | 253 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); |
| 213 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); | 254 } |
| 214 do { // outer loop runs once per underlying break (from fDelegate). | 255 |
| 256 SimpleFilteredSentenceBreakIterator::EFBMatchResult |
| 257 SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) { |
| 258 int64_t bestPosn = -1; |
| 259 int32_t bestValue = -1; |
| 215 // loops while 'n' points to an exception. | 260 // loops while 'n' points to an exception. |
| 216 utext_setNativeIndex(fText.getAlias(), n); // from n.. | 261 utext_setNativeIndex(fText.getAlias(), n); // from n.. |
| 217 fBackwardsTrie->reset(); | 262 fData->fBackwardsTrie->reset(); |
| 218 UChar32 uch; | 263 UChar32 uch; |
| 264 |
| 219 //if(debug2) u_printf(" n@ %d\n", n); | 265 //if(debug2) u_printf(" n@ %d\n", n); |
| 220 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown
") | 266 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown
") |
| 221 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk
ip a class of chars here?? | 267 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk
ip a class of chars here?? |
| 222 // TODO only do this the 1st time? | 268 // TODO only do this the 1st time? |
| 223 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); | 269 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); |
| 224 } else { | 270 } else { |
| 225 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); | 271 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); |
| 226 uch = utext_next32(fText.getAlias()); | 272 uch = utext_next32(fText.getAlias()); |
| 227 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); | 273 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); |
| 228 } | 274 } |
| 275 |
| 229 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; | 276 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; |
| 230 | 277 |
| 231 int32_t bestPosn = -1; | |
| 232 int32_t bestValue = -1; | |
| 233 | |
| 234 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to
consume backwards and.. | 278 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to
consume backwards and.. |
| 235 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// mor
e in the trie | 279 USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch)))
{// more in the trie |
| 236 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far | 280 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far |
| 237 bestPosn = utext_getNativeIndex(fText.getAlias()); | 281 bestPosn = utext_getNativeIndex(fText.getAlias()); |
| 238 bestValue = fBackwardsTrie->getValue(); | 282 bestValue = fData->fBackwardsTrie->getValue(); |
| 239 } | 283 } |
| 240 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN
ativeIndex(fText.getAlias())); | 284 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN
ativeIndex(fText.getAlias())); |
| 241 } | 285 } |
| 242 | 286 |
| 243 if(USTRINGTRIE_MATCHES(r)) { // exact match? | 287 if(USTRINGTRIE_MATCHES(r)) { // exact match? |
| 244 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 288 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
| 245 bestValue = fBackwardsTrie->getValue(); | 289 bestValue = fData->fBackwardsTrie->getValue(); |
| 246 bestPosn = utext_getNativeIndex(fText.getAlias()); | 290 bestPosn = utext_getNativeIndex(fText.getAlias()); |
| 247 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 291 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
| 248 } | 292 } |
| 249 | 293 |
| 250 if(bestPosn>=0) { | 294 if(bestPosn>=0) { |
| 251 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | 295 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); |
| 252 | 296 |
| 253 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? | 297 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? |
| 254 //int32_t bestValue = fBackwardsTrie->getValue(); | 298 //int32_t bestValue = fBackwardsTrie->getValue(); |
| 255 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC
har)uch, r, bestValue); | 299 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC
har)uch, r, bestValue); |
| 256 | 300 |
| 257 if(bestValue == kMATCH) { // exact match! | 301 if(bestValue == kMATCH) { // exact match! |
| 258 //if(debug2) u_printf(" exact backward match\n"); | 302 //if(debug2) u_printf(" exact backward match\n"); |
| 259 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. | 303 return kExceptionHere; // See if the next is another exception. |
| 260 if(n==UBRK_DONE) return n; | |
| 261 continue; // See if the next is another exception. | |
| 262 } else if(bestValue == kPARTIAL | 304 } else if(bestValue == kPARTIAL |
| 263 && fForwardsPartialTrie.isValid()) { // make sure there's a forw
ard trie | 305 && fData->fForwardsPartialTrie.isValid()) { // make sure there's
a forward trie |
| 264 //if(debug2) u_printf(" partial backward match\n"); | 306 //if(debug2) u_printf(" partial backward match\n"); |
| 265 // We matched the "Ph." in "Ph.D." - now we need to run everything throu
gh the forwards trie | 307 // We matched the "Ph." in "Ph.D." - now we need to run everything throu
gh the forwards trie |
| 266 // to see if it matches something going forward. | 308 // to see if it matches something going forward. |
| 267 fForwardsPartialTrie->reset(); | 309 fData->fForwardsPartialTrie->reset(); |
| 268 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; | 310 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; |
| 269 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .
. | 311 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .
. |
| 270 //if(debug2) u_printf("Retrying at %d\n", bestPosn); | 312 //if(debug2) u_printf("Retrying at %d\n", bestPosn); |
| 271 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && | 313 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && |
| 272 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(u
ch))) { | 314 USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCode
Point(uch))) { |
| 273 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute
xt_getNativeIndex(fText.getAlias())); | 315 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute
xt_getNativeIndex(fText.getAlias())); |
| 274 } | 316 } |
| 275 if(USTRINGTRIE_MATCHES(rfwd)) { | 317 if(USTRINGTRIE_MATCHES(rfwd)) { |
| 276 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); | 318 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); |
| 277 // only full matches here, nothing to check | 319 // only full matches here, nothing to check |
| 278 // skip the next: | 320 // skip the next: |
| 279 n = fDelegate->next(); | 321 return kExceptionHere; |
| 280 if(n==UBRK_DONE) return n; | |
| 281 continue; | |
| 282 } else { | 322 } else { |
| 283 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); | 323 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); |
| 284 // no match (no exception) -return the 'underlying' break | 324 // no match (no exception) -return the 'underlying' break |
| 285 return n; | 325 return kNoExceptionHere; |
| 286 } | 326 } |
| 287 } else { | 327 } else { |
| 288 return n; // internal error and/or no forwards trie | 328 return kNoExceptionHere; // internal error and/or no forwards trie |
| 289 } | 329 } |
| 290 } else { | 330 } else { |
| 291 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n
o best match | 331 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n
o best match |
| 292 return n; // No match - so exit. Not an exception. | 332 return kNoExceptionHere; // No match - so exit. Not an exception. |
| 293 } | 333 } |
| 294 } while(n != UBRK_DONE); | 334 } |
| 335 |
| 336 // the workhorse single next. |
| 337 int32_t |
| 338 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) { |
| 339 if(n == UBRK_DONE || // at end or |
| 340 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exce
ptions |
| 341 return n; |
| 342 } |
| 343 // OK, do we need to break here? |
| 344 UErrorCode status = U_ZERO_ERROR; |
| 345 // refresh text |
| 346 resetState(status); |
| 347 if(U_FAILURE(status)) return UBRK_DONE; // bail out |
| 348 int64_t utextLen = utext_nativeLength(fText.getAlias()); |
| 349 |
| 350 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); |
| 351 while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlyi
ng break (from fDelegate). |
| 352 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); |
| 353 |
| 354 switch(m) { |
| 355 case kExceptionHere: |
| 356 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. |
| 357 continue; |
| 358 |
| 359 default: |
| 360 case kNoExceptionHere: |
| 361 return n; |
| 362 } |
| 363 } |
| 295 return n; | 364 return n; |
| 296 } | 365 } |
| 297 | 366 |
| 367 int32_t |
| 368 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) { |
| 369 if(n == 0 || n == UBRK_DONE || // at end or |
| 370 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exce
ptions |
| 371 return n; |
| 372 } |
| 373 // OK, do we need to break here? |
| 374 UErrorCode status = U_ZERO_ERROR; |
| 375 // refresh text |
| 376 resetState(status); |
| 377 if(U_FAILURE(status)) return UBRK_DONE; // bail out |
| 378 |
| 379 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); |
| 380 while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying brea
k (from fDelegate). |
| 381 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n); |
| 382 |
| 383 switch(m) { |
| 384 case kExceptionHere: |
| 385 n = fDelegate->previous(); // skip this one. Find the next lowerlevel brea
k. |
| 386 continue; |
| 387 |
| 388 default: |
| 389 case kNoExceptionHere: |
| 390 return n; |
| 391 } |
| 392 } |
| 393 return n; |
| 394 } |
| 395 |
| 396 |
| 397 int32_t |
| 398 SimpleFilteredSentenceBreakIterator::next() { |
| 399 return internalNext(fDelegate->next()); |
| 400 } |
| 401 |
| 402 int32_t |
| 403 SimpleFilteredSentenceBreakIterator::first(void) { |
| 404 return internalNext(fDelegate->first()); |
| 405 } |
| 406 |
| 407 int32_t |
| 408 SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) { |
| 409 return internalPrev(fDelegate->preceding(offset)); |
| 410 } |
| 411 |
| 412 int32_t |
| 413 SimpleFilteredSentenceBreakIterator::previous(void) { |
| 414 return internalPrev(fDelegate->previous()); |
| 415 } |
| 416 |
| 417 UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) { |
| 418 if(!fDelegate->isBoundary(offset)) return false; // no break to suppress |
| 419 |
| 420 UErrorCode status = U_ZERO_ERROR; |
| 421 resetState(status); |
| 422 |
| 423 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offse
t); |
| 424 |
| 425 switch(m) { |
| 426 case kExceptionHere: |
| 427 return false; |
| 428 default: |
| 429 case kNoExceptionHere: |
| 430 return true; |
| 431 } |
| 432 } |
| 433 |
| 434 int32_t |
| 435 SimpleFilteredSentenceBreakIterator::next(int32_t offset) { |
| 436 return internalNext(fDelegate->next(offset)); |
| 437 } |
| 438 |
| 439 int32_t |
| 440 SimpleFilteredSentenceBreakIterator::following(int32_t offset) { |
| 441 return internalNext(fDelegate->following(offset)); |
| 442 } |
| 443 |
| 444 int32_t |
| 445 SimpleFilteredSentenceBreakIterator::last(void) { |
| 446 // Don't suppress a break opportunity at the end of text. |
| 447 return fDelegate->last(); |
| 448 } |
| 449 |
| 450 |
| 298 /** | 451 /** |
| 299 * Concrete implementation of builder class. | 452 * Concrete implementation of builder class. |
| 300 */ | 453 */ |
| 301 class U_I18N_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIterat
orBuilder { | 454 class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIter
atorBuilder { |
| 302 public: | 455 public: |
| 303 virtual ~SimpleFilteredBreakIteratorBuilder(); | 456 virtual ~SimpleFilteredBreakIteratorBuilder(); |
| 304 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu
s); | 457 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu
s); |
| 305 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); | 458 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); |
| 306 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s
tatus); | 459 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s
tatus); |
| 307 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode&
status); | 460 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode&
status); |
| 308 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st
atus); | 461 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st
atus); |
| 309 private: | 462 private: |
| 310 UStringSet fSet; | 463 UStringSet fSet; |
| 311 }; | 464 }; |
| (...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 367 * on MSVC2012 for some reason. | 520 * on MSVC2012 for some reason. |
| 368 */ | 521 */ |
| 369 static inline UnicodeString* newUnicodeStringArray(size_t count) { | 522 static inline UnicodeString* newUnicodeStringArray(size_t count) { |
| 370 return new UnicodeString[count ? count : 1]; | 523 return new UnicodeString[count ? count : 1]; |
| 371 } | 524 } |
| 372 | 525 |
| 373 BreakIterator * | 526 BreakIterator * |
| 374 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
rorCode& status) { | 527 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
rorCode& status) { |
| 375 LocalPointer<BreakIterator> adopt(adoptBreakIterator); | 528 LocalPointer<BreakIterator> adopt(adoptBreakIterator); |
| 376 | 529 |
| 530 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status)
; |
| 531 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status
); |
| 377 if(U_FAILURE(status)) { | 532 if(U_FAILURE(status)) { |
| 378 return NULL; | 533 return NULL; |
| 379 } | 534 } |
| 380 | 535 |
| 381 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status)); | |
| 382 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status)); | |
| 383 | |
| 384 int32_t revCount = 0; | 536 int32_t revCount = 0; |
| 385 int32_t fwdCount = 0; | 537 int32_t fwdCount = 0; |
| 386 | 538 |
| 387 int32_t subCount = fSet.size(); | 539 int32_t subCount = fSet.size(); |
| 388 | 540 |
| 389 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); | 541 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); |
| 390 | 542 |
| 391 LocalArray<UnicodeString> ustrs(ustrs_ptr); | 543 LocalArray<UnicodeString> ustrs(ustrs_ptr); |
| 392 | 544 |
| 393 LocalMemory<int> partials; | 545 LocalMemory<int> partials; |
| (...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after Loading... |
| 496 | 648 |
| 497 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { | 649 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { |
| 498 } | 650 } |
| 499 | 651 |
| 500 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { | 652 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { |
| 501 } | 653 } |
| 502 | 654 |
| 503 FilteredBreakIteratorBuilder * | 655 FilteredBreakIteratorBuilder * |
| 504 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
atus) { | 656 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
atus) { |
| 505 if(U_FAILURE(status)) return NULL; | 657 if(U_FAILURE(status)) return NULL; |
| 506 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(where, status)); | 658 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(where, status), status); |
| 507 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | 659 return (U_SUCCESS(status))? ret.orphan(): NULL; |
| 508 return ret.orphan(); | |
| 509 } | 660 } |
| 510 | 661 |
| 511 FilteredBreakIteratorBuilder * | 662 FilteredBreakIteratorBuilder * |
| 512 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { | 663 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { |
| 513 if(U_FAILURE(status)) return NULL; | 664 if(U_FAILURE(status)) return NULL; |
| 514 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(status)); | 665 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(status), status); |
| 515 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | 666 return (U_SUCCESS(status))? ret.orphan(): NULL; |
| 516 return ret.orphan(); | |
| 517 } | 667 } |
| 518 | 668 |
| 519 U_NAMESPACE_END | 669 U_NAMESPACE_END |
| 520 | 670 |
| 521 #endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FIL
TERED_BREAK_ITERATION | 671 #endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FIL
TERED_BREAK_ITERATION |
| OLD | NEW |