| OLD | NEW |
| (Empty) |
| 1 /* | |
| 2 ******************************************************************************* | |
| 3 * Copyright (C) 2014, International Business Machines Corporation and | |
| 4 * others. All Rights Reserved. | |
| 5 ******************************************************************************* | |
| 6 */ | |
| 7 | |
| 8 #include "unicode/utypes.h" | |
| 9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION | |
| 10 | |
| 11 #include "cmemory.h" | |
| 12 | |
| 13 #include "unicode/filteredbrk.h" | |
| 14 #include "unicode/ucharstriebuilder.h" | |
| 15 #include "unicode/ures.h" | |
| 16 | |
| 17 #include "uresimp.h" // ures_getByKeyWithFallback | |
| 18 #include "ubrkimpl.h" // U_ICUDATA_BRKITR | |
| 19 #include "uvector.h" | |
| 20 #include "cmemory.h" | |
| 21 | |
| 22 U_NAMESPACE_BEGIN | |
| 23 | |
| 24 #ifndef FB_DEBUG | |
| 25 #define FB_DEBUG 0 | |
| 26 #endif | |
| 27 | |
| 28 #if FB_DEBUG | |
| 29 #include <stdio.h> | |
| 30 static void _fb_trace(const char *m, const UnicodeString *s, UBool b, int32_t d,
const char *f, int l) { | |
| 31 char buf[2048]; | |
| 32 if(s) { | |
| 33 s->extract(0,s->length(),buf,2048); | |
| 34 } else { | |
| 35 strcpy(buf,"NULL"); | |
| 36 } | |
| 37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", | |
| 38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); | |
| 39 } | |
| 40 | |
| 41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) | |
| 42 #else | |
| 43 #define FB_TRACE(m,s,b,d) | |
| 44 #endif | |
| 45 | |
| 46 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { | |
| 47 const UnicodeString &a = *(const UnicodeString*)t1.pointer; | |
| 48 const UnicodeString &b = *(const UnicodeString*)t2.pointer; | |
| 49 return a.compare(b); | |
| 50 } | |
| 51 | |
| 52 /** | |
| 53 * A UVector which implements a set of strings. | |
| 54 */ | |
| 55 class U_I18N_API UStringSet : public UVector { | |
| 56 public: | |
| 57 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, | |
| 58 uhash_compareUnicodeString, | |
| 59 1, | |
| 60 status) {} | |
| 61 virtual ~UStringSet(); | |
| 62 /** | |
| 63 * Is this UnicodeSet contained? | |
| 64 */ | |
| 65 inline UBool contains(const UnicodeString& s) { | |
| 66 return contains((void*) &s); | |
| 67 } | |
| 68 using UVector::contains; | |
| 69 /** | |
| 70 * Return the ith UnicodeString alias | |
| 71 */ | |
| 72 inline const UnicodeString* getStringAt(int32_t i) const { | |
| 73 return (const UnicodeString*)elementAt(i); | |
| 74 } | |
| 75 /** | |
| 76 * Adopt the UnicodeString if not already contained. | |
| 77 * Caller no longer owns the pointer in any case. | |
| 78 * @return true if adopted successfully, false otherwise (error, or else dupli
cate) | |
| 79 */ | |
| 80 inline UBool adopt(UnicodeString *str, UErrorCode &status) { | |
| 81 if(U_FAILURE(status) || contains(*str)) { | |
| 82 delete str; | |
| 83 return false; | |
| 84 } else { | |
| 85 sortedInsert(str, compareUnicodeString, status); | |
| 86 if(U_FAILURE(status)) { | |
| 87 delete str; | |
| 88 return false; | |
| 89 } | |
| 90 return true; | |
| 91 } | |
| 92 } | |
| 93 /** | |
| 94 * Add by value. | |
| 95 * @return true if successfully adopted. | |
| 96 */ | |
| 97 inline UBool add(const UnicodeString& str, UErrorCode &status) { | |
| 98 if(U_FAILURE(status)) return false; | |
| 99 UnicodeString *t = new UnicodeString(str); | |
| 100 if(t==NULL) { | |
| 101 status = U_MEMORY_ALLOCATION_ERROR; return false; | |
| 102 } | |
| 103 return adopt(t, status); | |
| 104 } | |
| 105 /** | |
| 106 * Remove this string. | |
| 107 * @return true if successfully removed, false otherwise (error, or else it wa
sn't there) | |
| 108 */ | |
| 109 inline UBool remove(const UnicodeString &s, UErrorCode &status) { | |
| 110 if(U_FAILURE(status)) return false; | |
| 111 return removeElement((void*) &s); | |
| 112 } | |
| 113 }; | |
| 114 | |
| 115 /** | |
| 116 * Virtual, won't be inlined | |
| 117 */ | |
| 118 UStringSet::~UStringSet() {} | |
| 119 | |
| 120 | |
| 121 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forwar
d trie | |
| 122 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. | |
| 123 static const int32_t kSuppressInReverse = (1<<0); | |
| 124 static const int32_t kAddToForward = (1<<1); | |
| 125 static const UChar kFULLSTOP = 0x002E; // '.' | |
| 126 | |
| 127 class SimpleFilteredSentenceBreakIterator : public BreakIterator { | |
| 128 public: | |
| 129 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards
, UCharsTrie *backwards, UErrorCode &status); | |
| 130 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator&
other); | |
| 131 virtual ~SimpleFilteredSentenceBreakIterator(); | |
| 132 private: | |
| 133 LocalPointer<BreakIterator> fDelegate; | |
| 134 LocalUTextPointer fText; | |
| 135 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs. | |
| 136 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M." | |
| 137 | |
| 138 /* -- subclass interface -- */ | |
| 139 public: | |
| 140 /* -- cloning and other subclass stuff -- */ | |
| 141 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, | |
| 142 int32_t &/*BufferSize*/, | |
| 143 UErrorCode &status) { | |
| 144 // for now - always deep clone | |
| 145 status = U_SAFECLONE_ALLOCATED_WARNING; | |
| 146 return clone(); | |
| 147 } | |
| 148 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr
eakIterator(*this); } | |
| 149 virtual UClassID getDynamicClassID(void) const { return NULL; } | |
| 150 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t
rue; return false; } | |
| 151 | |
| 152 /* -- text modifying -- */ | |
| 153 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex
t,status); } | |
| 154 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD
elegate->refreshInputText(input,status); return *this; } | |
| 155 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } | |
| 156 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } | |
| 157 | |
| 158 /* -- other functions that are just delegated -- */ | |
| 159 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel
egate->getUText(fillIn,status); } | |
| 160 virtual CharacterIterator& getText(void) const { return fDelegate->getText();
} | |
| 161 | |
| 162 /* -- ITERATION -- */ | |
| 163 virtual int32_t first(void) { return fDelegate->first(); } | |
| 164 virtual int32_t preceding(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | |
| 165 virtual int32_t previous(void) { /* TODO: not implemented */ return UBRK_DONE;
} | |
| 166 virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset
); } | |
| 167 virtual int32_t current(void) const { return fDelegate->current(); } | |
| 168 | |
| 169 virtual int32_t next(void); | |
| 170 | |
| 171 virtual int32_t next(int32_t /*n*/) { /* TODO: not implemented */ return UBRK_
DONE; } | |
| 172 virtual int32_t following(int32_t /*offset*/) { /* TODO: not implemented */ re
turn UBRK_DONE; } | |
| 173 virtual int32_t last(void) { return fDelegate->last(); } | |
| 174 | |
| 175 }; | |
| 176 | |
| 177 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S
impleFilteredSentenceBreakIterator& other) | |
| 178 : BreakIterator(other), fDelegate(other.fDelegate->clone()) | |
| 179 { | |
| 180 /* | |
| 181 TODO: not able to clone Tries. Should be a refcounted hidden master instead. | |
| 182 if(other.fBackwardsTrie.isValid()) { | |
| 183 fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone()); | |
| 184 } | |
| 185 if(other.fForwardsPartialTrie.isValid()) { | |
| 186 fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone()); | |
| 187 } | |
| 188 */ | |
| 189 } | |
| 190 | |
| 191 | |
| 192 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt
erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status)
: | |
| 193 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC
_ACTUAL_LOCALE,status)), | |
| 194 fDelegate(adopt), | |
| 195 fBackwardsTrie(backwards), | |
| 196 fForwardsPartialTrie(forwards) | |
| 197 { | |
| 198 // all set.. | |
| 199 } | |
| 200 | |
| 201 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {} | |
| 202 | |
| 203 int32_t SimpleFilteredSentenceBreakIterator::next() { | |
| 204 int32_t n = fDelegate->next(); | |
| 205 if(n == UBRK_DONE || // at end or | |
| 206 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions | |
| 207 return n; | |
| 208 } | |
| 209 // OK, do we need to break here? | |
| 210 UErrorCode status = U_ZERO_ERROR; | |
| 211 // refresh text | |
| 212 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); | |
| 213 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia
s())); | |
| 214 do { // outer loop runs once per underlying break (from fDelegate). | |
| 215 // loops while 'n' points to an exception. | |
| 216 utext_setNativeIndex(fText.getAlias(), n); // from n.. | |
| 217 fBackwardsTrie->reset(); | |
| 218 UChar32 uch; | |
| 219 //if(debug2) u_printf(" n@ %d\n", n); | |
| 220 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown
") | |
| 221 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk
ip a class of chars here?? | |
| 222 // TODO only do this the 1st time? | |
| 223 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); | |
| 224 } else { | |
| 225 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); | |
| 226 uch = utext_next32(fText.getAlias()); | |
| 227 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); | |
| 228 } | |
| 229 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; | |
| 230 | |
| 231 int32_t bestPosn = -1; | |
| 232 int32_t bestValue = -1; | |
| 233 | |
| 234 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to
consume backwards and.. | |
| 235 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// mor
e in the trie | |
| 236 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far | |
| 237 bestPosn = utext_getNativeIndex(fText.getAlias()); | |
| 238 bestValue = fBackwardsTrie->getValue(); | |
| 239 } | |
| 240 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN
ativeIndex(fText.getAlias())); | |
| 241 } | |
| 242 | |
| 243 if(USTRINGTRIE_MATCHES(r)) { // exact match? | |
| 244 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | |
| 245 bestValue = fBackwardsTrie->getValue(); | |
| 246 bestPosn = utext_getNativeIndex(fText.getAlias()); | |
| 247 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | |
| 248 } | |
| 249 | |
| 250 if(bestPosn>=0) { | |
| 251 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue
=%d\n", (UChar)uch, r, bestPosn, bestValue); | |
| 252 | |
| 253 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? | |
| 254 //int32_t bestValue = fBackwardsTrie->getValue(); | |
| 255 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC
har)uch, r, bestValue); | |
| 256 | |
| 257 if(bestValue == kMATCH) { // exact match! | |
| 258 //if(debug2) u_printf(" exact backward match\n"); | |
| 259 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. | |
| 260 if(n==UBRK_DONE) return n; | |
| 261 continue; // See if the next is another exception. | |
| 262 } else if(bestValue == kPARTIAL | |
| 263 && fForwardsPartialTrie.isValid()) { // make sure there's a forw
ard trie | |
| 264 //if(debug2) u_printf(" partial backward match\n"); | |
| 265 // We matched the "Ph." in "Ph.D." - now we need to run everything throu
gh the forwards trie | |
| 266 // to see if it matches something going forward. | |
| 267 fForwardsPartialTrie->reset(); | |
| 268 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; | |
| 269 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close .
. | |
| 270 //if(debug2) u_printf("Retrying at %d\n", bestPosn); | |
| 271 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && | |
| 272 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(u
ch))) { | |
| 273 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute
xt_getNativeIndex(fText.getAlias())); | |
| 274 } | |
| 275 if(USTRINGTRIE_MATCHES(rfwd)) { | |
| 276 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); | |
| 277 // only full matches here, nothing to check | |
| 278 // skip the next: | |
| 279 n = fDelegate->next(); | |
| 280 if(n==UBRK_DONE) return n; | |
| 281 continue; | |
| 282 } else { | |
| 283 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); | |
| 284 // no match (no exception) -return the 'underlying' break | |
| 285 return n; | |
| 286 } | |
| 287 } else { | |
| 288 return n; // internal error and/or no forwards trie | |
| 289 } | |
| 290 } else { | |
| 291 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n
o best match | |
| 292 return n; // No match - so exit. Not an exception. | |
| 293 } | |
| 294 } while(n != UBRK_DONE); | |
| 295 return n; | |
| 296 } | |
| 297 | |
| 298 /** | |
| 299 * Concrete implementation of builder class. | |
| 300 */ | |
| 301 class U_I18N_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIterat
orBuilder { | |
| 302 public: | |
| 303 virtual ~SimpleFilteredBreakIteratorBuilder(); | |
| 304 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu
s); | |
| 305 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); | |
| 306 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s
tatus); | |
| 307 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode&
status); | |
| 308 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st
atus); | |
| 309 private: | |
| 310 UStringSet fSet; | |
| 311 }; | |
| 312 | |
| 313 SimpleFilteredBreakIteratorBuilder::~SimpleFilteredBreakIteratorBuilder() | |
| 314 { | |
| 315 } | |
| 316 | |
| 317 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(UErrorCod
e &status) | |
| 318 : fSet(status) | |
| 319 { | |
| 320 } | |
| 321 | |
| 322 SimpleFilteredBreakIteratorBuilder::SimpleFilteredBreakIteratorBuilder(const Loc
ale &fromLocale, UErrorCode &status) | |
| 323 : fSet(status) | |
| 324 { | |
| 325 if(U_SUCCESS(status)) { | |
| 326 LocalUResourceBundlePointer b(ures_open(U_ICUDATA_BRKITR, fromLocale.getBase
Name(), &status)); | |
| 327 LocalUResourceBundlePointer exceptions(ures_getByKeyWithFallback(b.getAlias(
), "exceptions", NULL, &status)); | |
| 328 LocalUResourceBundlePointer breaks(ures_getByKeyWithFallback(exceptions.getA
lias(), "SentenceBreak", NULL, &status)); | |
| 329 if(U_FAILURE(status)) return; // leaves the builder empty, if you try to use
it. | |
| 330 | |
| 331 LocalUResourceBundlePointer strs; | |
| 332 UErrorCode subStatus = status; | |
| 333 do { | |
| 334 strs.adoptInstead(ures_getNextResource(breaks.getAlias(), strs.orphan(), &
subStatus)); | |
| 335 if(strs.isValid() && U_SUCCESS(subStatus)) { | |
| 336 UnicodeString str(ures_getUnicodeString(strs.getAlias(), &status)); | |
| 337 suppressBreakAfter(str, status); // load the string | |
| 338 } | |
| 339 } while (strs.isValid() && U_SUCCESS(subStatus)); | |
| 340 if(U_FAILURE(subStatus)&&subStatus!=U_INDEX_OUTOFBOUNDS_ERROR&&U_SUCCESS(sta
tus)) { | |
| 341 status = subStatus; | |
| 342 } | |
| 343 } | |
| 344 } | |
| 345 | |
| 346 UBool | |
| 347 SimpleFilteredBreakIteratorBuilder::suppressBreakAfter(const UnicodeString& exce
ption, UErrorCode& status) | |
| 348 { | |
| 349 UBool r = fSet.add(exception, status); | |
| 350 FB_TRACE("suppressBreakAfter",&exception,r,0); | |
| 351 return r; | |
| 352 } | |
| 353 | |
| 354 UBool | |
| 355 SimpleFilteredBreakIteratorBuilder::unsuppressBreakAfter(const UnicodeString& ex
ception, UErrorCode& status) | |
| 356 { | |
| 357 UBool r = fSet.remove(exception, status); | |
| 358 FB_TRACE("unsuppressBreakAfter",&exception,r,0); | |
| 359 return r; | |
| 360 } | |
| 361 | |
| 362 /** | |
| 363 * Jitterbug 2974: MSVC has a bug whereby new X[0] behaves badly. | |
| 364 * Work around this. | |
| 365 * | |
| 366 * Note: "new UnicodeString[subCount]" ends up calling global operator new | |
| 367 * on MSVC2012 for some reason. | |
| 368 */ | |
| 369 static inline UnicodeString* newUnicodeStringArray(size_t count) { | |
| 370 return new UnicodeString[count ? count : 1]; | |
| 371 } | |
| 372 | |
| 373 BreakIterator * | |
| 374 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr
rorCode& status) { | |
| 375 LocalPointer<BreakIterator> adopt(adoptBreakIterator); | |
| 376 | |
| 377 if(U_FAILURE(status)) { | |
| 378 return NULL; | |
| 379 } | |
| 380 | |
| 381 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status)); | |
| 382 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status)); | |
| 383 | |
| 384 int32_t revCount = 0; | |
| 385 int32_t fwdCount = 0; | |
| 386 | |
| 387 int32_t subCount = fSet.size(); | |
| 388 | |
| 389 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); | |
| 390 | |
| 391 LocalArray<UnicodeString> ustrs(ustrs_ptr); | |
| 392 | |
| 393 LocalMemory<int> partials; | |
| 394 partials.allocateInsteadAndReset(subCount); | |
| 395 | |
| 396 LocalPointer<UCharsTrie> backwardsTrie; // i.e. ".srM" for Mrs. | |
| 397 LocalPointer<UCharsTrie> forwardsPartialTrie; // Has ".a" for "a.M." | |
| 398 | |
| 399 int n=0; | |
| 400 for ( int32_t i = 0; | |
| 401 i<fSet.size(); | |
| 402 i++) { | |
| 403 const UnicodeString *abbr = fSet.getStringAt(i); | |
| 404 if(abbr) { | |
| 405 FB_TRACE("build",abbr,TRUE,i); | |
| 406 ustrs[n] = *abbr; // copy by value | |
| 407 FB_TRACE("ustrs[n]",&ustrs[n],TRUE,i); | |
| 408 } else { | |
| 409 FB_TRACE("build",abbr,FALSE,i); | |
| 410 status = U_MEMORY_ALLOCATION_ERROR; | |
| 411 return NULL; | |
| 412 } | |
| 413 partials[n] = 0; // default: not partial | |
| 414 n++; | |
| 415 } | |
| 416 // first pass - find partials. | |
| 417 for(int i=0;i<subCount;i++) { | |
| 418 int nn = ustrs[i].indexOf(kFULLSTOP); // TODO: non-'.' abbreviations | |
| 419 if(nn>-1 && (nn+1)!=ustrs[i].length()) { | |
| 420 FB_TRACE("partial",&ustrs[i],FALSE,i); | |
| 421 // is partial. | |
| 422 // is it unique? | |
| 423 int sameAs = -1; | |
| 424 for(int j=0;j<subCount;j++) { | |
| 425 if(j==i) continue; | |
| 426 if(ustrs[i].compare(0,nn+1,ustrs[j],0,nn+1)==0) { | |
| 427 FB_TRACE("prefix",&ustrs[j],FALSE,nn+1); | |
| 428 //UBool otherIsPartial = ((nn+1)!=ustrs[j].length()); // true if ustr
s[j] doesn't end at nn | |
| 429 if(partials[j]==0) { // hasn't been processed yet | |
| 430 partials[j] = kSuppressInReverse | kAddToForward; | |
| 431 FB_TRACE("suppressing",&ustrs[j],FALSE,j); | |
| 432 } else if(partials[j] & kSuppressInReverse) { | |
| 433 sameAs = j; // the other entry is already in the reverse table. | |
| 434 } | |
| 435 } | |
| 436 } | |
| 437 FB_TRACE("for partial same-",&ustrs[i],FALSE,sameAs); | |
| 438 FB_TRACE(" == partial #",&ustrs[i],FALSE,partials[i]); | |
| 439 UnicodeString prefix(ustrs[i], 0, nn+1); | |
| 440 if(sameAs == -1 && partials[i] == 0) { | |
| 441 // first one - add the prefix to the reverse table. | |
| 442 prefix.reverse(); | |
| 443 builder->add(prefix, kPARTIAL, status); | |
| 444 revCount++; | |
| 445 FB_TRACE("Added partial",&prefix,FALSE, i); | |
| 446 FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); | |
| 447 partials[i] = kSuppressInReverse | kAddToForward; | |
| 448 } else { | |
| 449 FB_TRACE("NOT adding partial",&prefix,FALSE, i); | |
| 450 FB_TRACE(u_errorName(status),&ustrs[i],FALSE,i); | |
| 451 } | |
| 452 } | |
| 453 } | |
| 454 for(int i=0;i<subCount;i++) { | |
| 455 if(partials[i]==0) { | |
| 456 ustrs[i].reverse(); | |
| 457 builder->add(ustrs[i], kMATCH, status); | |
| 458 revCount++; | |
| 459 FB_TRACE(u_errorName(status), &ustrs[i], FALSE, i); | |
| 460 } else { | |
| 461 FB_TRACE("Adding fwd",&ustrs[i], FALSE, i); | |
| 462 | |
| 463 // an optimization would be to only add the portion after the '.' | |
| 464 // for example, for "Ph.D." we store ".hP" in the reverse table. We could
just store "D." in the forward, | |
| 465 // instead of "Ph.D." since we already know the "Ph." part is a match. | |
| 466 // would need the trie to be able to hold 0-length strings, though. | |
| 467 builder2->add(ustrs[i], kMATCH, status); // forward | |
| 468 fwdCount++; | |
| 469 //ustrs[i].reverse(); | |
| 470 ////if(debug2) u_printf("SUPPRESS- not Added(%d): /%S/ status=%s\n",parti
als[i], ustrs[i].getTerminatedBuffer(), u_errorName(status)); | |
| 471 } | |
| 472 } | |
| 473 FB_TRACE("AbbrCount",NULL,FALSE, subCount); | |
| 474 | |
| 475 if(revCount>0) { | |
| 476 backwardsTrie.adoptInstead(builder->build(USTRINGTRIE_BUILD_FAST, status)); | |
| 477 if(U_FAILURE(status)) { | |
| 478 FB_TRACE(u_errorName(status),NULL,FALSE, -1); | |
| 479 return NULL; | |
| 480 } | |
| 481 } | |
| 482 | |
| 483 if(fwdCount>0) { | |
| 484 forwardsPartialTrie.adoptInstead(builder2->build(USTRINGTRIE_BUILD_FAST, sta
tus)); | |
| 485 if(U_FAILURE(status)) { | |
| 486 FB_TRACE(u_errorName(status),NULL,FALSE, -1); | |
| 487 return NULL; | |
| 488 } | |
| 489 } | |
| 490 | |
| 491 return new SimpleFilteredSentenceBreakIterator(adopt.orphan(), forwardsPartial
Trie.orphan(), backwardsTrie.orphan(), status); | |
| 492 } | |
| 493 | |
| 494 | |
| 495 // ----------- Base class implementation | |
| 496 | |
| 497 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { | |
| 498 } | |
| 499 | |
| 500 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { | |
| 501 } | |
| 502 | |
| 503 FilteredBreakIteratorBuilder * | |
| 504 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st
atus) { | |
| 505 if(U_FAILURE(status)) return NULL; | |
| 506 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(where, status)); | |
| 507 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | |
| 508 return ret.orphan(); | |
| 509 } | |
| 510 | |
| 511 FilteredBreakIteratorBuilder * | |
| 512 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { | |
| 513 if(U_FAILURE(status)) return NULL; | |
| 514 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator
Builder(status)); | |
| 515 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; | |
| 516 return ret.orphan(); | |
| 517 } | |
| 518 | |
| 519 U_NAMESPACE_END | |
| 520 | |
| 521 #endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FIL
TERED_BREAK_ITERATION | |
| OLD | NEW |