Chromium Code Reviews
chromiumcodereview-hr@appspot.gserviceaccount.com (chromiumcodereview-hr) | Please choose your nickname with Settings | Help | Chromium Project | Gerrit Changes | Sign out
(278)

Side by Side Diff: source/common/filteredbrk.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561
Patch Set: Created 4 years, 11 months ago
Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.
Jump to:
View unified diff | Download patch
« no previous file with comments | « source/common/dictbe.cpp ('k') | source/common/hash.h » ('j') | no next file with comments »
Toggle Intra-line Diffs ('i') | Expand Comments ('e') | Collapse Comments ('c') | Show Comments Hide Comments ('s')
OLDNEW
1 /* 1 /*
2 ******************************************************************************* 2 *******************************************************************************
3 * Copyright (C) 2014, International Business Machines Corporation and 3 * Copyright (C) 2014-2015, International Business Machines Corporation and
4 * others. All Rights Reserved. 4 * others. All Rights Reserved.
5 ******************************************************************************* 5 *******************************************************************************
6 */ 6 */
7 7
8 #include "unicode/utypes.h" 8 #include "unicode/utypes.h"
9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION 9 #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
10 10
11 #include "cmemory.h" 11 #include "cmemory.h"
12 12
13 #include "unicode/filteredbrk.h" 13 #include "unicode/filteredbrk.h"
(...skipping 22 matching lines...) Expand all
36 } 36 }
37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n", 37 fprintf(stderr,"%s:%d: %s. s='%s'(%p), b=%c, d=%d\n",
38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d); 38 f, l, m, buf, (const void*)s, b?'T':'F',(int)d);
39 } 39 }
40 40
41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__) 41 #define FB_TRACE(m,s,b,d) _fb_trace(m,s,b,d,__FILE__,__LINE__)
42 #else 42 #else
43 #define FB_TRACE(m,s,b,d) 43 #define FB_TRACE(m,s,b,d)
44 #endif 44 #endif
45 45
46 /**
47 * Used with sortedInsert()
48 */
46 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) { 49 static int8_t U_CALLCONV compareUnicodeString(UElement t1, UElement t2) {
47 const UnicodeString &a = *(const UnicodeString*)t1.pointer; 50 const UnicodeString &a = *(const UnicodeString*)t1.pointer;
48 const UnicodeString &b = *(const UnicodeString*)t2.pointer; 51 const UnicodeString &b = *(const UnicodeString*)t2.pointer;
49 return a.compare(b); 52 return a.compare(b);
50 } 53 }
51 54
52 /** 55 /**
53 * A UVector which implements a set of strings. 56 * A UVector which implements a set of strings.
54 */ 57 */
55 class U_I18N_API UStringSet : public UVector { 58 class U_COMMON_API UStringSet : public UVector {
56 public: 59 public:
57 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject, 60 UStringSet(UErrorCode &status) : UVector(uprv_deleteUObject,
58 uhash_compareUnicodeString, 61 uhash_compareUnicodeString,
59 1, 62 1,
60 status) {} 63 status) {}
61 virtual ~UStringSet(); 64 virtual ~UStringSet();
62 /** 65 /**
63 * Is this UnicodeSet contained? 66 * Is this UnicodeSet contained?
64 */ 67 */
65 inline UBool contains(const UnicodeString& s) { 68 inline UBool contains(const UnicodeString& s) {
(...skipping 44 matching lines...) Expand 10 before | Expand all | Expand 10 after
110 if(U_FAILURE(status)) return false; 113 if(U_FAILURE(status)) return false;
111 return removeElement((void*) &s); 114 return removeElement((void*) &s);
112 } 115 }
113 }; 116 };
114 117
115 /** 118 /**
116 * Virtual, won't be inlined 119 * Virtual, won't be inlined
117 */ 120 */
118 UStringSet::~UStringSet() {} 121 UStringSet::~UStringSet() {}
119 122
123 /* ----------------------------------------------------------- */
120 124
125
126 /* Filtered Break constants */
121 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie 127 static const int32_t kPARTIAL = (1<<0); //< partial - need to run through forward trie
122 static const int32_t kMATCH = (1<<1); //< exact match - skip this one. 128 static const int32_t kMATCH = (1<<1); //< exact match - skip this one.
123 static const int32_t kSuppressInReverse = (1<<0); 129 static const int32_t kSuppressInReverse = (1<<0);
124 static const int32_t kAddToForward = (1<<1); 130 static const int32_t kAddToForward = (1<<1);
125 static const UChar kFULLSTOP = 0x002E; // '.' 131 static const UChar kFULLSTOP = 0x002E; // '.'
126 132
133 /**
134 * Shared data for SimpleFilteredSentenceBreakIterator
135 */
136 class SimpleFilteredSentenceBreakData : public UMemory {
137 public:
138 SimpleFilteredSentenceBreakData(UCharsTrie *forwards, UCharsTrie *backwards )
139 : fForwardsPartialTrie(forwards), fBackwardsTrie(backwards), refcount(1) { }
140 SimpleFilteredSentenceBreakData *incr() { refcount++; return this; }
141 SimpleFilteredSentenceBreakData *decr() { if((--refcount) <= 0) delete this; return 0; }
142 virtual ~SimpleFilteredSentenceBreakData();
143
144 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
145 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
146 int32_t refcount;
147 };
148
149 SimpleFilteredSentenceBreakData::~SimpleFilteredSentenceBreakData() {}
150
151 /**
152 * Concrete implementation
153 */
127 class SimpleFilteredSentenceBreakIterator : public BreakIterator { 154 class SimpleFilteredSentenceBreakIterator : public BreakIterator {
128 public: 155 public:
129 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards , UCharsTrie *backwards, UErrorCode &status); 156 SimpleFilteredSentenceBreakIterator(BreakIterator *adopt, UCharsTrie *forwards , UCharsTrie *backwards, UErrorCode &status);
130 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other); 157 SimpleFilteredSentenceBreakIterator(const SimpleFilteredSentenceBreakIterator& other);
131 virtual ~SimpleFilteredSentenceBreakIterator(); 158 virtual ~SimpleFilteredSentenceBreakIterator();
132 private: 159 private:
160 SimpleFilteredSentenceBreakData *fData;
133 LocalPointer<BreakIterator> fDelegate; 161 LocalPointer<BreakIterator> fDelegate;
134 LocalUTextPointer fText; 162 LocalUTextPointer fText;
135 LocalPointer<UCharsTrie> fBackwardsTrie; // i.e. ".srM" for Mrs.
136 LocalPointer<UCharsTrie> fForwardsPartialTrie; // Has ".a" for "a.M."
137 163
138 /* -- subclass interface -- */ 164 /* -- subclass interface -- */
139 public: 165 public:
140 /* -- cloning and other subclass stuff -- */ 166 /* -- cloning and other subclass stuff -- */
141 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/, 167 virtual BreakIterator * createBufferClone(void * /*stackBuffer*/,
142 int32_t &/*BufferSize*/, 168 int32_t &/*BufferSize*/,
143 UErrorCode &status) { 169 UErrorCode &status) {
144 // for now - always deep clone 170 // for now - always deep clone
145 status = U_SAFECLONE_ALLOCATED_WARNING; 171 status = U_SAFECLONE_ALLOCATED_WARNING;
146 return clone(); 172 return clone();
147 } 173 }
148 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr eakIterator(*this); } 174 virtual BreakIterator* clone(void) const { return new SimpleFilteredSentenceBr eakIterator(*this); }
149 virtual UClassID getDynamicClassID(void) const { return NULL; } 175 virtual UClassID getDynamicClassID(void) const { return NULL; }
150 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t rue; return false; } 176 virtual UBool operator==(const BreakIterator& o) const { if(this==&o) return t rue; return false; }
151 177
152 /* -- text modifying -- */ 178 /* -- text modifying -- */
153 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex t,status); } 179 virtual void setText(UText *text, UErrorCode &status) { fDelegate->setText(tex t,status); }
154 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD elegate->refreshInputText(input,status); return *this; } 180 virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) { fD elegate->refreshInputText(input,status); return *this; }
155 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); } 181 virtual void adoptText(CharacterIterator* it) { fDelegate->adoptText(it); }
156 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); } 182 virtual void setText(const UnicodeString &text) { fDelegate->setText(text); }
157 183
158 /* -- other functions that are just delegated -- */ 184 /* -- other functions that are just delegated -- */
159 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel egate->getUText(fillIn,status); } 185 virtual UText *getUText(UText *fillIn, UErrorCode &status) const { return fDel egate->getUText(fillIn,status); }
160 virtual CharacterIterator& getText(void) const { return fDelegate->getText(); } 186 virtual CharacterIterator& getText(void) const { return fDelegate->getText(); }
161 187
162 /* -- ITERATION -- */ 188 /* -- ITERATION -- */
163 virtual int32_t first(void) { return fDelegate->first(); } 189 virtual int32_t first(void);
164 virtual int32_t preceding(int32_t /*offset*/) { /* TODO: not implemented */ re turn UBRK_DONE; } 190 virtual int32_t preceding(int32_t offset);
165 virtual int32_t previous(void) { /* TODO: not implemented */ return UBRK_DONE; } 191 virtual int32_t previous(void);
166 virtual UBool isBoundary(int32_t offset) { return fDelegate->isBoundary(offset ); } 192 virtual UBool isBoundary(int32_t offset);
167 virtual int32_t current(void) const { return fDelegate->current(); } 193 virtual int32_t current(void) const { return fDelegate->current(); } // we keep the delegate current, so this should be correct.
168 194
169 virtual int32_t next(void); 195 virtual int32_t next(void);
170 196
171 virtual int32_t next(int32_t /*n*/) { /* TODO: not implemented */ return UBRK_ DONE; } 197 virtual int32_t next(int32_t n);
172 virtual int32_t following(int32_t /*offset*/) { /* TODO: not implemented */ re turn UBRK_DONE; } 198 virtual int32_t following(int32_t offset);
173 virtual int32_t last(void) { return fDelegate->last(); } 199 virtual int32_t last(void);
174 200
201 private:
202 /**
203 * Given that the fDelegate has already given its "initial" answer,
204 * find the NEXT actual (non-excepted) break.
205 * @param n initial position from delegate
206 * @return new break position or UBRK_DONE
207 */
208 int32_t internalNext(int32_t n);
209 /**
210 * Given that the fDelegate has already given its "initial" answer,
211 * find the PREV actual (non-excepted) break.
212 * @param n initial position from delegate
213 * @return new break position or UBRK_DONE
214 */
215 int32_t internalPrev(int32_t n);
216 /**
217 * set up the UText with the value of the fDelegate.
218 * Call this before calling breakExceptionAt.
219 * May be able to avoid excess calls
220 */
221 void resetState(UErrorCode &status);
222 /**
223 * Is there a match (exception) at this spot?
224 */
225 enum EFBMatchResult { kNoExceptionHere, kExceptionHere };
226 /**
227 * Determine if there is an exception at this spot
228 * @param n spot to check
229 * @return kNoExceptionHere or kExceptionHere
230 **/
231 enum EFBMatchResult breakExceptionAt(int32_t n);
175 }; 232 };
176 233
177 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S impleFilteredSentenceBreakIterator& other) 234 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(const S impleFilteredSentenceBreakIterator& other)
178 : BreakIterator(other), fDelegate(other.fDelegate->clone()) 235 : BreakIterator(other), fData(other.fData->incr()), fDelegate(other.fDelegate- >clone())
179 { 236 {
180 /*
181 TODO: not able to clone Tries. Should be a refcounted hidden master instead.
182 if(other.fBackwardsTrie.isValid()) {
183 fBackwardsTrie.adoptInstead(other.fBackwardsTrie->clone());
184 }
185 if(other.fForwardsPartialTrie.isValid()) {
186 fForwardsPartialTrie.adoptInstead(other.fForwardsPartialTrie->clone());
187 }
188 */
189 } 237 }
190 238
191 239
192 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) : 240 SimpleFilteredSentenceBreakIterator::SimpleFilteredSentenceBreakIterator(BreakIt erator *adopt, UCharsTrie *forwards, UCharsTrie *backwards, UErrorCode &status) :
193 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC _ACTUAL_LOCALE,status)), 241 BreakIterator(adopt->getLocale(ULOC_VALID_LOCALE,status),adopt->getLocale(ULOC _ACTUAL_LOCALE,status)),
194 fDelegate(adopt), 242 fData(new SimpleFilteredSentenceBreakData(forwards, backwards)),
195 fBackwardsTrie(backwards), 243 fDelegate(adopt)
196 fForwardsPartialTrie(forwards)
197 { 244 {
198 // all set.. 245 // all set..
199 } 246 }
200 247
201 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {} 248 SimpleFilteredSentenceBreakIterator::~SimpleFilteredSentenceBreakIterator() {
249 fData = fData->decr();
250 }
202 251
203 int32_t SimpleFilteredSentenceBreakIterator::next() { 252 void SimpleFilteredSentenceBreakIterator::resetState(UErrorCode &status) {
204 int32_t n = fDelegate->next();
205 if(n == UBRK_DONE || // at end or
206 fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
207 return n;
208 }
209 // OK, do we need to break here?
210 UErrorCode status = U_ZERO_ERROR;
211 // refresh text
212 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status)); 253 fText.adoptInstead(fDelegate->getUText(fText.orphan(), status));
213 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia s())); 254 }
214 do { // outer loop runs once per underlying break (from fDelegate). 255
256 SimpleFilteredSentenceBreakIterator::EFBMatchResult
257 SimpleFilteredSentenceBreakIterator::breakExceptionAt(int32_t n) {
258 int64_t bestPosn = -1;
259 int32_t bestValue = -1;
215 // loops while 'n' points to an exception. 260 // loops while 'n' points to an exception.
216 utext_setNativeIndex(fText.getAlias(), n); // from n.. 261 utext_setNativeIndex(fText.getAlias(), n); // from n..
217 fBackwardsTrie->reset(); 262 fData->fBackwardsTrie->reset();
218 UChar32 uch; 263 UChar32 uch;
264
219 //if(debug2) u_printf(" n@ %d\n", n); 265 //if(debug2) u_printf(" n@ %d\n", n);
220 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown ") 266 // Assume a space is following the '.' (so we handle the case: "Mr. /Brown ")
221 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk ip a class of chars here?? 267 if((uch=utext_previous32(fText.getAlias()))==(UChar32)0x0020) { // TODO: sk ip a class of chars here??
222 // TODO only do this the 1st time? 268 // TODO only do this the 1st time?
223 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch); 269 //if(debug2) u_printf("skipping prev: |%C| \n", (UChar)uch);
224 } else { 270 } else {
225 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch); 271 //if(debug2) u_printf("not skipping prev: |%C| \n", (UChar)uch);
226 uch = utext_next32(fText.getAlias()); 272 uch = utext_next32(fText.getAlias());
227 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch); 273 //if(debug2) u_printf(" -> : |%C| \n", (UChar)uch);
228 } 274 }
275
229 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE; 276 UStringTrieResult r = USTRINGTRIE_INTERMEDIATE_VALUE;
230 277
231 int32_t bestPosn = -1;
232 int32_t bestValue = -1;
233
234 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and.. 278 while((uch=utext_previous32(fText.getAlias()))!=U_SENTINEL && // more to consume backwards and..
235 USTRINGTRIE_HAS_NEXT(r=fBackwardsTrie->nextForCodePoint(uch))) {// mor e in the trie 279 USTRINGTRIE_HAS_NEXT(r=fData->fBackwardsTrie->nextForCodePoint(uch))) {// more in the trie
236 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far 280 if(USTRINGTRIE_HAS_VALUE(r)) { // remember the best match so far
237 bestPosn = utext_getNativeIndex(fText.getAlias()); 281 bestPosn = utext_getNativeIndex(fText.getAlias());
238 bestValue = fBackwardsTrie->getValue(); 282 bestValue = fData->fBackwardsTrie->getValue();
239 } 283 }
240 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN ativeIndex(fText.getAlias())); 284 //if(debug2) u_printf("rev< /%C/ cont?%d @%d\n", (UChar)uch, r, utext_getN ativeIndex(fText.getAlias()));
241 } 285 }
242 286
243 if(USTRINGTRIE_MATCHES(r)) { // exact match? 287 if(USTRINGTRIE_MATCHES(r)) { // exact match?
244 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue); 288 //if(debug2) u_printf("rev<?/%C/?end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue);
245 bestValue = fBackwardsTrie->getValue(); 289 bestValue = fData->fBackwardsTrie->getValue();
246 bestPosn = utext_getNativeIndex(fText.getAlias()); 290 bestPosn = utext_getNativeIndex(fText.getAlias());
247 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue); 291 //if(debug2) u_printf("rev<+/%C/+end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue);
248 } 292 }
249 293
250 if(bestPosn>=0) { 294 if(bestPosn>=0) {
251 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue); 295 //if(debug2) u_printf("rev< /%C/ end of seq.. r=%d, bestPosn=%d, bestValue =%d\n", (UChar)uch, r, bestPosn, bestValue);
252 296
253 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what? 297 //if(USTRINGTRIE_MATCHES(r)) { // matched - so, now what?
254 //int32_t bestValue = fBackwardsTrie->getValue(); 298 //int32_t bestValue = fBackwardsTrie->getValue();
255 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC har)uch, r, bestValue); 299 ////if(debug2) u_printf("rev< /%C/ matched, skip..%d bestValue=%d\n", (UC har)uch, r, bestValue);
256 300
257 if(bestValue == kMATCH) { // exact match! 301 if(bestValue == kMATCH) { // exact match!
258 //if(debug2) u_printf(" exact backward match\n"); 302 //if(debug2) u_printf(" exact backward match\n");
259 n = fDelegate->next(); // skip this one. Find the next lowerlevel break. 303 return kExceptionHere; // See if the next is another exception.
260 if(n==UBRK_DONE) return n;
261 continue; // See if the next is another exception.
262 } else if(bestValue == kPARTIAL 304 } else if(bestValue == kPARTIAL
263 && fForwardsPartialTrie.isValid()) { // make sure there's a forw ard trie 305 && fData->fForwardsPartialTrie.isValid()) { // make sure there's a forward trie
264 //if(debug2) u_printf(" partial backward match\n"); 306 //if(debug2) u_printf(" partial backward match\n");
265 // We matched the "Ph." in "Ph.D." - now we need to run everything throu gh the forwards trie 307 // We matched the "Ph." in "Ph.D." - now we need to run everything throu gh the forwards trie
266 // to see if it matches something going forward. 308 // to see if it matches something going forward.
267 fForwardsPartialTrie->reset(); 309 fData->fForwardsPartialTrie->reset();
268 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE; 310 UStringTrieResult rfwd = USTRINGTRIE_INTERMEDIATE_VALUE;
269 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close . . 311 utext_setNativeIndex(fText.getAlias(), bestPosn); // hope that's close . .
270 //if(debug2) u_printf("Retrying at %d\n", bestPosn); 312 //if(debug2) u_printf("Retrying at %d\n", bestPosn);
271 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL && 313 while((uch=utext_next32(fText.getAlias()))!=U_SENTINEL &&
272 USTRINGTRIE_HAS_NEXT(rfwd=fForwardsPartialTrie->nextForCodePoint(u ch))) { 314 USTRINGTRIE_HAS_NEXT(rfwd=fData->fForwardsPartialTrie->nextForCode Point(uch))) {
273 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute xt_getNativeIndex(fText.getAlias())); 315 //if(debug2) u_printf("fwd> /%C/ cont?%d @%d\n", (UChar)uch, rfwd, ute xt_getNativeIndex(fText.getAlias()));
274 } 316 }
275 if(USTRINGTRIE_MATCHES(rfwd)) { 317 if(USTRINGTRIE_MATCHES(rfwd)) {
276 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch); 318 //if(debug2) u_printf("fwd> /%C/ == forward match!\n", (UChar)uch);
277 // only full matches here, nothing to check 319 // only full matches here, nothing to check
278 // skip the next: 320 // skip the next:
279 n = fDelegate->next(); 321 return kExceptionHere;
280 if(n==UBRK_DONE) return n;
281 continue;
282 } else { 322 } else {
283 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch); 323 //if(debug2) u_printf("fwd> /%C/ no match.\n", (UChar)uch);
284 // no match (no exception) -return the 'underlying' break 324 // no match (no exception) -return the 'underlying' break
285 return n; 325 return kNoExceptionHere;
286 } 326 }
287 } else { 327 } else {
288 return n; // internal error and/or no forwards trie 328 return kNoExceptionHere; // internal error and/or no forwards trie
289 } 329 }
290 } else { 330 } else {
291 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n o best match 331 //if(debug2) u_printf("rev< /%C/ .. no match..%d\n", (UChar)uch, r); // n o best match
292 return n; // No match - so exit. Not an exception. 332 return kNoExceptionHere; // No match - so exit. Not an exception.
293 } 333 }
294 } while(n != UBRK_DONE); 334 }
335
336 // the workhorse single next.
337 int32_t
338 SimpleFilteredSentenceBreakIterator::internalNext(int32_t n) {
339 if(n == UBRK_DONE || // at end or
340 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
341 return n;
342 }
343 // OK, do we need to break here?
344 UErrorCode status = U_ZERO_ERROR;
345 // refresh text
346 resetState(status);
347 if(U_FAILURE(status)) return UBRK_DONE; // bail out
348 int64_t utextLen = utext_nativeLength(fText.getAlias());
349
350 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia s()));
351 while (n != UBRK_DONE && n != utextLen) { // outer loop runs once per underlying break (from fDelegate).
352 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
353
354 switch(m) {
355 case kExceptionHere:
356 n = fDelegate->next(); // skip this one. Find the next lowerlevel break.
357 continue;
358
359 default:
360 case kNoExceptionHere:
361 return n;
362 }
363 }
295 return n; 364 return n;
296 } 365 }
297 366
367 int32_t
368 SimpleFilteredSentenceBreakIterator::internalPrev(int32_t n) {
369 if(n == 0 || n == UBRK_DONE || // at end or
370 fData->fBackwardsTrie.isNull()) { // .. no backwards table loaded == no exceptions
371 return n;
372 }
373 // OK, do we need to break here?
374 UErrorCode status = U_ZERO_ERROR;
375 // refresh text
376 resetState(status);
377 if(U_FAILURE(status)) return UBRK_DONE; // bail out
378
379 //if(debug2) u_printf("str, native len=%d\n", utext_nativeLength(fText.getAlia s()));
380 while (n != UBRK_DONE && n != 0) { // outer loop runs once per underlying break (from fDelegate).
381 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(n);
382
383 switch(m) {
384 case kExceptionHere:
385 n = fDelegate->previous(); // skip this one. Find the next lowerlevel break.
386 continue;
387
388 default:
389 case kNoExceptionHere:
390 return n;
391 }
392 }
393 return n;
394 }
395
396
397 int32_t
398 SimpleFilteredSentenceBreakIterator::next() {
399 return internalNext(fDelegate->next());
400 }
401
402 int32_t
403 SimpleFilteredSentenceBreakIterator::first(void) {
404 return internalNext(fDelegate->first());
405 }
406
407 int32_t
408 SimpleFilteredSentenceBreakIterator::preceding(int32_t offset) {
409 return internalPrev(fDelegate->preceding(offset));
410 }
411
412 int32_t
413 SimpleFilteredSentenceBreakIterator::previous(void) {
414 return internalPrev(fDelegate->previous());
415 }
416
417 UBool SimpleFilteredSentenceBreakIterator::isBoundary(int32_t offset) {
418 if(!fDelegate->isBoundary(offset)) return false; // no break to suppress
419
420 UErrorCode status = U_ZERO_ERROR;
421 resetState(status);
422
423 SimpleFilteredSentenceBreakIterator::EFBMatchResult m = breakExceptionAt(offset);
424
425 switch(m) {
426 case kExceptionHere:
427 return false;
428 default:
429 case kNoExceptionHere:
430 return true;
431 }
432 }
433
434 int32_t
435 SimpleFilteredSentenceBreakIterator::next(int32_t offset) {
436 return internalNext(fDelegate->next(offset));
437 }
438
439 int32_t
440 SimpleFilteredSentenceBreakIterator::following(int32_t offset) {
441 return internalNext(fDelegate->following(offset));
442 }
443
444 int32_t
445 SimpleFilteredSentenceBreakIterator::last(void) {
446 // Don't suppress a break opportunity at the end of text.
447 return fDelegate->last();
448 }
449
450
298 /** 451 /**
299 * Concrete implementation of builder class. 452 * Concrete implementation of builder class.
300 */ 453 */
301 class U_I18N_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIterat orBuilder { 454 class U_COMMON_API SimpleFilteredBreakIteratorBuilder : public FilteredBreakIter atorBuilder {
302 public: 455 public:
303 virtual ~SimpleFilteredBreakIteratorBuilder(); 456 virtual ~SimpleFilteredBreakIteratorBuilder();
304 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu s); 457 SimpleFilteredBreakIteratorBuilder(const Locale &fromLocale, UErrorCode &statu s);
305 SimpleFilteredBreakIteratorBuilder(UErrorCode &status); 458 SimpleFilteredBreakIteratorBuilder(UErrorCode &status);
306 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s tatus); 459 virtual UBool suppressBreakAfter(const UnicodeString& exception, UErrorCode& s tatus);
307 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status); 460 virtual UBool unsuppressBreakAfter(const UnicodeString& exception, UErrorCode& status);
308 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st atus); 461 virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& st atus);
309 private: 462 private:
310 UStringSet fSet; 463 UStringSet fSet;
311 }; 464 };
(...skipping 55 matching lines...) Expand 10 before | Expand all | Expand 10 after
367 * on MSVC2012 for some reason. 520 * on MSVC2012 for some reason.
368 */ 521 */
369 static inline UnicodeString* newUnicodeStringArray(size_t count) { 522 static inline UnicodeString* newUnicodeStringArray(size_t count) {
370 return new UnicodeString[count ? count : 1]; 523 return new UnicodeString[count ? count : 1];
371 } 524 }
372 525
373 BreakIterator * 526 BreakIterator *
374 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr rorCode& status) { 527 SimpleFilteredBreakIteratorBuilder::build(BreakIterator* adoptBreakIterator, UEr rorCode& status) {
375 LocalPointer<BreakIterator> adopt(adoptBreakIterator); 528 LocalPointer<BreakIterator> adopt(adoptBreakIterator);
376 529
530 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status), status) ;
531 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status), status );
377 if(U_FAILURE(status)) { 532 if(U_FAILURE(status)) {
378 return NULL; 533 return NULL;
379 } 534 }
380 535
381 LocalPointer<UCharsTrieBuilder> builder(new UCharsTrieBuilder(status));
382 LocalPointer<UCharsTrieBuilder> builder2(new UCharsTrieBuilder(status));
383
384 int32_t revCount = 0; 536 int32_t revCount = 0;
385 int32_t fwdCount = 0; 537 int32_t fwdCount = 0;
386 538
387 int32_t subCount = fSet.size(); 539 int32_t subCount = fSet.size();
388 540
389 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount); 541 UnicodeString *ustrs_ptr = newUnicodeStringArray(subCount);
390 542
391 LocalArray<UnicodeString> ustrs(ustrs_ptr); 543 LocalArray<UnicodeString> ustrs(ustrs_ptr);
392 544
393 LocalMemory<int> partials; 545 LocalMemory<int> partials;
(...skipping 102 matching lines...) Expand 10 before | Expand all | Expand 10 after
496 648
497 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() { 649 FilteredBreakIteratorBuilder::FilteredBreakIteratorBuilder() {
498 } 650 }
499 651
500 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() { 652 FilteredBreakIteratorBuilder::~FilteredBreakIteratorBuilder() {
501 } 653 }
502 654
503 FilteredBreakIteratorBuilder * 655 FilteredBreakIteratorBuilder *
504 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st atus) { 656 FilteredBreakIteratorBuilder::createInstance(const Locale& where, UErrorCode& st atus) {
505 if(U_FAILURE(status)) return NULL; 657 if(U_FAILURE(status)) return NULL;
506 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator Builder(where, status)); 658 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator Builder(where, status), status);
507 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; 659 return (U_SUCCESS(status))? ret.orphan(): NULL;
508 return ret.orphan();
509 } 660 }
510 661
511 FilteredBreakIteratorBuilder * 662 FilteredBreakIteratorBuilder *
512 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) { 663 FilteredBreakIteratorBuilder::createInstance(UErrorCode& status) {
513 if(U_FAILURE(status)) return NULL; 664 if(U_FAILURE(status)) return NULL;
514 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator Builder(status)); 665 LocalPointer<FilteredBreakIteratorBuilder> ret(new SimpleFilteredBreakIterator Builder(status), status);
515 if(U_SUCCESS(status) && !ret.isValid()) status = U_MEMORY_ALLOCATION_ERROR; 666 return (U_SUCCESS(status))? ret.orphan(): NULL;
516 return ret.orphan();
517 } 667 }
518 668
519 U_NAMESPACE_END 669 U_NAMESPACE_END
520 670
521 #endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION 671 #endif //#if !UCONFIG_NO_BREAK_ITERATION && U_HAVE_STD_STRING && !UCONFIG_NO_FILTERED_BREAK_ITERATION
OLDNEW
« no previous file with comments | « source/common/dictbe.cpp ('k') | source/common/hash.h » ('j') | no next file with comments »

Powered by Google App Engine
This is Rietveld 408576698