OLD | NEW |
(Empty) | |
| 1 /* |
| 2 ********************************************************************** |
| 3 * Copyright (C) 2001-2008 IBM and others. All rights reserved. |
| 4 ********************************************************************** |
| 5 * Date Name Description |
| 6 * 03/22/2000 helena Creation. |
| 7 ********************************************************************** |
| 8 */ |
| 9 |
| 10 #include "unicode/utypes.h" |
| 11 |
| 12 #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
| 13 |
| 14 #include "unicode/stsearch.h" |
| 15 #include "usrchimp.h" |
| 16 #include "cmemory.h" |
| 17 |
| 18 U_NAMESPACE_BEGIN |
| 19 |
| 20 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) |
| 21 |
| 22 // public constructors and destructors ----------------------------------- |
| 23 |
| 24 StringSearch::StringSearch(const UnicodeString &pattern, |
| 25 const UnicodeString &text, |
| 26 const Locale &locale, |
| 27 BreakIterator *breakiter, |
| 28 UErrorCode &status) : |
| 29 SearchIterator(text, breakiter), |
| 30 m_collator_(), |
| 31 m_pattern_(pattern) |
| 32 { |
| 33 if (U_FAILURE(status)) { |
| 34 m_strsrch_ = NULL; |
| 35 return; |
| 36 } |
| 37 |
| 38 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
| 39 m_text_.getBuffer(), m_text_.length(), |
| 40 locale.getName(), (UBreakIterator *)breakiter, |
| 41 &status); |
| 42 uprv_free(m_search_); |
| 43 m_search_ = NULL; |
| 44 |
| 45 // !!! dlf m_collator_ is an odd beast. basically it is an aliasing |
| 46 // wrapper around the internal collator and rules, which (here) are |
| 47 // owned by this stringsearch object. this means 1) it's destructor |
| 48 // _should not_ delete the ucollator or rules, and 2) changes made |
| 49 // to the exposed collator (setStrength etc) _should_ modify the |
| 50 // ucollator. thus the collator is not a copy-on-write alias, and it |
| 51 // needs to distinguish itself not merely from 'stand alone' colators |
| 52 // but also from copy-on-write ones. it needs additional state, which |
| 53 // setUCollator should set. |
| 54 |
| 55 if (U_SUCCESS(status)) { |
| 56 // Alias the collator |
| 57 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 58 // m_search_ has been created by the base SearchIterator class |
| 59 m_search_ = m_strsrch_->search; |
| 60 } |
| 61 } |
| 62 |
| 63 StringSearch::StringSearch(const UnicodeString &pattern, |
| 64 const UnicodeString &text, |
| 65 RuleBasedCollator *coll, |
| 66 BreakIterator *breakiter, |
| 67 UErrorCode &status) : |
| 68 SearchIterator(text, breakiter), |
| 69 m_collator_(), |
| 70 m_pattern_(pattern) |
| 71 { |
| 72 if (U_FAILURE(status)) { |
| 73 m_strsrch_ = NULL; |
| 74 return; |
| 75 } |
| 76 if (coll == NULL) { |
| 77 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 78 m_strsrch_ = NULL; |
| 79 return; |
| 80 } |
| 81 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| 82 m_pattern_.length(), |
| 83 m_text_.getBuffer(), |
| 84 m_text_.length(), coll->ucollator, |
| 85 (UBreakIterator *)breakiter, |
| 86 &status); |
| 87 uprv_free(m_search_); |
| 88 m_search_ = NULL; |
| 89 |
| 90 if (U_SUCCESS(status)) { |
| 91 // Alias the collator |
| 92 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 93 // m_search_ has been created by the base SearchIterator class |
| 94 m_search_ = m_strsrch_->search; |
| 95 } |
| 96 } |
| 97 |
| 98 StringSearch::StringSearch(const UnicodeString &pattern, |
| 99 CharacterIterator &text, |
| 100 const Locale &locale, |
| 101 BreakIterator *breakiter, |
| 102 UErrorCode &status) : |
| 103 SearchIterator(text, breakiter), |
| 104 m_collator_(), |
| 105 m_pattern_(pattern) |
| 106 { |
| 107 if (U_FAILURE(status)) { |
| 108 m_strsrch_ = NULL; |
| 109 return; |
| 110 } |
| 111 m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
| 112 m_text_.getBuffer(), m_text_.length(), |
| 113 locale.getName(), (UBreakIterator *)breakiter, |
| 114 &status); |
| 115 uprv_free(m_search_); |
| 116 m_search_ = NULL; |
| 117 |
| 118 if (U_SUCCESS(status)) { |
| 119 // Alias the collator |
| 120 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 121 // m_search_ has been created by the base SearchIterator class |
| 122 m_search_ = m_strsrch_->search; |
| 123 } |
| 124 } |
| 125 |
| 126 StringSearch::StringSearch(const UnicodeString &pattern, |
| 127 CharacterIterator &text, |
| 128 RuleBasedCollator *coll, |
| 129 BreakIterator *breakiter, |
| 130 UErrorCode &status) : |
| 131 SearchIterator(text, breakiter), |
| 132 m_collator_(), |
| 133 m_pattern_(pattern) |
| 134 { |
| 135 if (U_FAILURE(status)) { |
| 136 m_strsrch_ = NULL; |
| 137 return; |
| 138 } |
| 139 if (coll == NULL) { |
| 140 status = U_ILLEGAL_ARGUMENT_ERROR; |
| 141 m_strsrch_ = NULL; |
| 142 return; |
| 143 } |
| 144 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| 145 m_pattern_.length(), |
| 146 m_text_.getBuffer(), |
| 147 m_text_.length(), coll->ucollator, |
| 148 (UBreakIterator *)breakiter, |
| 149 &status); |
| 150 uprv_free(m_search_); |
| 151 m_search_ = NULL; |
| 152 |
| 153 if (U_SUCCESS(status)) { |
| 154 // Alias the collator |
| 155 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 156 // m_search_ has been created by the base SearchIterator class |
| 157 m_search_ = m_strsrch_->search; |
| 158 } |
| 159 } |
| 160 |
| 161 StringSearch::StringSearch(const StringSearch &that) : |
| 162 SearchIterator(that.m_text_, that.m_breakiterator_), |
| 163 m_collator_(), |
| 164 m_pattern_(that.m_pattern_) |
| 165 { |
| 166 UErrorCode status = U_ZERO_ERROR; |
| 167 |
| 168 // Free m_search_ from the superclass |
| 169 uprv_free(m_search_); |
| 170 m_search_ = NULL; |
| 171 |
| 172 if (that.m_strsrch_ == NULL) { |
| 173 // This was not a good copy |
| 174 m_strsrch_ = NULL; |
| 175 } |
| 176 else { |
| 177 // Make a deep copy |
| 178 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| 179 m_pattern_.length(), |
| 180 m_text_.getBuffer(), |
| 181 m_text_.length(), |
| 182 that.m_strsrch_->collator, |
| 183 (UBreakIterator *)that.m_breakitera
tor_, |
| 184 &status); |
| 185 if (U_SUCCESS(status)) { |
| 186 // Alias the collator |
| 187 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 188 // m_search_ has been created by the base SearchIterator class |
| 189 m_search_ = m_strsrch_->search; |
| 190 } |
| 191 } |
| 192 } |
| 193 |
| 194 StringSearch::~StringSearch() |
| 195 { |
| 196 if (m_strsrch_ != NULL) { |
| 197 usearch_close(m_strsrch_); |
| 198 m_search_ = NULL; |
| 199 } |
| 200 } |
| 201 |
| 202 StringSearch * |
| 203 StringSearch::clone() const { |
| 204 return new StringSearch(*this); |
| 205 } |
| 206 |
| 207 // operator overloading --------------------------------------------- |
| 208 StringSearch & StringSearch::operator=(const StringSearch &that) |
| 209 { |
| 210 if ((*this) != that) { |
| 211 UErrorCode status = U_ZERO_ERROR; |
| 212 m_text_ = that.m_text_; |
| 213 m_breakiterator_ = that.m_breakiterator_; |
| 214 m_pattern_ = that.m_pattern_; |
| 215 // all m_search_ in the parent class is linked up with m_strsrch_ |
| 216 usearch_close(m_strsrch_); |
| 217 m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
| 218 m_pattern_.length(), |
| 219 m_text_.getBuffer(), |
| 220 m_text_.length(), |
| 221 that.m_strsrch_->collator, |
| 222 NULL, &status); |
| 223 // Check null pointer |
| 224 if (m_strsrch_ != NULL) { |
| 225 // Alias the collator |
| 226 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 227 m_search_ = m_strsrch_->search; |
| 228 } |
| 229 } |
| 230 return *this; |
| 231 } |
| 232 |
| 233 UBool StringSearch::operator==(const SearchIterator &that) const |
| 234 { |
| 235 if (this == &that) { |
| 236 return TRUE; |
| 237 } |
| 238 if (SearchIterator::operator ==(that)) { |
| 239 StringSearch &thatsrch = (StringSearch &)that; |
| 240 return (this->m_pattern_ == thatsrch.m_pattern_ && |
| 241 this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); |
| 242 } |
| 243 return FALSE; |
| 244 } |
| 245 |
| 246 // public get and set methods ---------------------------------------- |
| 247 |
| 248 void StringSearch::setOffset(int32_t position, UErrorCode &status) |
| 249 { |
| 250 // status checked in usearch_setOffset |
| 251 usearch_setOffset(m_strsrch_, position, &status); |
| 252 } |
| 253 |
| 254 int32_t StringSearch::getOffset(void) const |
| 255 { |
| 256 return usearch_getOffset(m_strsrch_); |
| 257 } |
| 258 |
| 259 void StringSearch::setText(const UnicodeString &text, UErrorCode &status) |
| 260 { |
| 261 if (U_SUCCESS(status)) { |
| 262 m_text_ = text; |
| 263 usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); |
| 264 } |
| 265 } |
| 266 |
| 267 void StringSearch::setText(CharacterIterator &text, UErrorCode &status) |
| 268 { |
| 269 if (U_SUCCESS(status)) { |
| 270 text.getText(m_text_); |
| 271 usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &stat
us); |
| 272 } |
| 273 } |
| 274 |
| 275 RuleBasedCollator * StringSearch::getCollator() const |
| 276 { |
| 277 return (RuleBasedCollator *)&m_collator_; |
| 278 } |
| 279 |
| 280 void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) |
| 281 { |
| 282 if (U_SUCCESS(status)) { |
| 283 usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); |
| 284 // Alias the collator |
| 285 m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
| 286 } |
| 287 } |
| 288 |
| 289 void StringSearch::setPattern(const UnicodeString &pattern, |
| 290 UErrorCode &status) |
| 291 { |
| 292 if (U_SUCCESS(status)) { |
| 293 m_pattern_ = pattern; |
| 294 usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length
(), |
| 295 &status); |
| 296 } |
| 297 } |
| 298 |
| 299 const UnicodeString & StringSearch::getPattern() const |
| 300 { |
| 301 return m_pattern_; |
| 302 } |
| 303 |
| 304 // public methods ---------------------------------------------------- |
| 305 |
| 306 void StringSearch::reset() |
| 307 { |
| 308 usearch_reset(m_strsrch_); |
| 309 } |
| 310 |
| 311 SearchIterator * StringSearch::safeClone(void) const |
| 312 { |
| 313 UErrorCode status = U_ZERO_ERROR; |
| 314 StringSearch *result = new StringSearch(m_pattern_, m_text_, |
| 315 (RuleBasedCollator *)&m_collator_, |
| 316 m_breakiterator_, |
| 317 status); |
| 318 /* test for NULL */ |
| 319 if (result == 0) { |
| 320 status = U_MEMORY_ALLOCATION_ERROR; |
| 321 return 0; |
| 322 } |
| 323 result->setOffset(getOffset(), status); |
| 324 result->setMatchStart(m_strsrch_->search->matchedIndex); |
| 325 result->setMatchLength(m_strsrch_->search->matchedLength); |
| 326 if (U_FAILURE(status)) { |
| 327 return NULL; |
| 328 } |
| 329 return result; |
| 330 } |
| 331 |
| 332 // protected method ------------------------------------------------- |
| 333 |
| 334 int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) |
| 335 { |
| 336 // values passed here are already in the pre-shift position |
| 337 if (U_SUCCESS(status)) { |
| 338 if (m_strsrch_->pattern.CELength == 0) { |
| 339 m_search_->matchedIndex = |
| 340 m_search_->matchedIndex == USEARCH_DONE ? |
| 341 getOffset() : m_search_->matchedIndex + 1; |
| 342 m_search_->matchedLength = 0; |
| 343 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
| 344 &status); |
| 345 if (m_search_->matchedIndex == m_search_->textLength) { |
| 346 m_search_->matchedIndex = USEARCH_DONE; |
| 347 } |
| 348 } |
| 349 else { |
| 350 // looking at usearch.cpp, this part is shifted out to |
| 351 // StringSearch instead of SearchIterator because m_strsrch_ is |
| 352 // not accessible in SearchIterator |
| 353 #if 0 |
| 354 if (position + m_strsrch_->pattern.defaultShiftSize |
| 355 > m_search_->textLength) { |
| 356 setMatchNotFound(); |
| 357 return USEARCH_DONE; |
| 358 } |
| 359 #endif |
| 360 if (m_search_->matchedLength <= 0) { |
| 361 // the flipping direction issue has already been handled |
| 362 // in next() |
| 363 // for boundary check purposes. this will ensure that the |
| 364 // next match will not preceed the current offset |
| 365 // note search->matchedIndex will always be set to something |
| 366 // in the code |
| 367 m_search_->matchedIndex = position - 1; |
| 368 } |
| 369 |
| 370 ucol_setOffset(m_strsrch_->textIter, position, &status); |
| 371 |
| 372 #if 0 |
| 373 for (;;) { |
| 374 if (m_search_->isCanonicalMatch) { |
| 375 // can't use exact here since extra accents are allowed. |
| 376 usearch_handleNextCanonical(m_strsrch_, &status); |
| 377 } |
| 378 else { |
| 379 usearch_handleNextExact(m_strsrch_, &status); |
| 380 } |
| 381 if (U_FAILURE(status)) { |
| 382 return USEARCH_DONE; |
| 383 } |
| 384 if (m_breakiterator_ == NULL |
| 385 #if !UCONFIG_NO_BREAK_ITERATION |
| 386 || |
| 387 m_search_->matchedIndex == USEARCH_DONE || |
| 388 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
| 389 m_breakiterator_->isBoundary(m_search_->matchedIndex + |
| 390 m_search_->matchedLength)) |
| 391 #endif |
| 392 ) { |
| 393 if (m_search_->matchedIndex == USEARCH_DONE) { |
| 394 ucol_setOffset(m_strsrch_->textIter, |
| 395 m_search_->textLength, &status); |
| 396 } |
| 397 else { |
| 398 ucol_setOffset(m_strsrch_->textIter, |
| 399 m_search_->matchedIndex, &status); |
| 400 } |
| 401 return m_search_->matchedIndex; |
| 402 } |
| 403 } |
| 404 #else |
| 405 // if m_strsrch_->breakIter is always the same as m_breakiterator_ |
| 406 // then we don't need to check the match boundaries here because |
| 407 // usearch_handleNextXXX will already have done it. |
| 408 if (m_search_->isCanonicalMatch) { |
| 409 // *could* actually use exact here 'cause no extra accents allow
ed... |
| 410 usearch_handleNextCanonical(m_strsrch_, &status); |
| 411 } else { |
| 412 usearch_handleNextExact(m_strsrch_, &status); |
| 413 } |
| 414 |
| 415 if (U_FAILURE(status)) { |
| 416 return USEARCH_DONE; |
| 417 } |
| 418 |
| 419 if (m_search_->matchedIndex == USEARCH_DONE) { |
| 420 ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &sta
tus); |
| 421 } else { |
| 422 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &s
tatus); |
| 423 } |
| 424 |
| 425 return m_search_->matchedIndex; |
| 426 #endif |
| 427 } |
| 428 } |
| 429 return USEARCH_DONE; |
| 430 } |
| 431 |
| 432 int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) |
| 433 { |
| 434 // values passed here are already in the pre-shift position |
| 435 if (U_SUCCESS(status)) { |
| 436 if (m_strsrch_->pattern.CELength == 0) { |
| 437 m_search_->matchedIndex = |
| 438 (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : |
| 439 m_search_->matchedIndex); |
| 440 if (m_search_->matchedIndex == 0) { |
| 441 setMatchNotFound(); |
| 442 } |
| 443 else { |
| 444 m_search_->matchedIndex --; |
| 445 ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
| 446 &status); |
| 447 m_search_->matchedLength = 0; |
| 448 } |
| 449 } |
| 450 else { |
| 451 // looking at usearch.cpp, this part is shifted out to |
| 452 // StringSearch instead of SearchIterator because m_strsrch_ is |
| 453 // not accessible in SearchIterator |
| 454 #if 0 |
| 455 if (!m_search_->isOverlap && |
| 456 position - m_strsrch_->pattern.defaultShiftSize < 0) { |
| 457 setMatchNotFound(); |
| 458 return USEARCH_DONE; |
| 459 } |
| 460 |
| 461 for (;;) { |
| 462 if (m_search_->isCanonicalMatch) { |
| 463 // can't use exact here since extra accents are allowed. |
| 464 usearch_handlePreviousCanonical(m_strsrch_, &status); |
| 465 } |
| 466 else { |
| 467 usearch_handlePreviousExact(m_strsrch_, &status); |
| 468 } |
| 469 if (U_FAILURE(status)) { |
| 470 return USEARCH_DONE; |
| 471 } |
| 472 if (m_breakiterator_ == NULL |
| 473 #if !UCONFIG_NO_BREAK_ITERATION |
| 474 || |
| 475 m_search_->matchedIndex == USEARCH_DONE || |
| 476 (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
| 477 m_breakiterator_->isBoundary(m_search_->matchedIndex + |
| 478 m_search_->matchedLength)) |
| 479 #endif |
| 480 ) { |
| 481 return m_search_->matchedIndex; |
| 482 } |
| 483 } |
| 484 #else |
| 485 ucol_setOffset(m_strsrch_->textIter, position, &status); |
| 486 |
| 487 if (m_search_->isCanonicalMatch) { |
| 488 // *could* use exact match here since extra accents *not* allowe
d! |
| 489 usearch_handlePreviousCanonical(m_strsrch_, &status); |
| 490 } else { |
| 491 usearch_handlePreviousExact(m_strsrch_, &status); |
| 492 } |
| 493 |
| 494 if (U_FAILURE(status)) { |
| 495 return USEARCH_DONE; |
| 496 } |
| 497 |
| 498 return m_search_->matchedIndex; |
| 499 #endif |
| 500 } |
| 501 |
| 502 return m_search_->matchedIndex; |
| 503 } |
| 504 return USEARCH_DONE; |
| 505 } |
| 506 |
| 507 U_NAMESPACE_END |
| 508 |
| 509 #endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */ |
OLD | NEW |