Index: icu46/source/i18n/stsearch.cpp |
=================================================================== |
--- icu46/source/i18n/stsearch.cpp (revision 0) |
+++ icu46/source/i18n/stsearch.cpp (revision 0) |
@@ -0,0 +1,509 @@ |
+/* |
+********************************************************************** |
+* Copyright (C) 2001-2008 IBM and others. All rights reserved. |
+********************************************************************** |
+* Date Name Description |
+* 03/22/2000 helena Creation. |
+********************************************************************** |
+*/ |
+ |
+#include "unicode/utypes.h" |
+ |
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION |
+ |
+#include "unicode/stsearch.h" |
+#include "usrchimp.h" |
+#include "cmemory.h" |
+ |
+U_NAMESPACE_BEGIN |
+ |
+UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch) |
+ |
+// public constructors and destructors ----------------------------------- |
+ |
+StringSearch::StringSearch(const UnicodeString &pattern, |
+ const UnicodeString &text, |
+ const Locale &locale, |
+ BreakIterator *breakiter, |
+ UErrorCode &status) : |
+ SearchIterator(text, breakiter), |
+ m_collator_(), |
+ m_pattern_(pattern) |
+{ |
+ if (U_FAILURE(status)) { |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ |
+ m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
+ m_text_.getBuffer(), m_text_.length(), |
+ locale.getName(), (UBreakIterator *)breakiter, |
+ &status); |
+ uprv_free(m_search_); |
+ m_search_ = NULL; |
+ |
+ // !!! dlf m_collator_ is an odd beast. basically it is an aliasing |
+ // wrapper around the internal collator and rules, which (here) are |
+ // owned by this stringsearch object. this means 1) it's destructor |
+ // _should not_ delete the ucollator or rules, and 2) changes made |
+ // to the exposed collator (setStrength etc) _should_ modify the |
+ // ucollator. thus the collator is not a copy-on-write alias, and it |
+ // needs to distinguish itself not merely from 'stand alone' colators |
+ // but also from copy-on-write ones. it needs additional state, which |
+ // setUCollator should set. |
+ |
+ if (U_SUCCESS(status)) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ // m_search_ has been created by the base SearchIterator class |
+ m_search_ = m_strsrch_->search; |
+ } |
+} |
+ |
+StringSearch::StringSearch(const UnicodeString &pattern, |
+ const UnicodeString &text, |
+ RuleBasedCollator *coll, |
+ BreakIterator *breakiter, |
+ UErrorCode &status) : |
+ SearchIterator(text, breakiter), |
+ m_collator_(), |
+ m_pattern_(pattern) |
+{ |
+ if (U_FAILURE(status)) { |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ if (coll == NULL) { |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
+ m_pattern_.length(), |
+ m_text_.getBuffer(), |
+ m_text_.length(), coll->ucollator, |
+ (UBreakIterator *)breakiter, |
+ &status); |
+ uprv_free(m_search_); |
+ m_search_ = NULL; |
+ |
+ if (U_SUCCESS(status)) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ // m_search_ has been created by the base SearchIterator class |
+ m_search_ = m_strsrch_->search; |
+ } |
+} |
+ |
+StringSearch::StringSearch(const UnicodeString &pattern, |
+ CharacterIterator &text, |
+ const Locale &locale, |
+ BreakIterator *breakiter, |
+ UErrorCode &status) : |
+ SearchIterator(text, breakiter), |
+ m_collator_(), |
+ m_pattern_(pattern) |
+{ |
+ if (U_FAILURE(status)) { |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(), |
+ m_text_.getBuffer(), m_text_.length(), |
+ locale.getName(), (UBreakIterator *)breakiter, |
+ &status); |
+ uprv_free(m_search_); |
+ m_search_ = NULL; |
+ |
+ if (U_SUCCESS(status)) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ // m_search_ has been created by the base SearchIterator class |
+ m_search_ = m_strsrch_->search; |
+ } |
+} |
+ |
+StringSearch::StringSearch(const UnicodeString &pattern, |
+ CharacterIterator &text, |
+ RuleBasedCollator *coll, |
+ BreakIterator *breakiter, |
+ UErrorCode &status) : |
+ SearchIterator(text, breakiter), |
+ m_collator_(), |
+ m_pattern_(pattern) |
+{ |
+ if (U_FAILURE(status)) { |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ if (coll == NULL) { |
+ status = U_ILLEGAL_ARGUMENT_ERROR; |
+ m_strsrch_ = NULL; |
+ return; |
+ } |
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
+ m_pattern_.length(), |
+ m_text_.getBuffer(), |
+ m_text_.length(), coll->ucollator, |
+ (UBreakIterator *)breakiter, |
+ &status); |
+ uprv_free(m_search_); |
+ m_search_ = NULL; |
+ |
+ if (U_SUCCESS(status)) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ // m_search_ has been created by the base SearchIterator class |
+ m_search_ = m_strsrch_->search; |
+ } |
+} |
+ |
+StringSearch::StringSearch(const StringSearch &that) : |
+ SearchIterator(that.m_text_, that.m_breakiterator_), |
+ m_collator_(), |
+ m_pattern_(that.m_pattern_) |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ |
+ // Free m_search_ from the superclass |
+ uprv_free(m_search_); |
+ m_search_ = NULL; |
+ |
+ if (that.m_strsrch_ == NULL) { |
+ // This was not a good copy |
+ m_strsrch_ = NULL; |
+ } |
+ else { |
+ // Make a deep copy |
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
+ m_pattern_.length(), |
+ m_text_.getBuffer(), |
+ m_text_.length(), |
+ that.m_strsrch_->collator, |
+ (UBreakIterator *)that.m_breakiterator_, |
+ &status); |
+ if (U_SUCCESS(status)) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ // m_search_ has been created by the base SearchIterator class |
+ m_search_ = m_strsrch_->search; |
+ } |
+ } |
+} |
+ |
+StringSearch::~StringSearch() |
+{ |
+ if (m_strsrch_ != NULL) { |
+ usearch_close(m_strsrch_); |
+ m_search_ = NULL; |
+ } |
+} |
+ |
+StringSearch * |
+StringSearch::clone() const { |
+ return new StringSearch(*this); |
+} |
+ |
+// operator overloading --------------------------------------------- |
+StringSearch & StringSearch::operator=(const StringSearch &that) |
+{ |
+ if ((*this) != that) { |
+ UErrorCode status = U_ZERO_ERROR; |
+ m_text_ = that.m_text_; |
+ m_breakiterator_ = that.m_breakiterator_; |
+ m_pattern_ = that.m_pattern_; |
+ // all m_search_ in the parent class is linked up with m_strsrch_ |
+ usearch_close(m_strsrch_); |
+ m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(), |
+ m_pattern_.length(), |
+ m_text_.getBuffer(), |
+ m_text_.length(), |
+ that.m_strsrch_->collator, |
+ NULL, &status); |
+ // Check null pointer |
+ if (m_strsrch_ != NULL) { |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ m_search_ = m_strsrch_->search; |
+ } |
+ } |
+ return *this; |
+} |
+ |
+UBool StringSearch::operator==(const SearchIterator &that) const |
+{ |
+ if (this == &that) { |
+ return TRUE; |
+ } |
+ if (SearchIterator::operator ==(that)) { |
+ StringSearch &thatsrch = (StringSearch &)that; |
+ return (this->m_pattern_ == thatsrch.m_pattern_ && |
+ this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); |
+ } |
+ return FALSE; |
+} |
+ |
+// public get and set methods ---------------------------------------- |
+ |
+void StringSearch::setOffset(int32_t position, UErrorCode &status) |
+{ |
+ // status checked in usearch_setOffset |
+ usearch_setOffset(m_strsrch_, position, &status); |
+} |
+ |
+int32_t StringSearch::getOffset(void) const |
+{ |
+ return usearch_getOffset(m_strsrch_); |
+} |
+ |
+void StringSearch::setText(const UnicodeString &text, UErrorCode &status) |
+{ |
+ if (U_SUCCESS(status)) { |
+ m_text_ = text; |
+ usearch_setText(m_strsrch_, text.getBuffer(), text.length(), &status); |
+ } |
+} |
+ |
+void StringSearch::setText(CharacterIterator &text, UErrorCode &status) |
+{ |
+ if (U_SUCCESS(status)) { |
+ text.getText(m_text_); |
+ usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); |
+ } |
+} |
+ |
+RuleBasedCollator * StringSearch::getCollator() const |
+{ |
+ return (RuleBasedCollator *)&m_collator_; |
+} |
+ |
+void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status) |
+{ |
+ if (U_SUCCESS(status)) { |
+ usearch_setCollator(m_strsrch_, coll->getUCollator(), &status); |
+ // Alias the collator |
+ m_collator_.setUCollator((UCollator *)m_strsrch_->collator); |
+ } |
+} |
+ |
+void StringSearch::setPattern(const UnicodeString &pattern, |
+ UErrorCode &status) |
+{ |
+ if (U_SUCCESS(status)) { |
+ m_pattern_ = pattern; |
+ usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(), |
+ &status); |
+ } |
+} |
+ |
+const UnicodeString & StringSearch::getPattern() const |
+{ |
+ return m_pattern_; |
+} |
+ |
+// public methods ---------------------------------------------------- |
+ |
+void StringSearch::reset() |
+{ |
+ usearch_reset(m_strsrch_); |
+} |
+ |
+SearchIterator * StringSearch::safeClone(void) const |
+{ |
+ UErrorCode status = U_ZERO_ERROR; |
+ StringSearch *result = new StringSearch(m_pattern_, m_text_, |
+ (RuleBasedCollator *)&m_collator_, |
+ m_breakiterator_, |
+ status); |
+ /* test for NULL */ |
+ if (result == 0) { |
+ status = U_MEMORY_ALLOCATION_ERROR; |
+ return 0; |
+ } |
+ result->setOffset(getOffset(), status); |
+ result->setMatchStart(m_strsrch_->search->matchedIndex); |
+ result->setMatchLength(m_strsrch_->search->matchedLength); |
+ if (U_FAILURE(status)) { |
+ return NULL; |
+ } |
+ return result; |
+} |
+ |
+// protected method ------------------------------------------------- |
+ |
+int32_t StringSearch::handleNext(int32_t position, UErrorCode &status) |
+{ |
+ // values passed here are already in the pre-shift position |
+ if (U_SUCCESS(status)) { |
+ if (m_strsrch_->pattern.CELength == 0) { |
+ m_search_->matchedIndex = |
+ m_search_->matchedIndex == USEARCH_DONE ? |
+ getOffset() : m_search_->matchedIndex + 1; |
+ m_search_->matchedLength = 0; |
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
+ &status); |
+ if (m_search_->matchedIndex == m_search_->textLength) { |
+ m_search_->matchedIndex = USEARCH_DONE; |
+ } |
+ } |
+ else { |
+ // looking at usearch.cpp, this part is shifted out to |
+ // StringSearch instead of SearchIterator because m_strsrch_ is |
+ // not accessible in SearchIterator |
+#if 0 |
+ if (position + m_strsrch_->pattern.defaultShiftSize |
+ > m_search_->textLength) { |
+ setMatchNotFound(); |
+ return USEARCH_DONE; |
+ } |
+#endif |
+ if (m_search_->matchedLength <= 0) { |
+ // the flipping direction issue has already been handled |
+ // in next() |
+ // for boundary check purposes. this will ensure that the |
+ // next match will not preceed the current offset |
+ // note search->matchedIndex will always be set to something |
+ // in the code |
+ m_search_->matchedIndex = position - 1; |
+ } |
+ |
+ ucol_setOffset(m_strsrch_->textIter, position, &status); |
+ |
+#if 0 |
+ for (;;) { |
+ if (m_search_->isCanonicalMatch) { |
+ // can't use exact here since extra accents are allowed. |
+ usearch_handleNextCanonical(m_strsrch_, &status); |
+ } |
+ else { |
+ usearch_handleNextExact(m_strsrch_, &status); |
+ } |
+ if (U_FAILURE(status)) { |
+ return USEARCH_DONE; |
+ } |
+ if (m_breakiterator_ == NULL |
+#if !UCONFIG_NO_BREAK_ITERATION |
+ || |
+ m_search_->matchedIndex == USEARCH_DONE || |
+ (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
+ m_breakiterator_->isBoundary(m_search_->matchedIndex + |
+ m_search_->matchedLength)) |
+#endif |
+ ) { |
+ if (m_search_->matchedIndex == USEARCH_DONE) { |
+ ucol_setOffset(m_strsrch_->textIter, |
+ m_search_->textLength, &status); |
+ } |
+ else { |
+ ucol_setOffset(m_strsrch_->textIter, |
+ m_search_->matchedIndex, &status); |
+ } |
+ return m_search_->matchedIndex; |
+ } |
+ } |
+#else |
+ // if m_strsrch_->breakIter is always the same as m_breakiterator_ |
+ // then we don't need to check the match boundaries here because |
+ // usearch_handleNextXXX will already have done it. |
+ if (m_search_->isCanonicalMatch) { |
+ // *could* actually use exact here 'cause no extra accents allowed... |
+ usearch_handleNextCanonical(m_strsrch_, &status); |
+ } else { |
+ usearch_handleNextExact(m_strsrch_, &status); |
+ } |
+ |
+ if (U_FAILURE(status)) { |
+ return USEARCH_DONE; |
+ } |
+ |
+ if (m_search_->matchedIndex == USEARCH_DONE) { |
+ ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status); |
+ } else { |
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status); |
+ } |
+ |
+ return m_search_->matchedIndex; |
+#endif |
+ } |
+ } |
+ return USEARCH_DONE; |
+} |
+ |
+int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status) |
+{ |
+ // values passed here are already in the pre-shift position |
+ if (U_SUCCESS(status)) { |
+ if (m_strsrch_->pattern.CELength == 0) { |
+ m_search_->matchedIndex = |
+ (m_search_->matchedIndex == USEARCH_DONE ? getOffset() : |
+ m_search_->matchedIndex); |
+ if (m_search_->matchedIndex == 0) { |
+ setMatchNotFound(); |
+ } |
+ else { |
+ m_search_->matchedIndex --; |
+ ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, |
+ &status); |
+ m_search_->matchedLength = 0; |
+ } |
+ } |
+ else { |
+ // looking at usearch.cpp, this part is shifted out to |
+ // StringSearch instead of SearchIterator because m_strsrch_ is |
+ // not accessible in SearchIterator |
+#if 0 |
+ if (!m_search_->isOverlap && |
+ position - m_strsrch_->pattern.defaultShiftSize < 0) { |
+ setMatchNotFound(); |
+ return USEARCH_DONE; |
+ } |
+ |
+ for (;;) { |
+ if (m_search_->isCanonicalMatch) { |
+ // can't use exact here since extra accents are allowed. |
+ usearch_handlePreviousCanonical(m_strsrch_, &status); |
+ } |
+ else { |
+ usearch_handlePreviousExact(m_strsrch_, &status); |
+ } |
+ if (U_FAILURE(status)) { |
+ return USEARCH_DONE; |
+ } |
+ if (m_breakiterator_ == NULL |
+#if !UCONFIG_NO_BREAK_ITERATION |
+ || |
+ m_search_->matchedIndex == USEARCH_DONE || |
+ (m_breakiterator_->isBoundary(m_search_->matchedIndex) && |
+ m_breakiterator_->isBoundary(m_search_->matchedIndex + |
+ m_search_->matchedLength)) |
+#endif |
+ ) { |
+ return m_search_->matchedIndex; |
+ } |
+ } |
+#else |
+ ucol_setOffset(m_strsrch_->textIter, position, &status); |
+ |
+ if (m_search_->isCanonicalMatch) { |
+ // *could* use exact match here since extra accents *not* allowed! |
+ usearch_handlePreviousCanonical(m_strsrch_, &status); |
+ } else { |
+ usearch_handlePreviousExact(m_strsrch_, &status); |
+ } |
+ |
+ if (U_FAILURE(status)) { |
+ return USEARCH_DONE; |
+ } |
+ |
+ return m_search_->matchedIndex; |
+#endif |
+ } |
+ |
+ return m_search_->matchedIndex; |
+ } |
+ return USEARCH_DONE; |
+} |
+ |
+U_NAMESPACE_END |
+ |
+#endif /* #if !UCONFIG_NO_COLLATION */ |
Property changes on: icu46/source/i18n/stsearch.cpp |
___________________________________________________________________ |
Added: svn:eol-style |
+ LF |