| Index: icu46/source/i18n/stsearch.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/stsearch.cpp (revision 0)
|
| +++ icu46/source/i18n/stsearch.cpp (revision 0)
|
| @@ -0,0 +1,509 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2001-2008 IBM and others. All rights reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 03/22/2000 helena Creation.
|
| +**********************************************************************
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
| +
|
| +#include "unicode/stsearch.h"
|
| +#include "usrchimp.h"
|
| +#include "cmemory.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringSearch)
|
| +
|
| +// public constructors and destructors -----------------------------------
|
| +
|
| +StringSearch::StringSearch(const UnicodeString &pattern,
|
| + const UnicodeString &text,
|
| + const Locale &locale,
|
| + BreakIterator *breakiter,
|
| + UErrorCode &status) :
|
| + SearchIterator(text, breakiter),
|
| + m_collator_(),
|
| + m_pattern_(pattern)
|
| +{ // Opens a search for pattern in text through usearch_open(), passing the locale name.
|
| + if (U_FAILURE(status)) { // incoming error: leave the object without a search
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| +
|
| + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
|
| + m_text_.getBuffer(), m_text_.length(),
|
| + locale.getName(), (UBreakIterator *)breakiter,
|
| + &status); // the buffers handed over are those of the members, not of the arguments
|
| + uprv_free(m_search_); // drop the struct that the SearchIterator base set up
|
| + m_search_ = NULL;
|
| +
|
| + // !!! dlf m_collator_ is an odd beast. basically it is an aliasing
|
| + // wrapper around the internal collator and rules, which (here) are
|
| + // owned by this stringsearch object. this means 1) its destructor
|
| + // _should not_ delete the ucollator or rules, and 2) changes made
|
| + // to the exposed collator (setStrength etc) _should_ modify the
|
| + // ucollator. thus the collator is not a copy-on-write alias, and it
|
| + // needs to distinguish itself not merely from 'stand alone' collators
|
| + // but also from copy-on-write ones. it needs additional state, which
|
| + // setUCollator should set.
|
| +
|
| + if (U_SUCCESS(status)) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + // the base-class m_search_ was freed above; share the one inside m_strsrch_
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| +}
|
| +
|
| +StringSearch::StringSearch(const UnicodeString &pattern,
|
| + const UnicodeString &text,
|
| + RuleBasedCollator *coll,
|
| + BreakIterator *breakiter,
|
| + UErrorCode &status) :
|
| + SearchIterator(text, breakiter),
|
| + m_collator_(),
|
| + m_pattern_(pattern)
|
| +{ // Opens a search for pattern in text through usearch_openFromCollator(), using coll->ucollator.
|
| + if (U_FAILURE(status)) { // incoming error: leave the object without a search
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| + if (coll == NULL) { // coll is dereferenced below, so reject NULL up front
|
| + status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
|
| + m_pattern_.length(),
|
| + m_text_.getBuffer(),
|
| + m_text_.length(), coll->ucollator,
|
| + (UBreakIterator *)breakiter,
|
| + &status); // the buffers handed over are those of the members, not of the arguments
|
| + uprv_free(m_search_); // drop the struct that the SearchIterator base set up
|
| + m_search_ = NULL;
|
| +
|
| + if (U_SUCCESS(status)) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + // the base-class m_search_ was freed above; share the one inside m_strsrch_
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| +}
|
| +
|
| +StringSearch::StringSearch(const UnicodeString &pattern,
|
| + CharacterIterator &text,
|
| + const Locale &locale,
|
| + BreakIterator *breakiter,
|
| + UErrorCode &status) :
|
| + SearchIterator(text, breakiter),
|
| + m_collator_(),
|
| + m_pattern_(pattern)
|
| +{ // CharacterIterator flavour of the locale constructor; the search itself runs over m_text_.
|
| + if (U_FAILURE(status)) { // incoming error: leave the object without a search
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| + m_strsrch_ = usearch_open(m_pattern_.getBuffer(), m_pattern_.length(),
|
| + m_text_.getBuffer(), m_text_.length(),
|
| + locale.getName(), (UBreakIterator *)breakiter,
|
| + &status); // presumably m_text_ was filled from text by the base constructor - confirm
|
| + uprv_free(m_search_); // drop the struct that the SearchIterator base set up
|
| + m_search_ = NULL;
|
| +
|
| + if (U_SUCCESS(status)) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + // the base-class m_search_ was freed above; share the one inside m_strsrch_
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| +}
|
| +
|
| +StringSearch::StringSearch(const UnicodeString &pattern,
|
| + CharacterIterator &text,
|
| + RuleBasedCollator *coll,
|
| + BreakIterator *breakiter,
|
| + UErrorCode &status) :
|
| + SearchIterator(text, breakiter),
|
| + m_collator_(),
|
| + m_pattern_(pattern)
|
| +{ // CharacterIterator flavour of the collator constructor; the search itself runs over m_text_.
|
| + if (U_FAILURE(status)) { // incoming error: leave the object without a search
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| + if (coll == NULL) { // coll is dereferenced below, so reject NULL up front
|
| + status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + m_strsrch_ = NULL;
|
| + return;
|
| + }
|
| + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
|
| + m_pattern_.length(),
|
| + m_text_.getBuffer(),
|
| + m_text_.length(), coll->ucollator,
|
| + (UBreakIterator *)breakiter,
|
| + &status); // presumably m_text_ was filled from text by the base constructor - confirm
|
| + uprv_free(m_search_); // drop the struct that the SearchIterator base set up
|
| + m_search_ = NULL;
|
| +
|
| + if (U_SUCCESS(status)) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + // the base-class m_search_ was freed above; share the one inside m_strsrch_
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| +}
|
| +
|
| +StringSearch::StringSearch(const StringSearch &that) :
|
| + SearchIterator(that.m_text_, that.m_breakiterator_),
|
| + m_collator_(),
|
| + m_pattern_(that.m_pattern_)
|
| +{ // Copy constructor: reopens a search of its own instead of sharing the one in that.
|
| + UErrorCode status = U_ZERO_ERROR; // local only; a failure shows up as a NULL m_search_
|
| +
|
| + // Free m_search_ from the superclass
|
| + uprv_free(m_search_);
|
| + m_search_ = NULL;
|
| +
|
| + if (that.m_strsrch_ == NULL) {
|
| + // The source never opened a search, so there is nothing to reopen
|
| + m_strsrch_ = NULL;
|
| + }
|
| + else {
|
| + // Open an independent search over our own pattern and text members,
|
| + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
|
| + m_pattern_.length(),
|
| + m_text_.getBuffer(),
|
| + m_text_.length(),
|
| + that.m_strsrch_->collator,
|
| + (UBreakIterator *)that.m_breakiterator_,
|
| + &status); // but with the collator and break iterator pointers taken from that
|
| + if (U_SUCCESS(status)) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + // the base-class m_search_ was freed above; share the one inside m_strsrch_
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| + }
|
| +}
|
| +
|
| +StringSearch::~StringSearch()
|
| +{ // Closes the wrapped search, if one was ever opened.
|
| + if (m_strsrch_ != NULL) {
|
| + usearch_close(m_strsrch_);
|
| + m_search_ = NULL; // it aliased m_strsrch_->search; presumably cleared so the base class does not free it again
|
| + }
|
| +}
|
| +
|
| +StringSearch *
|
| +StringSearch::clone() const { // heap-allocates a copy; ownership passes to the caller
|
| + return new StringSearch(*this); // all the work is done by the copy constructor
|
| +}
|
| +
|
| +// operator overloading ---------------------------------------------
|
| +// Rebuilds this search from the text, break iterator, pattern and collator
|
| +// of that. The search is reopened rather than copied, so it starts out with
|
| +// a freshly opened state. If that holds no search, or the reopen fails, this
|
| +// object ends up without one: m_strsrch_ and m_search_ are both NULL, the
|
| +// same state the copy constructor produces for such a source.
|
| +StringSearch & StringSearch::operator=(const StringSearch &that)
|
| +{
|
| + // Compare identities, not values: operator!= would run the whole equality
|
| + // test, which dereferences m_strsrch_ on both sides.
|
| + if (this != &that) {
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + m_text_ = that.m_text_;
|
| + m_breakiterator_ = that.m_breakiterator_;
|
| + m_pattern_ = that.m_pattern_;
|
| + if (m_strsrch_ != NULL) {
|
| + // all m_search_ in the parent class is linked up with m_strsrch_,
|
| + // so closing the search releases both
|
| + usearch_close(m_strsrch_);
|
| + }
|
| + else {
|
| + // no search was ever opened: m_search_ is still the struct set up
|
| + // by the base class (or already NULL) and would otherwise leak
|
| + uprv_free(m_search_);
|
| + }
|
| + // never keep pointers to what was just released
|
| + m_strsrch_ = NULL;
|
| + m_search_ = NULL;
|
| + if (that.m_strsrch_ != NULL) {
|
| + // NOTE(review): the copy constructor passes the break iterator
|
| + // here, assignment always passed NULL; kept as is - confirm intent
|
| + m_strsrch_ = usearch_openFromCollator(m_pattern_.getBuffer(),
|
| + m_pattern_.length(),
|
| + m_text_.getBuffer(),
|
| + m_text_.length(),
|
| + that.m_strsrch_->collator,
|
| + NULL, &status);
|
| + }
|
| + // Check null pointer
|
| + if (m_strsrch_ != NULL) {
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| + m_search_ = m_strsrch_->search;
|
| + }
|
| + }
|
| + return *this;
|
| +}
|
| +
|
| +UBool StringSearch::operator==(const SearchIterator &that) const
|
| +{ // Equal when the base-class comparison holds and pattern and collator pointer match.
|
| + if (this == &that) {
|
| + return TRUE;
|
| + }
|
| + if (SearchIterator::operator ==(that)) {
|
| + StringSearch &thatsrch = (StringSearch &)that; // NOTE(review): unchecked downcast, assumes that is a StringSearch
|
| + return (this->m_pattern_ == thatsrch.m_pattern_ &&
|
| + this->m_strsrch_->collator == thatsrch.m_strsrch_->collator); // NOTE(review): no NULL check on either m_strsrch_
|
| + }
|
| + return FALSE;
|
| +}
|
| +
|
| +// public get and set methods ----------------------------------------
|
| +
|
| +void StringSearch::setOffset(int32_t position, UErrorCode &status)
|
| +{
|
| + // pure forwarder: status is not inspected here, usearch_setOffset checks it
|
| + usearch_setOffset(m_strsrch_, position, &status);
|
| +}
|
| +
|
| +int32_t StringSearch::getOffset(void) const
|
| +{ // Returns whatever usearch_getOffset() reports for the wrapped search.
|
| + return usearch_getOffset(m_strsrch_);
|
| +}
|
| +
|
| +// Replaces the text to search: stores text in m_text_ and points the
|
| +// wrapped usearch at the stored string. Does nothing if status already
|
| +// holds a failure.
|
| +void StringSearch::setText(const UnicodeString &text, UErrorCode &status)
|
| +{
|
| + if (U_SUCCESS(status)) {
|
| + m_text_ = text;
|
| + // pass the buffer of our own member, as the constructors and the
|
| + // CharacterIterator overload do; the buffer behind text belongs to
|
| + // the caller and need not stay valid after this call returns
|
| + usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status);
|
| + }
|
| +}
|
| +
|
| +void StringSearch::setText(CharacterIterator &text, UErrorCode &status)
|
| +{ // CharacterIterator flavour of setText; does nothing if status already failed.
|
| + if (U_SUCCESS(status)) {
|
| + text.getText(m_text_); // presumably copies the iterator text into m_text_ - confirm
|
| + usearch_setText(m_strsrch_, m_text_.getBuffer(), m_text_.length(), &status); // search our own string
|
| + }
|
| +}
|
| +
|
| +RuleBasedCollator * StringSearch::getCollator() const
|
| +{ // Hands out the address of the m_collator_ member itself, not a copy.
|
| + return (RuleBasedCollator *)&m_collator_; // the cast drops the const this method adds to the member
|
| +}
|
| +
|
| +// Switches the wrapped search to the collator held by coll, then re-points
|
| +// the m_collator_ wrapper at whatever collator the search now holds.
|
| +// Does nothing if status already holds a failure. A NULL coll, or a call on
|
| +// an object that has no open search, sets U_ILLEGAL_ARGUMENT_ERROR instead
|
| +// of dereferencing a NULL pointer.
|
| +void StringSearch::setCollator(RuleBasedCollator *coll, UErrorCode &status)
|
| +{
|
| + if (U_FAILURE(status)) {
|
| + return;
|
| + }
|
| + // both pointers are dereferenced below; the constructors reject a NULL
|
| + // collator the same way
|
| + if (coll == NULL || m_strsrch_ == NULL) {
|
| + status = U_ILLEGAL_ARGUMENT_ERROR;
|
| + return;
|
| + }
|
| + usearch_setCollator(m_strsrch_, coll->getUCollator(), &status);
|
| + // Alias the collator
|
| + m_collator_.setUCollator((UCollator *)m_strsrch_->collator);
|
| +}
|
| +
|
| +void StringSearch::setPattern(const UnicodeString &pattern,
|
| + UErrorCode &status)
|
| +{ // Replaces the pattern; does nothing if status already failed.
|
| + if (U_SUCCESS(status)) {
|
| + m_pattern_ = pattern; // store it in the member first
|
| + usearch_setPattern(m_strsrch_, m_pattern_.getBuffer(), m_pattern_.length(),
|
| + &status); // usearch is given the buffer of the member, not of the argument
|
| + }
|
| +}
|
| +
|
| +const UnicodeString & StringSearch::getPattern() const
|
| +{ // Returns a reference to the pattern member, not a copy.
|
| + return m_pattern_;
|
| +}
|
| +
|
| +// public methods ----------------------------------------------------
|
| +
|
| +void StringSearch::reset()
|
| +{ // Pure forwarder to usearch_reset() on the wrapped search.
|
| + usearch_reset(m_strsrch_);
|
| +}
|
| +
|
| +// Builds a new heap-allocated StringSearch from this object's pattern, text,
|
| +// collator wrapper and break iterator, then gives it the same offset, match
|
| +// start and match length. Returns NULL if the allocation or any of those
|
| +// steps fails, or if this object has no open search to copy a position from.
|
| +// A non-NULL result is handed to the caller; nothing else keeps a pointer
|
| +// to it.
|
| +SearchIterator * StringSearch::safeClone(void) const
|
| +{
|
| + UErrorCode status = U_ZERO_ERROR;
|
| + StringSearch *result = new StringSearch(m_pattern_, m_text_,
|
| + (RuleBasedCollator *)&m_collator_,
|
| + m_breakiterator_,
|
| + status);
|
| + /* test for NULL */
|
| + if (result == 0) {
|
| + return 0;
|
| + }
|
| + // a failed constructor leaves result without a search, and this object
|
| + // may lack one too; the position is copied only when both exist
|
| + if (U_SUCCESS(status) && m_strsrch_ != NULL) {
|
| + result->setOffset(getOffset(), status);
|
| + result->setMatchStart(m_strsrch_->search->matchedIndex);
|
| + result->setMatchLength(m_strsrch_->search->matchedLength);
|
| + }
|
| + if (U_FAILURE(status) || m_strsrch_ == NULL) {
|
| + // the caller never sees this object, so release it here
|
| + delete result;
|
| + return NULL;
|
| + }
|
| + return result;
|
| +}
|
| +
|
| +// protected method -------------------------------------------------
|
| +
|
| +int32_t StringSearch::handleNext(int32_t position, UErrorCode &status)
|
| +{ // Forward search step from position; returns a match index or USEARCH_DONE.
|
| + // values passed here are already in the pre-shift position
|
| + if (U_SUCCESS(status)) {
|
| + if (m_strsrch_->pattern.CELength == 0) { // CELength 0: just step the match one index forward per call
|
| + m_search_->matchedIndex =
|
| + m_search_->matchedIndex == USEARCH_DONE ?
|
| + getOffset() : m_search_->matchedIndex + 1;
|
| + m_search_->matchedLength = 0;
|
| + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
|
| + &status);
|
| + if (m_search_->matchedIndex == m_search_->textLength) { // stepped onto the end of the text
|
| + m_search_->matchedIndex = USEARCH_DONE;
|
| + }
|
| + }
|
| + else {
|
| + // looking at usearch.cpp, this part is shifted out to
|
| + // StringSearch instead of SearchIterator because m_strsrch_ is
|
| + // not accessible in SearchIterator
|
| +#if 0
|
| + if (position + m_strsrch_->pattern.defaultShiftSize
|
| + > m_search_->textLength) {
|
| + setMatchNotFound();
|
| + return USEARCH_DONE;
|
| + }
|
| +#endif
|
| + if (m_search_->matchedLength <= 0) {
|
| + // the flipping direction issue has already been handled
|
| + // in next()
|
| + // for boundary check purposes. this will ensure that the
|
| + // next match will not precede the current offset
|
| + // note search->matchedIndex will always be set to something
|
| + // in the code
|
| + m_search_->matchedIndex = position - 1;
|
| + }
|
| +
|
| + ucol_setOffset(m_strsrch_->textIter, position, &status); // position the text iterator before searching
|
| +
|
| +#if 0
|
| + for (;;) {
|
| + if (m_search_->isCanonicalMatch) {
|
| + // can't use exact here since extra accents are allowed.
|
| + usearch_handleNextCanonical(m_strsrch_, &status);
|
| + }
|
| + else {
|
| + usearch_handleNextExact(m_strsrch_, &status);
|
| + }
|
| + if (U_FAILURE(status)) {
|
| + return USEARCH_DONE;
|
| + }
|
| + if (m_breakiterator_ == NULL
|
| +#if !UCONFIG_NO_BREAK_ITERATION
|
| + ||
|
| + m_search_->matchedIndex == USEARCH_DONE ||
|
| + (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
|
| + m_breakiterator_->isBoundary(m_search_->matchedIndex +
|
| + m_search_->matchedLength))
|
| +#endif
|
| + ) {
|
| + if (m_search_->matchedIndex == USEARCH_DONE) {
|
| + ucol_setOffset(m_strsrch_->textIter,
|
| + m_search_->textLength, &status);
|
| + }
|
| + else {
|
| + ucol_setOffset(m_strsrch_->textIter,
|
| + m_search_->matchedIndex, &status);
|
| + }
|
| + return m_search_->matchedIndex;
|
| + }
|
| + }
|
| +#else
|
| + // if m_strsrch_->breakIter is always the same as m_breakiterator_
|
| + // then we don't need to check the match boundaries here because
|
| + // usearch_handleNextXXX will already have done it.
|
| + if (m_search_->isCanonicalMatch) {
|
| + // *could* actually use exact here 'cause no extra accents allowed...
|
| + usearch_handleNextCanonical(m_strsrch_, &status);
|
| + } else {
|
| + usearch_handleNextExact(m_strsrch_, &status);
|
| + }
|
| +
|
| + if (U_FAILURE(status)) {
|
| + return USEARCH_DONE;
|
| + }
|
| +
|
| + if (m_search_->matchedIndex == USEARCH_DONE) { // no match: park the text iterator at the end of the text
|
| + ucol_setOffset(m_strsrch_->textIter, m_search_->textLength, &status);
|
| + } else { // match: leave the text iterator at the start of the match
|
| + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex, &status);
|
| + }
|
| +
|
| + return m_search_->matchedIndex;
|
| +#endif
|
| + }
|
| + }
|
| + return USEARCH_DONE; // NOTE(review): the CELength == 0 branch also lands here, while handlePrev returns matchedIndex - confirm intent
|
| +}
|
| +
|
| +int32_t StringSearch::handlePrev(int32_t position, UErrorCode &status)
|
| +{ // Backward search step from position; returns a match index or USEARCH_DONE.
|
| + // values passed here are already in the pre-shift position
|
| + if (U_SUCCESS(status)) {
|
| + if (m_strsrch_->pattern.CELength == 0) { // CELength 0: just step the match one index backward per call
|
| + m_search_->matchedIndex =
|
| + (m_search_->matchedIndex == USEARCH_DONE ? getOffset() :
|
| + m_search_->matchedIndex);
|
| + if (m_search_->matchedIndex == 0) { // already at the start: nothing before it
|
| + setMatchNotFound(); // presumably leaves matchedIndex at USEARCH_DONE - confirm
|
| + }
|
| + else {
|
| + m_search_->matchedIndex --;
|
| + ucol_setOffset(m_strsrch_->textIter, m_search_->matchedIndex,
|
| + &status);
|
| + m_search_->matchedLength = 0;
|
| + }
|
| + }
|
| + else {
|
| + // looking at usearch.cpp, this part is shifted out to
|
| + // StringSearch instead of SearchIterator because m_strsrch_ is
|
| + // not accessible in SearchIterator
|
| +#if 0
|
| + if (!m_search_->isOverlap &&
|
| + position - m_strsrch_->pattern.defaultShiftSize < 0) {
|
| + setMatchNotFound();
|
| + return USEARCH_DONE;
|
| + }
|
| +
|
| + for (;;) {
|
| + if (m_search_->isCanonicalMatch) {
|
| + // can't use exact here since extra accents are allowed.
|
| + usearch_handlePreviousCanonical(m_strsrch_, &status);
|
| + }
|
| + else {
|
| + usearch_handlePreviousExact(m_strsrch_, &status);
|
| + }
|
| + if (U_FAILURE(status)) {
|
| + return USEARCH_DONE;
|
| + }
|
| + if (m_breakiterator_ == NULL
|
| +#if !UCONFIG_NO_BREAK_ITERATION
|
| + ||
|
| + m_search_->matchedIndex == USEARCH_DONE ||
|
| + (m_breakiterator_->isBoundary(m_search_->matchedIndex) &&
|
| + m_breakiterator_->isBoundary(m_search_->matchedIndex +
|
| + m_search_->matchedLength))
|
| +#endif
|
| + ) {
|
| + return m_search_->matchedIndex;
|
| + }
|
| + }
|
| +#else
|
| + ucol_setOffset(m_strsrch_->textIter, position, &status); // position the text iterator before searching
|
| +
|
| + if (m_search_->isCanonicalMatch) {
|
| + // *could* use exact match here since extra accents *not* allowed!
|
| + usearch_handlePreviousCanonical(m_strsrch_, &status);
|
| + } else {
|
| + usearch_handlePreviousExact(m_strsrch_, &status);
|
| + }
|
| +
|
| + if (U_FAILURE(status)) {
|
| + return USEARCH_DONE;
|
| + }
|
| +
|
| + return m_search_->matchedIndex;
|
| +#endif
|
| + }
|
| +
|
| + return m_search_->matchedIndex; // reached only from the CELength == 0 branch; the else branch returns on every path
|
| + }
|
| + return USEARCH_DONE; // status already held a failure on entry
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
|
|
|
| Property changes on: icu46/source/i18n/stsearch.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|