| Index: icu46/source/i18n/search.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/search.cpp (revision 0)
|
| +++ icu46/source/i18n/search.cpp (revision 0)
|
| @@ -0,0 +1,443 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 03/22/2000 helena Creation.
|
| +**********************************************************************
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
| +
|
| +#include "unicode/brkiter.h"
|
| +#include "unicode/schriter.h"
|
| +#include "unicode/search.h"
|
| +#include "usrchimp.h"
|
| +#include "cmemory.h"
|
| +
|
| +// public constructors and destructors -----------------------------------
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +SearchIterator::SearchIterator(const SearchIterator &other)
|
| +    : UObject(other), m_breakiterator_(other.m_breakiterator_), m_text_(other.m_text_)
|
| +{
|
| +    m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); // nothing here zero-fills the block: assign every field that is read later
|
| +    m_search_->breakIter = other.m_search_->breakIter;
|
| +    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
|
| +    m_search_->isOverlap = other.m_search_->isOverlap;
|
| +    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
|
| +    m_search_->isForwardSearching = other.m_search_->isForwardSearching; // must be set: read by next(), previous(), setMatchNotFound()
|
| +    m_search_->reset = other.m_search_->reset; // must be set: read by previous()
|
| +    m_search_->matchedIndex = other.m_search_->matchedIndex;
|
| +    m_search_->matchedLength = other.m_search_->matchedLength;
|
| +    m_search_->text = other.m_search_->text; // NOTE(review): aliases other's buffer rather than m_text_.getBuffer() - confirm lifetime
|
| +    m_search_->textLength = other.m_search_->textLength;
|
| +}
|
| +
|
| +SearchIterator::~SearchIterator()
|
| +{
|
| +    // Only the USearch block is released here; no other member is freed.
|
| +    if (m_search_ == NULL) { return; }
|
| +    uprv_free(m_search_);
|
| +}
|
| +
|
| +// public get and set methods ----------------------------------------
|
| +
|
| +void SearchIterator::setAttribute(USearchAttribute attribute,
|
| +                                  USearchAttributeValue value,
|
| +                                  UErrorCode &status)
|
| +{
|
| +    // Attributes are only written while no earlier error is pending.
|
| +    if (U_SUCCESS(status)) {
|
| +        const UBool enabled = (value == USEARCH_ON ? TRUE : FALSE);
|
| +        switch (attribute) {
|
| +        case USEARCH_OVERLAP:
|
| +            m_search_->isOverlap = enabled;
|
| +            break;
|
| +        case USEARCH_CANONICAL_MATCH:
|
| +            m_search_->isCanonicalMatch = enabled;
|
| +            break;
|
| +        case USEARCH_ELEMENT_COMPARISON: {
|
| +            const UBool wildcard = (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD);
|
| +            m_search_->elementComparisonType = (int16_t)(wildcard ? value : 0);
|
| +            break;
|
| +        }
|
| +        default:
|
| +            status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        }
|
| +    }
|
| +    // Checked after the switch and regardless of the incoming status: the count sentinel always yields an error.
|
| +    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
|
| +        status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +    }
|
| +}
|
| +
|
| +USearchAttributeValue SearchIterator::getAttribute(
|
| +                                          USearchAttribute attribute) const
|
| +{
|
| +    // Translates the stored flags back into public attribute values.
|
| +    // Attributes not handled below report USEARCH_DEFAULT.
|
| +    USearchAttributeValue result = USEARCH_DEFAULT;
|
| +    if (attribute == USEARCH_OVERLAP) {
|
| +        result = (m_search_->isOverlap == TRUE) ? USEARCH_ON : USEARCH_OFF;
|
| +    }
|
| +    else if (attribute == USEARCH_CANONICAL_MATCH) {
|
| +        result = (m_search_->isCanonicalMatch == TRUE) ? USEARCH_ON : USEARCH_OFF;
|
| +    }
|
| +    else if (attribute == USEARCH_ELEMENT_COMPARISON) {
|
| +        const int16_t stored = m_search_->elementComparisonType;
|
| +        // Only the two wildcard modes are reported as stored; any other
|
| +        // stored value reads back as the standard comparison.
|
| +        const UBool isWildcard = (stored == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD || stored == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD);
|
| +        result = isWildcard ? (USearchAttributeValue)stored
|
| +                            : USEARCH_STANDARD_ELEMENT_COMPARISON;
|
| +    }
|
| +    return result;
|
| +}
|
| +
|
| +int32_t SearchIterator::getMatchedStart() const
|
| +{
|
| + return m_search_->matchedIndex; // stored match start; the constructors and setMatchNotFound() set it to USEARCH_DONE
|
| +}
|
| +
|
| +int32_t SearchIterator::getMatchedLength() const
|
| +{
|
| + return m_search_->matchedLength; // stored match length; the constructors and setMatchNotFound() set it to 0
|
| +}
|
| +
|
| +void SearchIterator::getMatchedText(UnicodeString &result) const
|
| +{
|
| +    const int32_t start = m_search_->matchedIndex;
|
| +    const int32_t count = m_search_->matchedLength;
|
| +    // No current match, or an empty one: hand back an empty string.
|
| +    if (start == USEARCH_DONE || count == 0) {
|
| +        result.remove();
|
| +        return;
|
| +    }
|
| +    result.setTo(m_search_->text + start, count);
|
| +}
|
| +
|
| +void SearchIterator::setBreakIterator(BreakIterator *breakiter,
|
| +                                      UErrorCode &status)
|
| +{
|
| +    // Nothing is changed when an earlier error is already pending.
|
| +    if (U_FAILURE(status)) {
|
| +        return;
|
| +    }
|
| +
|
| +    // History: a variant that reset m_search_->breakIter to NULL used to
|
| +    // be kept here under "#if 0", on the theory that a C++ BreakIterator
|
| +    // might not be usable through the UBreakIterator interface and so
|
| +    // had to be tracked separately.
|
| +    //
|
| +    // Per the original author's note, the constructors that accept a
|
| +    // BreakIterator already cast it to UBreakIterator and hand it to the
|
| +    // matching usearch_openFromXXX routine, and a UBreakIterator is a
|
| +    // BreakIterator, so the same cast is applied here for any subclass.
|
| +    m_search_->breakIter = (UBreakIterator *) breakiter;
|
| +
|
| +    // The C++-side pointer is updated alongside the C-side one.
|
| +    m_breakiterator_ = breakiter;
|
| +}
|
| +
|
| +const BreakIterator * SearchIterator::getBreakIterator(void) const
|
| +{
|
| + return m_breakiterator_; // pointer last stored by a constructor, setBreakIterator() or operator=; NULL after the default constructor
|
| +}
|
| +
|
| +void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
|
| +{
|
| +    if (U_FAILURE(status)) { return; }
|
| +    if (text.length() == 0) {
|
| +        // Empty text is rejected; nothing is modified on this path.
|
| +        status = U_ILLEGAL_ARGUMENT_ERROR;
|
| +        return;
|
| +    }
|
| +    // Keep a private copy and point the search state at that copy.
|
| +    m_text_ = text;
|
| +    m_search_->text = m_text_.getBuffer();
|
| +    m_search_->textLength = m_text_.length();
|
| +}
|
| +
|
| +void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
|
| +{
|
| +    // NOTE(review): m_text_ is overwritten here before the UnicodeString overload gets to reject empty text.
|
| +    if (U_FAILURE(status)) { return; }
|
| +    text.getText(m_text_);
|
| +    setText(m_text_, status);
|
| +}
|
| +
|
| +const UnicodeString & SearchIterator::getText(void) const
|
| +{
|
| + return m_text_; // reference to the member copy of the text, not to the caller's original object
|
| +}
|
| +
|
| +// operator overloading ----------------------------------------------
|
| +
|
| +UBool SearchIterator::operator==(const SearchIterator &that) const
|
| +{
|
| +    if (this == &that) { return TRUE; }
|
| +    const USearch *mine = m_search_;
|
| +    const USearch *theirs = that.m_search_;
|
| +    // Break iterators compare by pointer; text content is only compared once the lengths agree.
|
| +    return m_breakiterator_ == that.m_breakiterator_ &&
|
| +           mine->isCanonicalMatch == theirs->isCanonicalMatch &&
|
| +           mine->isOverlap == theirs->isOverlap &&
|
| +           mine->elementComparisonType == theirs->elementComparisonType &&
|
| +           mine->matchedIndex == theirs->matchedIndex &&
|
| +           mine->matchedLength == theirs->matchedLength &&
|
| +           mine->textLength == theirs->textLength &&
|
| +           getOffset() == that.getOffset() &&
|
| +           uprv_memcmp(mine->text, theirs->text, mine->textLength * sizeof(UChar)) == 0;
|
| +}
|
| +
|
| +// public methods ----------------------------------------------------
|
| +
|
| +int32_t SearchIterator::first(UErrorCode &status)
|
| +{
|
| +    if (U_SUCCESS(status)) {
|
| +        setOffset(0, status); // restart from offset 0, then search forward from there
|
| +        return handleNext(0, status);
|
| +    }
|
| +    return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +int32_t SearchIterator::following(int32_t position,
|
| +                                  UErrorCode &status)
|
| +{
|
| +    if (U_SUCCESS(status)) {
|
| +        setOffset(position, status); // reposition first, then search forward from there
|
| +        return handleNext(position, status);
|
| +    }
|
| +    return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +int32_t SearchIterator::last(UErrorCode &status)
|
| +{
|
| +    if (U_SUCCESS(status)) {
|
| +        setOffset(m_search_->textLength, status); // start at the end of the text, then search backwards
|
| +        return handlePrev(m_search_->textLength, status);
|
| +    }
|
| +    return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +int32_t SearchIterator::preceding(int32_t position,
|
| +                                  UErrorCode &status)
|
| +{
|
| +    if (U_SUCCESS(status)) {
|
| +        setOffset(position, status); // reposition first, then search backwards from there
|
| +        return handlePrev(position, status);
|
| +    }
|
| +    return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +int32_t SearchIterator::next(UErrorCode &status) // forward step: returns handleNext()'s result, or USEARCH_DONE when out of text or on error
|
| +{
|
| + if (U_SUCCESS(status)) {
|
| + int32_t offset = getOffset();
|
| + int32_t matchindex = m_search_->matchedIndex;
|
| + int32_t matchlength = m_search_->matchedLength;
|
| + m_search_->reset = FALSE; // iteration has started: previous() will no longer jump to the text end
|
| + if (m_search_->isForwardSearching == TRUE) {
|
| + int32_t textlength = m_search_->textLength;
|
| + if (offset == textlength || matchindex == textlength ||
|
| + (matchindex != USEARCH_DONE &&
|
| + matchindex + matchlength >= textlength)) {
|
| + // not enough characters to match
|
| + setMatchNotFound();
|
| + return USEARCH_DONE;
|
| + }
|
| + }
|
| + else {
|
| + // switching direction.
|
| + // if matchedIndex == USEARCH_DONE, it means that either a
|
| + // setOffset has been called or that previous ran off the text
|
| + // string. the iterator would have been set to offset 0 if a
|
| + // match is not found.
|
| + m_search_->isForwardSearching = TRUE;
|
| + if (m_search_->matchedIndex != USEARCH_DONE) {
|
| + // there's no need to set the collation element iterator
|
| + // the next call to next will set the offset.
|
| + return matchindex; // the current match is reported again on a direction switch
|
| + }
|
| + }
|
| +
|
| + if (matchlength > 0) {
|
| + // if matchlength is 0 we are at the start of the iteration
|
| + if (m_search_->isOverlap) {
|
| + offset ++; // overlapping mode: advance the offset by a single position
|
| + }
|
| + else {
|
| + offset += matchlength; // non-overlapping mode: advance the offset by the previous match length
|
| + }
|
| + }
|
| + return handleNext(offset, status);
|
| + }
|
| + return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +int32_t SearchIterator::previous(UErrorCode &status) // backward step: returns handlePrev()'s result, or USEARCH_DONE when out of text or on error
|
| +{
|
| + if (U_SUCCESS(status)) {
|
| + int32_t offset;
|
| + if (m_search_->reset) { // set by the constructors and reset(); cleared by next() and just below
|
| + offset = m_search_->textLength;
|
| + m_search_->isForwardSearching = FALSE;
|
| + m_search_->reset = FALSE;
|
| + setOffset(offset, status); // begin the backward iteration at the end of the text
|
| + }
|
| + else {
|
| + offset = getOffset();
|
| + }
|
| +
|
| + int32_t matchindex = m_search_->matchedIndex;
|
| + if (m_search_->isForwardSearching == TRUE) {
|
| + // switching direction.
|
| + // if matchedIndex == USEARCH_DONE, it means that either a
|
| + // setOffset has been called or that next ran off the text
|
| + // string. the iterator would have been set to offset textLength if
|
| + // a match is not found.
|
| + m_search_->isForwardSearching = FALSE;
|
| + if (matchindex != USEARCH_DONE) {
|
| + return matchindex; // the current match is reported again on a direction switch
|
| + }
|
| + }
|
| + else {
|
| + if (offset == 0 || matchindex == 0) {
|
| + // not enough characters to match
|
| + setMatchNotFound();
|
| + return USEARCH_DONE;
|
| + }
|
| + }
|
| +
|
| + if (matchindex != USEARCH_DONE) {
|
| + if (m_search_->isOverlap) {
|
| + matchindex += m_search_->matchedLength - 2; // NOTE(review): the "- 2" is not explained by anything visible here - confirm against handlePrev()
|
| + }
|
| +
|
| + return handlePrev(matchindex, status);
|
| + }
|
| +
|
| + return handlePrev(offset, status); // no current match: search backwards from the current offset
|
| + }
|
| +
|
| + return USEARCH_DONE; // an error was already pending on entry
|
| +}
|
| +
|
| +void SearchIterator::reset()
|
| +{
|
| +    UErrorCode ignored = U_ZERO_ERROR; // the outcome of setOffset() is not reported to the caller
|
| +    setMatchNotFound(); // runs before isForwardSearching is restored below
|
| +    setOffset(0, ignored);
|
| +    m_search_->isForwardSearching = TRUE;
|
| +    m_search_->reset = TRUE;
|
| +    m_search_->elementComparisonType = 0;
|
| +    m_search_->isCanonicalMatch = FALSE;
|
| +    m_search_->isOverlap = FALSE;
|
| +}
|
| +
|
| +// protected constructors and destructors -----------------------------
|
| +
|
| +SearchIterator::SearchIterator()
|
| +    : m_breakiterator_(NULL)
|
| +{
|
| +    m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); // NOTE(review): the result is not checked for NULL
|
| +    m_search_->text = NULL; // no text until setText() is called
|
| +    m_search_->textLength = 0;
|
| +    m_search_->breakIter = NULL;
|
| +    m_search_->matchedIndex = USEARCH_DONE; // no current match
|
| +    m_search_->matchedLength = 0;
|
| +    m_search_->isForwardSearching = TRUE;
|
| +    m_search_->reset = TRUE;
|
| +    m_search_->elementComparisonType = 0;
|
| +    m_search_->isCanonicalMatch = FALSE;
|
| +    m_search_->isOverlap = FALSE;
|
| +}
|
| +
|
| +SearchIterator::SearchIterator(const UnicodeString &text,
|
| +                               BreakIterator *breakiter)
|
| +    : m_breakiterator_(breakiter), m_text_(text)
|
| +{
|
| +    // The break iterator is kept in m_breakiterator_ only; m_search_->breakIter starts out NULL.
|
| +    m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); // NOTE(review): the result is not checked for NULL
|
| +    m_search_->text = m_text_.getBuffer(); // points into the member copy of the text
|
| +    m_search_->textLength = text.length();
|
| +    m_search_->breakIter = NULL;
|
| +    m_search_->matchedIndex = USEARCH_DONE; // no current match
|
| +    m_search_->matchedLength = 0;
|
| +    m_search_->isForwardSearching = TRUE;
|
| +    m_search_->reset = TRUE;
|
| +    m_search_->elementComparisonType = 0;
|
| +    m_search_->isCanonicalMatch = FALSE;
|
| +    m_search_->isOverlap = FALSE;
|
| +}
|
| +
|
| +SearchIterator::SearchIterator(CharacterIterator &text,
|
| +                               BreakIterator *breakiter)
|
| +    : m_breakiterator_(breakiter)
|
| +{
|
| +    // The break iterator is kept in m_breakiterator_ only; m_search_->breakIter starts out NULL.
|
| +    m_search_ = (USearch *)uprv_malloc(sizeof(USearch)); // NOTE(review): the result is not checked for NULL
|
| +    m_search_->breakIter = NULL;
|
| +    m_search_->matchedIndex = USEARCH_DONE; // no current match
|
| +    m_search_->matchedLength = 0;
|
| +    m_search_->isForwardSearching = TRUE;
|
| +    m_search_->reset = TRUE;
|
| +    m_search_->elementComparisonType = 0;
|
| +    m_search_->isCanonicalMatch = FALSE;
|
| +    m_search_->isOverlap = FALSE;
|
| +    text.getText(m_text_); // copy the iterator's text into the member string first
|
| +    m_search_->text = m_text_.getBuffer();
|
| +    m_search_->textLength = m_text_.length();
|
| +}
|
| +
|
| +// protected methods ------------------------------------------------------
|
| +
|
| +SearchIterator & SearchIterator::operator=(const SearchIterator &that)
|
| +{
|
| +    if (this == &that) { return *this; }
|
| +    // isForwardSearching and reset are not assigned here and keep this object's current values.
|
| +    m_text_ = that.m_text_;
|
| +    m_breakiterator_ = that.m_breakiterator_;
|
| +    m_search_->text = that.m_search_->text;
|
| +    m_search_->textLength = that.m_search_->textLength;
|
| +    m_search_->breakIter = that.m_search_->breakIter;
|
| +    m_search_->matchedIndex = that.m_search_->matchedIndex;
|
| +    m_search_->matchedLength = that.m_search_->matchedLength;
|
| +    m_search_->elementComparisonType = that.m_search_->elementComparisonType;
|
| +    m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
|
| +    m_search_->isOverlap = that.m_search_->isOverlap;
|
| +    return *this;
|
| +}
|
| +
|
| +void SearchIterator::setMatchLength(int32_t length)
|
| +{
|
| + m_search_->matchedLength = length; // stored as given; this is the value getMatchedLength() returns
|
| +}
|
| +
|
| +void SearchIterator::setMatchStart(int32_t position)
|
| +{
|
| + m_search_->matchedIndex = position; // stored as given; this is the value getMatchedStart() returns
|
| +}
|
| +
|
| +void SearchIterator::setMatchNotFound()
|
| +{
|
| +    setMatchStart(USEARCH_DONE);
|
| +    setMatchLength(0);
|
| +
|
| +    // Leave the iterator parked at the end it was moving towards: the
|
| +    // text length when searching forward, offset 0 otherwise.  The
|
| +    // original author expects both offsets to be in range, so the
|
| +    // status from setOffset() is not propagated.
|
| +    UErrorCode ignored = U_ZERO_ERROR;
|
| +    const int32_t parkedOffset =
|
| +        m_search_->isForwardSearching ? m_search_->textLength : 0;
|
| +    setOffset(parkedOffset, ignored);
|
| +}
|
| +
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
|
|
|
| Property changes on: icu46/source/i18n/search.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|