| Index: icu46/source/i18n/unicode/search.h
|
| ===================================================================
|
| --- icu46/source/i18n/unicode/search.h (revision 0)
|
| +++ icu46/source/i18n/unicode/search.h (revision 0)
|
| @@ -0,0 +1,569 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (C) 2001-2008 IBM and others. All rights reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 03/22/2000 helena Creation.
|
| +**********************************************************************
|
| +*/
|
| +
|
| +#ifndef SEARCH_H
|
| +#define SEARCH_H
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +/**
|
| + * \file
|
| + * \brief C++ API: SearchIterator object.
|
| + */
|
| +
|
| +#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
|
| +
|
| +#include "unicode/uobject.h"
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/chariter.h"
|
| +#include "unicode/brkiter.h"
|
| +#include "unicode/usearch.h"
|
| +
|
| +/**
|
| +* @stable ICU 2.0
|
| +*/
|
| +struct USearch;
|
| +/**
|
| +* @stable ICU 2.0
|
| +*/
|
| +typedef struct USearch USearch;
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +/**
|
| + *
|
| + * <tt>SearchIterator</tt> is an abstract base class that provides
|
| + * methods to search for a pattern within a text string. Instances of
|
| + * <tt>SearchIterator</tt> maintain a current position and scan over the
|
| + * target text, returning the indices at which the pattern is matched and the length
|
| + * of each match.
|
| + * <p>
|
| + * <tt>SearchIterator</tt> defines a protocol for text searching.
|
| + * Subclasses provide concrete implementations of various search algorithms.
|
| + * For example, <tt>StringSearch</tt> implements language-sensitive pattern
|
| + * matching based on the comparison rules defined in a
|
| + * <tt>RuleBasedCollator</tt> object.
|
| + * <p>
|
| + * Other options for searching include using a BreakIterator to restrict
|
| + * the points at which matches are detected.
|
| + * <p>
|
| + * <tt>SearchIterator</tt> provides an API that is similar to that of
|
| + * other text iteration classes such as <tt>BreakIterator</tt>. Using
|
| + * this class, it is easy to scan through text looking for all occurrences of
|
| + * a given pattern. The following example uses a <tt>StringSearch</tt>
|
| + * object to find all instances of "fox" in the target string. Any other
|
| + * subclass of <tt>SearchIterator</tt> can be used in an identical
|
| + * manner.
|
| + * <pre><code>
|
| + * UnicodeString target("The quick brown fox jumped over the lazy fox");
|
| + * UnicodeString pattern("fox");
|
| + *
|
| + * SearchIterator *iter = new StringSearch(pattern, target);
|
| + * UErrorCode error = U_ZERO_ERROR;
|
| + * for (int pos = iter->first(error); pos != USEARCH_DONE;
|
| + * pos = iter->next(error)) {
|
| + * printf("Found match at %d pos, length is %d\n", pos,
|
| + * iter->getMatchedLength());
|
| + * }
|
| + * </code></pre>
|
| + *
|
| + * @see StringSearch
|
| + * @see RuleBasedCollator
|
| + */
|
| +class U_I18N_API SearchIterator : public UObject {
|
| +
|
| +public:
|
| +
|
| + // public constructors and destructors -------------------------------
|
| +
|
| + /**
|
| + * Copy constructor that creates a SearchIterator instance with the same
|
| + * behavior, and iterating over the same text.
|
| + * @param other the SearchIterator instance to be copied.
|
| + * @stable ICU 2.0
|
| + */
|
| + SearchIterator(const SearchIterator &other);
|
| +
|
| + /**
|
| + * Destructor. Cleans up the search iterator data struct.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual ~SearchIterator();
|
| +
|
| + // public get and set methods ----------------------------------------
|
| +
|
| + /**
|
| + * Sets the index to point to the given position, and clears any state
|
| + * that's affected.
|
| + * <p>
|
| + * This method takes the argument index and sets the position in the text
|
| + * string accordingly without checking if the index is pointing to a
|
| + * valid starting point to begin searching.
|
| + * @param position within the text to be set. If position is less
|
| + * than or greater than the text range for searching,
|
| + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
| + * @param status for errors if it occurs
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void setOffset(int32_t position, UErrorCode &status) = 0;
|
| +
|
| + /**
|
| + * Return the current index in the text being searched.
|
| + * If the iteration has gone past the end of the text
|
| + * (or past the beginning for a backwards search), USEARCH_DONE
|
| + * is returned.
|
| + * @return current index in the text being searched.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual int32_t getOffset(void) const = 0;
|
| +
|
| + /**
|
| + * Sets the text searching attributes located in the enum
|
| + * USearchAttribute with values from the enum USearchAttributeValue.
|
| + * USEARCH_DEFAULT can be used for all attributes for resetting.
|
| + * @param attribute text attribute (enum USearchAttribute) to be set
|
| + * @param value text attribute value
|
| + * @param status for errors if it occurs
|
| + * @stable ICU 2.0
|
| + */
|
| + void setAttribute(USearchAttribute attribute,
|
| + USearchAttributeValue value,
|
| + UErrorCode &status);
|
| +
|
| + /**
|
| + * Gets the text searching attributes
|
| + * @param attribute text attribute (enum USearchAttribute) to be retrieved
|
| + * @return text attribute value
|
| + * @stable ICU 2.0
|
| + */
|
| + USearchAttributeValue getAttribute(USearchAttribute attribute) const;
|
| +
|
| + /**
|
| + * Returns the index to the match in the text string that was searched.
|
| + * This call returns a valid result only after a successful call to
|
| + * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
| + * Just after construction, or after a searching method returns
|
| + * <tt>USEARCH_DONE</tt>, this method will return <tt>USEARCH_DONE</tt>.
|
| + * <p>
|
| + * Use getMatchedLength to get the matched string length.
|
| + * @return index of a substring within the text string that is being
|
| + * searched.
|
| + * @see #first
|
| + * @see #next
|
| + * @see #previous
|
| + * @see #last
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t getMatchedStart(void) const;
|
| +
|
| + /**
|
| + * Returns the length of text in the string which matches the search
|
| + * pattern. This call returns a valid result only after a successful call
|
| + * to <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
| + * Just after construction, or after a searching method returns
|
| + * <tt>USEARCH_DONE</tt>, this method will return 0.
|
| + * @return The length of the match in the target text, or 0 if there
|
| + * is no match currently.
|
| + * @see #first
|
| + * @see #next
|
| + * @see #previous
|
| + * @see #last
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t getMatchedLength(void) const;
|
| +
|
| + /**
|
| + * Returns the text that was matched by the most recent call to
|
| + * <tt>first</tt>, <tt>next</tt>, <tt>previous</tt>, or <tt>last</tt>.
|
| + * If the iterator is not pointing at a valid match (e.g. just after
|
| + * construction or after <tt>USEARCH_DONE</tt> has been returned),
|
| + * returns an empty string.
|
| + * @param result stores the matched string or an empty string if a match
|
| + * is not found.
|
| + * @see #first
|
| + * @see #next
|
| + * @see #previous
|
| + * @see #last
|
| + * @stable ICU 2.0
|
| + */
|
| + void getMatchedText(UnicodeString &result) const;
|
| +
|
| + /**
|
| + * Set the BreakIterator that will be used to restrict the points
|
| + * at which matches are detected. The user is responsible for deleting
|
| + * the breakiterator.
|
| + * @param breakiter A BreakIterator that will be used to restrict the
|
| + * points at which matches are detected. If a match is
|
| + * found, but the match's start or end index is not a
|
| + * boundary as determined by the <tt>BreakIterator</tt>,
|
| + * the match will be rejected and another will be searched
|
| + * for. If this parameter is <tt>NULL</tt>, no break
|
| + * detection is attempted.
|
| + * @param status for errors if it occurs
|
| + * @see BreakIterator
|
| + * @stable ICU 2.0
|
| + */
|
| + void setBreakIterator(BreakIterator *breakiter, UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the BreakIterator that is used to restrict the points at
|
| + * which matches are detected. This will be the same object that was
|
| + * passed to the constructor or to <tt>setBreakIterator</tt>.
|
| + * Note that <tt>NULL</tt> is a legal value; it means that break
|
| + * detection should not be attempted.
|
| + * @return BreakIterator used to restrict matchings.
|
| + * @see #setBreakIterator
|
| + * @stable ICU 2.0
|
| + */
|
| + const BreakIterator * getBreakIterator(void) const;
|
| +
|
| + /**
|
| + * Set the string text to be searched. Text iteration will hence begin at
|
| + * the start of the text string. This method is useful if you want to
|
| + * re-use an iterator to search for the same pattern within a different
|
| + * body of text. The user is responsible for deleting the text.
|
| + * @param text string to be searched.
|
| + * @param status for errors. If the text length is 0,
|
| + * an U_ILLEGAL_ARGUMENT_ERROR is returned.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void setText(const UnicodeString &text, UErrorCode &status);
|
| +
|
| + /**
|
| + * Set the string text to be searched. Text iteration will hence begin at
|
| + * the start of the text string. This method is useful if you want to
|
| + * re-use an iterator to search for the same pattern within a different
|
| + * body of text.
|
| + * <p>
|
| + * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
| + * will be done during searching for this version. The block of text
|
| + * in <tt>CharacterIterator</tt> will be used as it is.
|
| + * The user is responsible for deleting the text.
|
| + * @param text string iterator to be searched.
|
| + * @param status for errors if any. If the text length is 0 then an
|
| + * U_ILLEGAL_ARGUMENT_ERROR is returned.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void setText(CharacterIterator &text, UErrorCode &status);
|
| +
|
| + /**
|
| + * Return the string text to be searched.
|
| + * @return text string to be searched.
|
| + * @stable ICU 2.0
|
| + */
|
| + const UnicodeString & getText(void) const;
|
| +
|
| + // operator overloading ----------------------------------------------
|
| +
|
| + /**
|
| + * Equality operator.
|
| + * @param that SearchIterator instance to be compared.
|
| + * @return TRUE if both SearchIterators are of the same class, have the
|
| + * same behavior, iterate over the same text and have the same
|
| + * attributes. FALSE otherwise.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual UBool operator==(const SearchIterator &that) const;
|
| +
|
| + /**
|
| + * Not-equal operator.
|
| + * @param that SearchIterator instance to be compared.
|
| + * @return FALSE if operator== returns TRUE, and vice versa.
|
| + * @stable ICU 2.0
|
| + */
|
| + UBool operator!=(const SearchIterator &that) const;
|
| +
|
| + // public methods ----------------------------------------------------
|
| +
|
| + /**
|
| + * Returns a copy of SearchIterator with the same behavior, and
|
| + * iterating over the same text, as this one. Note that all data will be
|
| + * replicated, except for the text string to be searched.
|
| + * @return cloned object
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual SearchIterator* safeClone(void) const = 0;
|
| +
|
| + /**
|
| + * Returns the first index at which the string text matches the search
|
| + * pattern. The iterator is adjusted so that its current index (as
|
| + * returned by <tt>getOffset</tt>) is the match position if one
|
| + * was found.
|
| + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
| + * the iterator will be adjusted to the index USEARCH_DONE.
|
| + * @param status for errors if it occurs
|
| + * @return The character index of the first match, or
|
| + * <tt>USEARCH_DONE</tt> if there are no matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t first(UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the first index greater than <tt>position</tt> at which the
|
| + * string text matches the search pattern. The iterator is adjusted so
|
| + * that its current index (as returned by <tt>getOffset</tt>) is the
|
| + * match position if one was found. If a match is not found,
|
| + * <tt>USEARCH_DONE</tt> will be returned and the iterator will be
|
| + * adjusted to the index USEARCH_DONE.
|
| + * @param position where search is to start from. If position is less
|
| + * than or greater than the text range for searching,
|
| + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
| + * @param status for errors if it occurs
|
| + * @return The character index of the first match following
|
| + * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are no
|
| + * matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t following(int32_t position, UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the last index in the target text at which it matches the
|
| + * search pattern. The iterator is adjusted so that its current index
|
| + * (as returned by <tt>getOffset</tt>) is the match position if one was
|
| + * found.
|
| + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
| + * the iterator will be adjusted to the index USEARCH_DONE.
|
| + * @param status for errors if it occurs
|
| + * @return The index of the last match, or <tt>USEARCH_DONE</tt> if
|
| + * there are no matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t last(UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the first index less than <tt>position</tt> at which the string
|
| + * text matches the search pattern. The iterator is adjusted so that its
|
| + * current index (as returned by <tt>getOffset</tt>) is the match
|
| + * position if one was found. If a match is not found,
|
| + * <tt>USEARCH_DONE</tt> will be returned and the iterator will be
|
| + * adjusted to the index USEARCH_DONE.
|
| + * @param position where search is to start from. If position is less
|
| + * than or greater than the text range for searching,
|
| + * an U_INDEX_OUTOFBOUNDS_ERROR will be returned
|
| + * @param status for errors if it occurs
|
| + * @return The character index of the first match preceding
|
| + * <tt>position</tt>, or <tt>USEARCH_DONE</tt> if there are
|
| + * no matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t preceding(int32_t position, UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the index of the next point at which the text matches the
|
| + * search pattern, starting from the current position.
|
| + * The iterator is adjusted so that its current index (as returned by
|
| + * <tt>getOffset</tt>) is the match position if one was found.
|
| + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
| + * the iterator will be adjusted to a position after the end of the text
|
| + * string.
|
| + * @param status for errors if it occurs
|
| + * @return The index of the next match after the current position,
|
| + * or <tt>USEARCH_DONE</tt> if there are no more matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t next(UErrorCode &status);
|
| +
|
| + /**
|
| + * Returns the index of the previous point at which the string text
|
| + * matches the search pattern, starting at the current position.
|
| + * The iterator is adjusted so that its current index (as returned by
|
| + * <tt>getOffset</tt>) is the match position if one was found.
|
| + * If a match is not found, <tt>USEARCH_DONE</tt> will be returned and
|
| + * the iterator will be adjusted to the index USEARCH_DONE.
|
| + * @param status for errors if it occurs
|
| + * @return The index of the previous match before the current position,
|
| + * or <tt>USEARCH_DONE</tt> if there are no more matches.
|
| + * @see #getOffset
|
| + * @stable ICU 2.0
|
| + */
|
| + int32_t previous(UErrorCode &status);
|
| +
|
| + /**
|
| + * Resets the iteration.
|
| + * Search will begin at the start of the text string if a forward
|
| + * iteration is initiated before a backwards iteration. Otherwise if a
|
| + * backwards iteration is initiated before a forwards iteration, the
|
| + * search will begin at the end of the text string.
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void reset();
|
| +
|
| +protected:
|
| + // protected data members ---------------------------------------------
|
| +
|
| + /**
|
| + * C search data struct
|
| + * @stable ICU 2.0
|
| + */
|
| + USearch *m_search_;
|
| +
|
| + /**
|
| + * Break iterator.
|
| + * Currently the C++ breakiterator does not have getRules etc to reproduce
|
| + * another in C. Hence we keep the original around and do the verification
|
| + * at the end of the match. The user is responsible for deleting this
|
| + * break iterator.
|
| + * @stable ICU 2.0
|
| + */
|
| + BreakIterator *m_breakiterator_;
|
| +
|
| + /**
|
| + * Unicode string version of the search text
|
| + * @stable ICU 2.0
|
| + */
|
| + UnicodeString m_text_;
|
| +
|
| + // protected constructors and destructors -----------------------------
|
| +
|
| + /**
|
| + * Default constructor.
|
| + * Initializes data to the default values.
|
| + * @stable ICU 2.0
|
| + */
|
| + SearchIterator();
|
| +
|
| + /**
|
| + * Constructor for use by subclasses.
|
| + * @param text The target text to be searched.
|
| + * @param breakiter A {@link BreakIterator} that is used to restrict the
|
| + * points at which matches are detected. If
|
| + * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
|
| + * match, but the match's start or end index is not a
|
| + * boundary as determined by the <tt>BreakIterator</tt>,
|
| + * the match is rejected and <tt>handleNext</tt> or
|
| + * <tt>handlePrev</tt> is called again. If this parameter
|
| + * is <tt>NULL</tt>, no break detection is attempted.
|
| + * @see #handleNext
|
| + * @see #handlePrev
|
| + * @stable ICU 2.0
|
| + */
|
| + SearchIterator(const UnicodeString &text,
|
| + BreakIterator *breakiter = NULL);
|
| +
|
| + /**
|
| + * Constructor for use by subclasses.
|
| + * <p>
|
| + * Note: No parsing of the text within the <tt>CharacterIterator</tt>
|
| + * will be done during searching for this version. The block of text
|
| + * in <tt>CharacterIterator</tt> will be used as it is.
|
| + * @param text The target text to be searched.
|
| + * @param breakiter A {@link BreakIterator} that is used to restrict the
|
| + * points at which matches are detected. If
|
| + * <tt>handleNext</tt> or <tt>handlePrev</tt> finds a
|
| + * match, but the match's start or end index is not a
|
| + * boundary as determined by the <tt>BreakIterator</tt>,
|
| + * the match is rejected and <tt>handleNext</tt> or
|
| + * <tt>handlePrev</tt> is called again. If this parameter
|
| + * is <tt>NULL</tt>, no break detection is attempted.
|
| + * @see #handleNext
|
| + * @see #handlePrev
|
| + * @stable ICU 2.0
|
| + */
|
| + SearchIterator(CharacterIterator &text, BreakIterator *breakiter = NULL);
|
| +
|
| + // protected methods --------------------------------------------------
|
| +
|
| + /**
|
| + * Assignment operator. Sets this iterator to have the same behavior,
|
| + * and iterate over the same text, as the one passed in.
|
| + * @param that instance to be copied.
|
| + * @stable ICU 2.0
|
| + */
|
| + SearchIterator & operator=(const SearchIterator &that);
|
| +
|
| + /**
|
| + * Abstract method which subclasses override to provide the mechanism
|
| + * for finding the next match in the target text. This allows different
|
| + * subclasses to provide different search algorithms.
|
| + * <p>
|
| + * If a match is found, the implementation should return the index at
|
| + * which the match starts and should call
|
| + * <tt>setMatchLength</tt> with the number of characters
|
| + * in the target text that make up the match. If no match is found, the
|
| + * method should return USEARCH_DONE.
|
| + * <p>
|
| + * @param position The index in the target text at which the search
|
| + * should start.
|
| + * @param status for error codes if it occurs.
|
| + * @return index at which the match starts, else if match is not found
|
| + * USEARCH_DONE is returned
|
| + * @see #setMatchLength
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual int32_t handleNext(int32_t position, UErrorCode &status)
|
| + = 0;
|
| +
|
| + /**
|
| + * Abstract method which subclasses override to provide the mechanism for
|
| + * finding the previous match in the target text. This allows different
|
| + * subclasses to provide different search algorithms.
|
| + * <p>
|
| + * If a match is found, the implementation should return the index at
|
| + * which the match starts and should call
|
| + * <tt>setMatchLength</tt> with the number of characters
|
| + * in the target text that make up the match. If no match is found, the
|
| + * method should return USEARCH_DONE.
|
| + * <p>
|
| + * @param position The index in the target text at which the search
|
| + * should start.
|
| + * @param status for error codes if it occurs.
|
| + * @return index at which the match starts, else if match is not found
|
| + * USEARCH_DONE is returned
|
| + * @see #setMatchLength
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual int32_t handlePrev(int32_t position, UErrorCode &status)
|
| + = 0;
|
| +
|
| + /**
|
| + * Sets the length of the currently matched string in the text string to
|
| + * be searched.
|
| + * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
|
| + * methods should call this when they find a match in the target text.
|
| + * @param length length of the matched text.
|
| + * @see #handleNext
|
| + * @see #handlePrev
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void setMatchLength(int32_t length);
|
| +
|
| + /**
|
| + * Sets the offset of the currently matched string in the text string to
|
| + * be searched.
|
| + * Subclasses' <tt>handleNext</tt> and <tt>handlePrev</tt>
|
| + * methods should call this when they find a match in the target text.
|
| + * @param position start offset of the matched text.
|
| + * @see #handleNext
|
| + * @see #handlePrev
|
| + * @stable ICU 2.0
|
| + */
|
| + virtual void setMatchStart(int32_t position);
|
| +
|
| + /**
|
| + * sets match not found
|
| + * @stable ICU 2.0
|
| + */
|
| + void setMatchNotFound();
|
| +};
|
| +
|
| +inline UBool SearchIterator::operator!=(const SearchIterator &that) const
|
| +{
|
| + return !(this->operator==(that)); // logical negation of the virtual operator==
|
| +}
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */
|
| +
|
| +#endif
|
| +
|
|
|
| Property changes on: icu46/source/i18n/unicode/search.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|