| Index: icu46/source/i18n/strmatch.h
|
| ===================================================================
|
| --- icu46/source/i18n/strmatch.h (revision 0)
|
| +++ icu46/source/i18n/strmatch.h (revision 0)
|
| @@ -0,0 +1,254 @@
|
| +/*
|
| + * Copyright (C) 2001-2004, International Business Machines Corporation
|
| + * and others. All Rights Reserved.
|
| + **********************************************************************
|
| + * Date Name Description
|
| + * 07/23/01 aliu Creation.
|
| + **********************************************************************
|
| + */
|
| +#ifndef STRMATCH_H
|
| +#define STRMATCH_H
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_TRANSLITERATION
|
| +
|
| +#include "unicode/unistr.h"
|
| +#include "unicode/unifunct.h"
|
| +#include "unicode/unimatch.h"
|
| +#include "unicode/unirepl.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +class TransliterationRuleData;
|
| +
|
| +/**
|
| + * An object that matches a fixed input string, implementing the
|
| + * UnicodeMatcher API. This object also implements the
|
| + * UnicodeReplacer API, allowing it to emit the matched text as
|
| + * output. Since the match text may contain flexible match elements,
|
| + * such as UnicodeSets, the emitted text is not the match pattern, but
|
| + * instead a substring of the actual matched text. Following
|
| + * convention, the output text is the leftmost match seen up to this
|
| + * point.
|
| + *
|
| + * A StringMatcher may represent a segment, in which case it has a
|
| + * positive segment number. This affects how the matcher converts
|
| + * itself to a pattern but does not otherwise affect its function.
|
| + *
|
| + * A StringMatcher that is not a segment should not be used as a
|
| + * UnicodeReplacer.
|
| + */
|
| +class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {
|
| +
|
| + public:
|
| +
|
| + /**
|
| + * Construct a matcher that matches the given pattern string.
|
| + * @param string the pattern to be matched, possibly containing
|
| + * stand-ins that represent nested UnicodeMatcher objects.
|
| + * @param start inclusive start index of text to be replaced
|
| + * @param limit exclusive end index of text to be replaced;
|
| + * must be greater than or equal to start
|
| + * @param segmentNum the segment number from 1..n, or 0 if this is
|
| + * not a segment.
|
| + * @param data context object mapping stand-ins to
|
| + * UnicodeMatcher objects.
|
| + */
|
| + StringMatcher(const UnicodeString& string,
|
| + int32_t start,
|
| + int32_t limit,
|
| + int32_t segmentNum,
|
| + const TransliterationRuleData& data);
|
| +
|
| + /**
|
| + * Copy constructor
|
| + * @param o the object to be copied.
|
| + */
|
| + StringMatcher(const StringMatcher& o);
|
| +
|
| + /**
|
| + * Destructor
|
| + */
|
| + virtual ~StringMatcher();
|
| +
|
| + /**
|
| + * Implement UnicodeFunctor
|
| + * @return a copy of the object.
|
| + */
|
| + virtual UnicodeFunctor* clone() const;
|
| +
|
| + /**
|
| + * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
|
| + * and return the pointer.
|
| + * @return the UnicodeMatcher pointer.
|
| + */
|
| + virtual UnicodeMatcher* toMatcher() const;
|
| +
|
| + /**
|
| + * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer
|
| + * and return the pointer.
|
| + * @return the UnicodeReplacer pointer.
|
| + */
|
| + virtual UnicodeReplacer* toReplacer() const;
|
| +
|
| + /**
|
| + * Implement UnicodeMatcher
|
| + * @param text the text to be matched
|
| + * @param offset on input, the index into text at which to begin
|
| + * matching. On output, the limit of the matched text. The
|
| + * number of matched characters is the output value of offset
|
| + * minus the input value. Offset should always point to the
|
| + * HIGH SURROGATE (leading code unit) of a pair of surrogates,
|
| + * both on entry and upon return.
|
| + * @param limit the limit index of text to be matched. Greater
|
| + * than offset for a forward direction match, less than offset for
|
| + * a backward direction match. The last character to be
|
| + * considered for matching will be text.charAt(limit-1) in the
|
| + * forward direction or text.charAt(limit+1) in the backward
|
| + * direction.
|
| + * @param incremental if TRUE, then assume further characters may
|
| + * be inserted at limit and check for partial matching. Otherwise
|
| + * assume the text as given is complete.
|
| + * @return a match degree value indicating a full match, a partial
|
| + * match, or a mismatch. If incremental is FALSE then
|
| + * U_PARTIAL_MATCH should never be returned.
|
| + */
|
| + virtual UMatchDegree matches(const Replaceable& text,
|
| + int32_t& offset,
|
| + int32_t limit,
|
| + UBool incremental);
|
| +
|
| + /**
|
| + * Implement UnicodeMatcher
|
| + * @param result Output param to receive the pattern.
|
| + * @param escapeUnprintable if TRUE then escape the unprintable characters.
|
| + * @return A reference to 'result'.
|
| + */
|
| + virtual UnicodeString& toPattern(UnicodeString& result,
|
| + UBool escapeUnprintable = FALSE) const;
|
| +
|
| + /**
|
| + * Implement UnicodeMatcher
|
| + * Returns TRUE if this matcher will match a character c, where c
|
| + * & 0xFF == v, at offset, in the forward direction (with limit >
|
| + * offset). This is used by <tt>RuleBasedTransliterator</tt> for
|
| + * indexing.
|
| + * @param v the given value
|
| + * @return TRUE if this matcher will match a character c,
|
| + * where c & 0xFF == v
|
| + */
|
| + virtual UBool matchesIndexValue(uint8_t v) const;
|
| +
|
| + /**
|
| + * Implement UnicodeMatcher
|
| + */
|
| + virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;
|
| +
|
| + /**
|
| + * Implement UnicodeFunctor
|
| + */
|
| + virtual void setData(const TransliterationRuleData*);
|
| +
|
| + /**
|
| + * Replace characters in 'text' from 'start' to 'limit' with the
|
| + * output text of this object. Update the 'cursor' parameter to
|
| + * give the cursor position and return the length of the
|
| + * replacement text.
|
| + *
|
| + * @param text the text to be matched
|
| + * @param start inclusive start index of text to be replaced
|
| + * @param limit exclusive end index of text to be replaced;
|
| + * must be greater than or equal to start
|
| + * @param cursor output parameter for the cursor position.
|
| + * Not all replacer objects will update this, but in a complete
|
| + * tree of replacer objects, representing the entire output side
|
| + * of a transliteration rule, at least one must update it.
|
| + * @return the number of 16-bit code units in the text replacing
|
| + * the characters at offsets start..(limit-1) in text
|
| + */
|
| + virtual int32_t replace(Replaceable& text,
|
| + int32_t start,
|
| + int32_t limit,
|
| + int32_t& cursor);
|
| +
|
| + /**
|
| + * Returns a string representation of this replacer. If the
|
| + * result of calling this function is passed to the appropriate
|
| + * parser, typically TransliteratorParser, it will produce another
|
| + * replacer that is equal to this one.
|
| + * @param result the string to receive the pattern. Previous
|
| + * contents will be deleted.
|
| + * @param escapeUnprintable if TRUE then convert unprintable
|
| + * characters to their hex escape representations, \\uxxxx or
|
| + * \\Uxxxxxxxx. Unprintable characters are defined by
|
| + * Utility.isUnprintable().
|
| + * @return a reference to 'result'.
|
| + */
|
| + virtual UnicodeString& toReplacerPattern(UnicodeString& result,
|
| + UBool escapeUnprintable) const;
|
| +
|
| + /**
|
| + * Remove any match data. This must be called before performing a
|
| + * set of matches with this segment.
|
| + */
|
| + void resetMatch();
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for the actual class.
|
| + *
|
| + * @draft ICU 2.2
|
| + */
|
| + virtual UClassID getDynamicClassID() const;
|
| +
|
| + /**
|
| + * ICU "poor man's RTTI", returns a UClassID for this class.
|
| + *
|
| + * @draft ICU 2.2
|
| + */
|
| + static UClassID U_EXPORT2 getStaticClassID();
|
| +
|
| + /**
|
| + * Union the set of all characters that may be output by this object
|
| + * into the given set.
|
| + * @param toUnionTo the set into which to union the output characters
|
| + */
|
| + virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;
|
| +
|
| + private:
|
| +
|
| + /**
|
| + * The text to be matched.
|
| + */
|
| + UnicodeString pattern;
|
| +
|
| + /**
|
| + * Context object that maps stand-ins to matcher and replacer
|
| + * objects.
|
| + */
|
| + const TransliterationRuleData* data;
|
| +
|
| + /**
|
| + * The segment number, 1-based, or 0 if not a segment.
|
| + */
|
| + int32_t segmentNumber;
|
| +
|
| + /**
|
| + * Start offset, in the match text, of the <em>rightmost</em>
|
| + * match.
|
| + */
|
| + int32_t matchStart;
|
| +
|
| + /**
|
| + * Limit offset, in the match text, of the <em>rightmost</em>
|
| + * match.
|
| + */
|
| + int32_t matchLimit;
|
| +
|
| +};
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
| +
|
| +#endif
|
|
|
| Property changes on: icu46/source/i18n/strmatch.h
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|