icu46/source/i18n/strmatch.h - Issue 5516007: Check in the pristine copy of ICU 4.6...

Unified Diff: icu46/source/i18n/strmatch.h

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: icu46/source/i18n/strmatch.h

===================================================================

--- icu46/source/i18n/strmatch.h (revision 0)

+++ icu46/source/i18n/strmatch.h (revision 0)

@@ -0,0 +1,254 @@

+/*

+ **********************************************************************

+ * Date Name Description

+ * 07/23/01 aliu Creation.

+ **********************************************************************

+ */

+#ifndef STRMATCH_H

+#define STRMATCH_H

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_TRANSLITERATION

+#include "unicode/unistr.h"

+#include "unicode/unifunct.h"

+#include "unicode/unimatch.h"

+#include "unicode/unirepl.h"

+U_NAMESPACE_BEGIN

+class TransliterationRuleData;

+/**

+ * An object that matches a fixed input string, implementing the

+ * UnicodeMatcher API. This object also implements the

+ * UnicodeReplacer API, allowing it to emit the matched text as

+ * output. Since the match text may contain flexible match elements,

+ * such as UnicodeSets, the emitted text is not the match pattern, but

+ * instead a substring of the actual matched text. Following

+ * convention, the output text is the leftmost match seen up to this

+ * point.

+ *

+ * A StringMatcher may represent a segment, in which case it has a

+ * positive segment number. This affects how the matcher converts

+ * itself to a pattern but does not otherwise affect its function.

+ *

+ * A StringMatcher that is not a segment should not be used as a

+ * UnicodeReplacer.

+ */

+class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {

+ public:

+ /**

+ * Construct a matcher that matches the given pattern string.

+ * @param string the pattern to be matched, possibly containing

+ * stand-ins that represent nested UnicodeMatcher objects.

+ * @param start inclusive start index, presumably of the pattern within string (TODO confirm)

+ * @param limit exclusive end index, presumably of the pattern within string;

+ * must be greater than or equal to start

+ * @param segmentNum the segment number from 1..n, or 0 if this is

+ * not a segment.

+ * @param data context object mapping stand-ins to

+ * UnicodeMatcher objects.

+ */

+ StringMatcher(const UnicodeString& string,

+ int32_t start,

+ int32_t limit,

+ int32_t segmentNum,

+ const TransliterationRuleData& data);

+ /**

+ * Copy constructor

+ * @param o the object to be copied.

+ */

+ StringMatcher(const StringMatcher& o);

+ /**

+ * Destructor

+ */

+ virtual ~StringMatcher();

+ /**

+ * Implement UnicodeFunctor

+ * @return a copy of the object.

+ */

+ virtual UnicodeFunctor* clone() const;

+ /**

+ * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer

+ * and return the pointer.

+ * @return the UnicodeMatcher pointer.

+ */

+ virtual UnicodeMatcher* toMatcher() const;

+ /**

+ * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer

+ * and return the pointer.

+ * @return the UnicodeReplacer pointer.

+ */

+ virtual UnicodeReplacer* toReplacer() const;

+ /**

+ * Implement UnicodeMatcher

+ * @param text the text to be matched

+ * @param offset on input, the index into text at which to begin

+ * matching. On output, the limit of the matched text. The

+ * number of matched characters is the output value of offset

+ * minus the input value. Offset should always point to the

+ * HIGH SURROGATE (leading code unit) of a pair of surrogates,

+ * both on entry and upon return.

+ * @param limit the limit index of text to be matched. Greater

+ * than offset for a forward direction match, less than offset for

+ * a backward direction match. The last character to be

+ * considered for matching will be text.charAt(limit-1) in the

+ * forward direction or text.charAt(limit+1) in the backward

+ * direction.

+ * @param incremental if TRUE, then assume further characters may

+ * be inserted at limit and check for partial matching. Otherwise

+ * assume the text as given is complete.

+ * @return a match degree value indicating a full match, a partial

+ * match, or a mismatch. If incremental is FALSE then

+ * U_PARTIAL_MATCH should never be returned.

+ */

+ virtual UMatchDegree matches(const Replaceable& text,

+ int32_t& offset,

+ int32_t limit,

+ UBool incremental);

+ /**

+ * Implement UnicodeMatcher

+ * @param result Output param to receive the pattern.

+ * @param escapeUnprintable if TRUE then escape unprintable characters.

+ * @return A reference to 'result'.

+ */

+ virtual UnicodeString& toPattern(UnicodeString& result,

+ UBool escapeUnprintable = FALSE) const;

+ /**

+ * Implement UnicodeMatcher

+ * Returns TRUE if this matcher will match a character c, where c

+ * & 0xFF == v, at offset, in the forward direction (with limit >

+ * offset). This is used by <tt>RuleBasedTransliterator</tt> for

+ * indexing.

+ * @param v the given value

+ * @return TRUE if this matcher will match a character c,

+ * where c & 0xFF == v

+ */

+ virtual UBool matchesIndexValue(uint8_t v) const;

+ /**

+ * Implement UnicodeMatcher

+ */

+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;

+ /**

+ * Implement UnicodeFunctor

+ */

+ virtual void setData(const TransliterationRuleData*);

+ /**

+ * Replace characters in 'text' from 'start' to 'limit' with the

+ * output text of this object. Update the 'cursor' parameter to

+ * give the cursor position and return the length of the

+ * replacement text.

+ *

+ * @param text the text to be matched

+ * @param start inclusive start index of text to be replaced

+ * @param limit exclusive end index of text to be replaced;

+ * must be greater than or equal to start

+ * @param cursor output parameter for the cursor position.

+ * Not all replacer objects will update this, but in a complete

+ * tree of replacer objects, representing the entire output side

+ * of a transliteration rule, at least one must update it.

+ * @return the number of 16-bit code units in the text replacing

+ * the characters at offsets start..(limit-1) in text

+ */

+ virtual int32_t replace(Replaceable& text,

+ int32_t start,

+ int32_t limit,

+ int32_t& cursor);

+ /**

+ * Returns a string representation of this replacer. If the

+ * result of calling this function is passed to the appropriate

+ * parser, typically TransliteratorParser, it will produce another

+ * replacer that is equal to this one.

+ * @param result the string to receive the pattern. Previous

+ * contents will be deleted.

+ * @param escapeUnprintable if TRUE then convert unprintable

+ * characters to their hex escape representations, \\uxxxx or

+ * \\Uxxxxxxxx. Unprintable characters are defined by

+ * Utility.isUnprintable().

+ * @return a reference to 'result'.

+ */

+ virtual UnicodeString& toReplacerPattern(UnicodeString& result,

+ UBool escapeUnprintable) const;

+ /**

+ * Remove any match data. This must be called before performing a

+ * set of matches with this segment.

+ */

+ void resetMatch();

+ /**

+ * ICU "poor man's RTTI", returns a UClassID for the actual class.

+ *

+ * @draft ICU 2.2

+ */

+ virtual UClassID getDynamicClassID() const;

+ /**

+ * ICU "poor man's RTTI", returns a UClassID for this class.

+ *

+ * @draft ICU 2.2

+ */

+ static UClassID U_EXPORT2 getStaticClassID();

+ /**

+ * Union the set of all characters that may be output by this object

+ * into the given set.

+ * @param toUnionTo the set into which to union the output characters

+ */

+ virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;

+ private:

+ /**

+ * The text to be matched.

+ */

+ UnicodeString pattern;

+ /**

+ * Context object that maps stand-ins to matcher and replacer

+ * objects.

+ */

+ const TransliterationRuleData* data;

+ /**

+ * The segment number, 1-based, or 0 if not a segment.

+ */

+ int32_t segmentNumber;

+ /**

+ * Start offset, in the match text, of the <em>rightmost</em>

+ * match. NOTE(review): the class comment says leftmost; confirm which is intended.

+ */

+ int32_t matchStart;

+ /**

+ * Limit offset, in the match text, of the <em>rightmost</em>

+ * match.

+ */

+ int32_t matchLimit;

+};

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_TRANSLITERATION */

+#endif

Property changes on: icu46/source/i18n/strmatch.h

___________________________________________________________________

Added: svn:eol-style

+ LF

« no previous file with comments | « icu46/source/i18n/sortkey.cpp ('k') | icu46/source/i18n/strmatch.cpp » ('j') | no next file with comments »