icu46/source/i18n/strmatch.h - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/strmatch.h

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 * Copyright (C) 2001-2004, International Business Machines Corporation

	3 * and others. All Rights Reserved.

	4 **********************************************************************

	5 * Date Name Description

	6 * 07/23/01 aliu Creation.

	7 **********************************************************************

	8 */

	9 #ifndef STRMATCH_H

	10 #define STRMATCH_H

	11

	12 #include "unicode/utypes.h"

	13

	14 #if !UCONFIG_NO_TRANSLITERATION

	15

	16 #include "unicode/unistr.h"

	17 #include "unicode/unifunct.h"

	18 #include "unicode/unimatch.h"

	19 #include "unicode/unirepl.h"

	20

	21 U_NAMESPACE_BEGIN

	22

	23 class TransliterationRuleData;

	24

	25 /**

	26 * An object that matches a fixed input string, implementing the

	27 * UnicodeMatcher API. This object also implements the

	28 * UnicodeReplacer API, allowing it to emit the matched text as

	29 * output. Since the match text may contain flexible match elements,

	30 * such as UnicodeSets, the emitted text is not the match pattern, but

	31 * instead a substring of the actual matched text. Following

	32 * convention, the output text is the leftmost match seen up to this

	33 * point.

	34 *

	35 * A StringMatcher may represent a segment, in which case it has a

	36 * positive segment number. This affects how the matcher converts

	37 * itself to a pattern but does not otherwise affect its function.

	38 *

	39 * A StringMatcher that is not a segment should not be used as a

	40 * UnicodeReplacer.

	41 */

	42 class StringMatcher : public UnicodeFunctor, public UnicodeMatcher, public UnicodeReplacer {

	43

	44 public:

	45

	46 /**

	47 * Construct a matcher that matches the given pattern string.

	48 * @param string the pattern to be matched, possibly containing

	49 * stand-ins that represent nested UnicodeMatcher objects.

	50 * @param start inclusive start index of text to be replaced

	51 * @param limit exclusive end index of text to be replaced;

	52 * must be greater than or equal to start

	53 * @param segmentNum the segment number from 1..n, or 0 if this is

	54 * not a segment.

	55 * @param data context object mapping stand-ins to

	56 * UnicodeMatcher objects.

	57 */

	58 StringMatcher(const UnicodeString& string,

	59 int32_t start,

	60 int32_t limit,

	61 int32_t segmentNum,

	62 const TransliterationRuleData& data);

	63

	64 /**

	65 * Copy constructor

	66 * @param o the object to be copied.

	67 */

	68 StringMatcher(const StringMatcher& o);

	69

	70 /**

	71 * Destructor

	72 */

	73 virtual ~StringMatcher();

	74

	75 /**

	76 * Implement UnicodeFunctor

	77 * @return a copy of the object.

	78 */

	79 virtual UnicodeFunctor* clone() const;

	80

	81 /**

	82 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer

	83 * and return the pointer.

	84 * @return the UnicodeMatcher pointer.

	85 */

	86 virtual UnicodeMatcher* toMatcher() const;

	87

	88 /**

	89 * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer

	90 * and return the pointer.

	91 * @return the UnicodeReplacer pointer.

	92 */

	93 virtual UnicodeReplacer* toReplacer() const;

	94

	95 /**

	96 * Implement UnicodeMatcher

	97 * @param text the text to be matched

	98 * @param offset on input, the index into text at which to begin

	99 * matching. On output, the limit of the matched text. The

	100 * number of matched characters is the output value of offset

	101 * minus the input value. Offset should always point to the

	102 * HIGH SURROGATE (leading code unit) of a pair of surrogates,

	103 * both on entry and upon return.

	104 * @param limit the limit index of text to be matched. Greater

	105 * than offset for a forward direction match, less than offset for

	106 * a backward direction match. The last character to be

	107 * considered for matching will be text.charAt(limit-1) in the

	108 * forward direction or text.charAt(limit+1) in the backward

	109 * direction.

	110 * @param incremental if TRUE, then assume further characters may

	111 * be inserted at limit and check for partial matching. Otherwise

	112 * assume the text as given is complete.

	113 * @return a match degree value indicating a full match, a partial

	114 * match, or a mismatch. If incremental is FALSE then

	115 * U_PARTIAL_MATCH should never be returned.

	116 */

	117 virtual UMatchDegree matches(const Replaceable& text,

	118 int32_t& offset,

	119 int32_t limit,

	120 UBool incremental);

	121

	122 /**

	123 * Implement UnicodeMatcher

	124 * @param result Output param to receive the pattern.

	125 * @param escapeUnprintable if TRUE then escape the unprintable characters.

	126 * @return A reference to 'result'.

	127 */

	128 virtual UnicodeString& toPattern(UnicodeString& result,

	129 UBool escapeUnprintable = FALSE) const;

	130

	131 /**

	132 * Implement UnicodeMatcher

	133 * Returns TRUE if this matcher will match a character c, where c

	134 * & 0xFF == v, at offset, in the forward direction (with limit >

	135 * offset). This is used by <tt>RuleBasedTransliterator</tt> for

	136 * indexing.

	137 * @param v the given value

	138 * @return TRUE if this matcher will match a character c,

	139 * where c & 0xFF == v

	140 */

	141 virtual UBool matchesIndexValue(uint8_t v) const;

	142

	143 /**

	144 * Implement UnicodeMatcher

	145 */

	146 virtual void addMatchSetTo(UnicodeSet& toUnionTo) const;

	147

	148 /**

	149 * Implement UnicodeFunctor

	150 */

	151 virtual void setData(const TransliterationRuleData*);

	152

	153 /**

	154 * Replace characters in 'text' from 'start' to 'limit' with the

	155 * output text of this object. Update the 'cursor' parameter to

	156 * give the cursor position and return the length of the

	157 * replacement text.

	158 *

	159 * @param text the text to be matched

	160 * @param start inclusive start index of text to be replaced

	161 * @param limit exclusive end index of text to be replaced;

	162 * must be greater than or equal to start

	163 * @param cursor output parameter for the cursor position.

	164 * Not all replacer objects will update this, but in a complete

	165 * tree of replacer objects, representing the entire output side

	166 * of a transliteration rule, at least one must update it.

	167 * @return the number of 16-bit code units in the text replacing

	168 * the characters at offsets start..(limit-1) in text

	169 */

	170 virtual int32_t replace(Replaceable& text,

	171 int32_t start,

	172 int32_t limit,

	173 int32_t& cursor);

	174

	175 /**

	176 * Returns a string representation of this replacer. If the

	177 * result of calling this function is passed to the appropriate

	178 * parser, typically TransliteratorParser, it will produce another

	179 * replacer that is equal to this one.

	180 * @param result the string to receive the pattern. Previous

	181 * contents will be deleted.

	182 * @param escapeUnprintable if TRUE then convert unprintable

	183 * characters to their hex escape representations, \\uxxxx or

	184 * \\Uxxxxxxxx. Unprintable characters are defined by

	185 * Utility.isUnprintable().

	186 * @return a reference to 'result'.

	187 */

	188 virtual UnicodeString& toReplacerPattern(UnicodeString& result,

	189 UBool escapeUnprintable) const;

	190

	191 /**

	192 * Remove any match data. This must be called before performing a

	193 * set of matches with this segment.

	194 */

	195 void resetMatch();

	196

	197 /**

	198 * ICU "poor man's RTTI", returns a UClassID for the actual class.

	199 *

	200 * @draft ICU 2.2

	201 */

	202 virtual UClassID getDynamicClassID() const;

	203

	204 /**

	205 * ICU "poor man's RTTI", returns a UClassID for this class.

	206 *

	207 * @draft ICU 2.2

	208 */

	209 static UClassID U_EXPORT2 getStaticClassID();

	210

	211 /**

	212 * Union the set of all characters that may be output by this object

	213 * into the given set.

	214 * @param toUnionTo the set into which to union the output characters

	215 */

	216 virtual void addReplacementSetTo(UnicodeSet& toUnionTo) const;

	217

	218 private:

	219

	220 /**

	221 * The text to be matched.

	222 */

	223 UnicodeString pattern;

	224

	225 /**

	226 * Context object that maps stand-ins to matcher and replacer

	227 * objects.

	228 */

	229 const TransliterationRuleData* data;

	230

	231 /**

	232 * The segment number, 1-based, or 0 if not a segment.

	233 */

	234 int32_t segmentNumber;

	235

	236 /**

	237 * Start offset, in the match text, of the <em>rightmost</em>

	238 * match.

	239 */

	240 int32_t matchStart;

	241

	242 /**

	243 * Limit offset, in the match text, of the <em>rightmost</em>

	244 * match.

	245 */

	246 int32_t matchLimit;

	247

	248 };

	249

	250 U_NAMESPACE_END

	251

	252 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	253

	254 #endif

OLD	NEW

« no previous file with comments | « icu46/source/i18n/sortkey.cpp ('k') | icu46/source/i18n/strmatch.cpp » ('j') | no next file with comments »