icu46/source/i18n/strmatch.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/strmatch.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (c) 2001-2004, International Business Machines Corporation

	4 * and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 07/23/01 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION

	14

	15 #include "strmatch.h"

	16 #include "rbt_data.h"

	17 #include "util.h"

	18 #include "unicode/uniset.h"

	19

	20 U_NAMESPACE_BEGIN

	21

	// Zero-length UChar string: the array holds only the terminating 0.
	// replace() passes it to handleReplaceBetween() to delete the original text.
	22 static const UChar EMPTY[] = { 0 }; // empty string: ""

	23

	// NOTE(review): judging by its name, this macro supplies the RTTI
	// boilerplate for StringMatcher; its definition is not in this file -- confirm.
	24 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringMatcher)

	25

	26 StringMatcher::StringMatcher(const UnicodeString& theString,

	27 int32_t start,

	28 int32_t limit,

	29 int32_t segmentNum,

	30 const TransliterationRuleData& theData) :

	31 data(&theData),

	32 segmentNumber(segmentNum),

	33 matchStart(-1),

	34 matchLimit(-1)

	35 {

	36 theString.extractBetween(start, limit, pattern);

	37 }

	38

	39 StringMatcher::StringMatcher(const StringMatcher& o) :

	40 UnicodeFunctor(o),

	41 UnicodeMatcher(o),

	42 UnicodeReplacer(o),

	43 pattern(o.pattern),

	44 data(o.data),

	45 segmentNumber(o.segmentNumber),

	46 matchStart(o.matchStart),

	47 matchLimit(o.matchLimit)

	48 {

	49 }

	50

	51 /**

	52 * Destructor

	53 */

	54 StringMatcher::~StringMatcher() {

	55 }

	56

	57 /**

	58 * Implement UnicodeFunctor

	59 */

	60 UnicodeFunctor* StringMatcher::clone() const {

	61 return new StringMatcher(*this);

	62 }

	63

	64 /**

	65 * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer

	66 * and return the pointer.

	67 */

	68 UnicodeMatcher* StringMatcher::toMatcher() const {

	69 return (UnicodeMatcher*) this;

	70 }

	71

	72 /**

	73 * UnicodeFunctor API. Cast 'this' to a UnicodeReplacer* pointer

	74 * and return the pointer.

	75 */

	76 UnicodeReplacer* StringMatcher::toReplacer() const {

	77 return (UnicodeReplacer*) this;

	78 }

	79

	80 /**

	81 * Implement UnicodeMatcher

	82 */

	83 UMatchDegree StringMatcher::matches(const Replaceable& text,

	84 int32_t& offset,

	85 int32_t limit,

	86 UBool incremental) {

	87 int32_t i;

	88 int32_t cursor = offset;

	89 if (limit < cursor) {

	90 // Match in the reverse direction

	91 for (i=pattern.length()-1; i>=0; --i) {

	92 UChar keyChar = pattern.charAt(i);

	93 UnicodeMatcher* subm = data->lookupMatcher(keyChar);

	94 if (subm == 0) {

	95 if (cursor > limit &&

	96 keyChar == text.charAt(cursor)) {

	97 --cursor;

	98 } else {

	99 return U_MISMATCH;

	100 }

	101 } else {

	102 UMatchDegree m =

	103 subm->matches(text, cursor, limit, incremental);

	104 if (m != U_MATCH) {

	105 return m;

	106 }

	107 }

	108 }

	109 // Record the match position, but adjust for a normal

	110 // forward start, limit, and only if a prior match does not

	111 // exist -- we want the rightmost match.

	112 if (matchStart < 0) {

	113 matchStart = cursor+1;

	114 matchLimit = offset+1;

	115 }

	116 } else {

	117 for (i=0; i<pattern.length(); ++i) {

	118 if (incremental && cursor == limit) {

	119 // We've reached the context limit without a mismatch and

	120 // without completing our match.

	121 return U_PARTIAL_MATCH;

	122 }

	123 UChar keyChar = pattern.charAt(i);

	124 UnicodeMatcher* subm = data->lookupMatcher(keyChar);

	125 if (subm == 0) {

	126 // Don't need the cursor < limit check if

	127 // incremental is TRUE (because it's done above); do need

	128 // it otherwise.

	129 if (cursor < limit &&

	130 keyChar == text.charAt(cursor)) {

	131 ++cursor;

	132 } else {

	133 return U_MISMATCH;

	134 }

	135 } else {

	136 UMatchDegree m =

	137 subm->matches(text, cursor, limit, incremental);

	138 if (m != U_MATCH) {

	139 return m;

	140 }

	141 }

	142 }

	143 // Record the match position

	144 matchStart = offset;

	145 matchLimit = cursor;

	146 }

	147

	148 offset = cursor;

	149 return U_MATCH;

	150 }

	151

	152 /**

	153 * Implement UnicodeMatcher

	154 */

	155 UnicodeString& StringMatcher::toPattern(UnicodeString& result,

	156 UBool escapeUnprintable) const

	157 {

	158 result.truncate(0);

	159 UnicodeString str, quoteBuf;

	160 if (segmentNumber > 0) {

	161 result.append((UChar)40); /(/

	162 }

	163 for (int32_t i=0; i<pattern.length(); ++i) {

	164 UChar keyChar = pattern.charAt(i);

	165 const UnicodeMatcher* m = data->lookupMatcher(keyChar);

	166 if (m == 0) {

	167 ICU_Utility::appendToRule(result, keyChar, FALSE, escapeUnprintable, quoteBuf);

	168 } else {

	169 ICU_Utility::appendToRule(result, m->toPattern(str, escapeUnprintabl e),

	170 TRUE, escapeUnprintable, quoteBuf);

	171 }

	172 }

	173 if (segmentNumber > 0) {

	174 result.append((UChar)41); /)/

	175 }

	176 // Flush quoteBuf out to result

	177 ICU_Utility::appendToRule(result, -1,

	178 TRUE, escapeUnprintable, quoteBuf);

	179 return result;

	180 }

	181

	182 /**

	183 * Implement UnicodeMatcher

	184 */

	185 UBool StringMatcher::matchesIndexValue(uint8_t v) const {

	186 if (pattern.length() == 0) {

	187 return TRUE;

	188 }

	189 UChar32 c = pattern.char32At(0);

	190 const UnicodeMatcher *m = data->lookupMatcher(c);

	191 return (m == 0) ? ((c & 0xFF) == v) : m->matchesIndexValue(v);

	192 }

	193

	194 /**

	195 * Implement UnicodeMatcher

	196 */

	197 void StringMatcher::addMatchSetTo(UnicodeSet& toUnionTo) const {

	198 UChar32 ch;

	199 for (int32_t i=0; i<pattern.length(); i+=UTF_CHAR_LENGTH(ch)) {

	200 ch = pattern.char32At(i);

	201 const UnicodeMatcher* matcher = data->lookupMatcher(ch);

	202 if (matcher == NULL) {

	203 toUnionTo.add(ch);

	204 } else {

	205 matcher->addMatchSetTo(toUnionTo);

	206 }

	207 }

	208 }

	209

	210 /**

	211 * UnicodeReplacer API

	212 */

	213 int32_t StringMatcher::replace(Replaceable& text,

	214 int32_t start,

	215 int32_t limit,

	216 int32_t& /cursor/) {

	217

	218 int32_t outLen = 0;

	219

	220 // Copy segment with out-of-band data

	221 int32_t dest = limit;

	222 // If there was no match, that means that a quantifier

	223 // matched zero-length. E.g., x (a)* y matched "xy".

	224 if (matchStart >= 0) {

	225 if (matchStart != matchLimit) {

	226 text.copy(matchStart, matchLimit, dest);

	227 outLen = matchLimit - matchStart;

	228 }

	229 }

	230

	231 text.handleReplaceBetween(start, limit, EMPTY); // delete original text

	232

	233 return outLen;

	234 }

	235

	236 /**

	237 * UnicodeReplacer API

	238 */

	239 UnicodeString& StringMatcher::toReplacerPattern(UnicodeString& rule,

	240 UBool /escapeUnprintable/) con st {

	241 // assert(segmentNumber > 0);

	242 rule.truncate(0);

	243 rule.append((UChar)0x0024 /$/);

	244 ICU_Utility::appendNumber(rule, segmentNumber, 10, 1);

	245 return rule;

	246 }

	247

	248 /**

	249 * Remove any match info. This must be called before performing a

	250 * set of matches with this segment.

	251 */

	252 void StringMatcher::resetMatch() {

	253 matchStart = matchLimit = -1;

	254 }

	255

	256 /**

	257 * Union the set of all characters that may output by this object

	258 * into the given set.

	259 * @param toUnionTo the set into which to union the output characters

	260 */

	261 void StringMatcher::addReplacementSetTo(UnicodeSet& /toUnionTo/) const {

	262 // The output of this replacer varies; it is the source text between

	263 // matchStart and matchLimit. Since this varies depending on the

	264 // input text, we can't compute it here. We can either do nothing

	265 // or we can add ALL characters to the set. It's probably more useful

	266 // to do nothing.

	267 }

	268

	269 /**

	270 * Implement UnicodeFunctor

	271 */

	272 void StringMatcher::setData(const TransliterationRuleData* d) {

	273 data = d;

	274 int32_t i = 0;

	275 while (i<pattern.length()) {

	276 UChar32 c = pattern.char32At(i);

	277 UnicodeFunctor* f = data->lookup(c);

	278 if (f != NULL) {

	279 f->setData(data);

	280 }

	281 i += UTF_CHAR_LENGTH(c);

	282 }

	283 }

	284

	285 U_NAMESPACE_END

	286

	287 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	288

	289 //eof

OLD	NEW

« no previous file with comments | « icu46/source/i18n/strmatch.h ('k') | icu46/source/i18n/strrepl.h » ('j') | no next file with comments »