icu46/source/i18n/strrepl.cpp - Issue 5516007: Check in the pristine copy of ICU 4.6...

Side by Side Diff: icu46/source/i18n/strrepl.cpp

Issue 5516007: Check in the pristine copy of ICU 4.6... (Closed) Base URL: svn://chrome-svn/chrome/trunk/deps/third_party/

Patch Set: Created 10 years ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View unified diff | Download patch | Annotate | Revision Log

Property Changes:

Added: svn:eol-style
+ LF

OLD	NEW
(Empty)
	1 /*

	2 **********************************************************************

	3 * Copyright (c) 2002-2004, International Business Machines Corporation

	4 * and others. All Rights Reserved.

	5 **********************************************************************

	6 * Date Name Description

	7 * 01/21/2002 aliu Creation.

	8 **********************************************************************

	9 */

	10

	11 #include "unicode/utypes.h"

	12

	13 #if !UCONFIG_NO_TRANSLITERATION

	14

	15 #include "strrepl.h"

	16 #include "rbt_data.h"

	17 #include "util.h"

	18 #include "unicode/uniset.h"

	19

	20 U_NAMESPACE_BEGIN

	21

	22 static const UChar EMPTY[] = { 0 }; // empty string: ""

	23

	24 UnicodeReplacer::~UnicodeReplacer() {}

	25 UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer)

	26

	27 /**

	28 * Construct a StringReplacer that sets the emits the given output

	29 * text and sets the cursor to the given position.

	30 * @param theOutput text that will replace input text when the

	31 * replace() method is called. May contain stand-in characters

	32 * that represent nested replacers.

	33 * @param theCursorPos cursor position that will be returned by

	34 * the replace() method

	35 * @param theData transliterator context object that translates

	36 * stand-in characters to UnicodeReplacer objects

	37 */

	38 StringReplacer::StringReplacer(const UnicodeString& theOutput,

	39 int32_t theCursorPos,

	40 const TransliterationRuleData* theData) {

	41 output = theOutput;

	42 cursorPos = theCursorPos;

	43 hasCursor = TRUE;

	44 data = theData;

	45 isComplex = TRUE;

	46 }

	47

	48 /**

	49 * Construct a StringReplacer that sets the emits the given output

	50 * text and does not modify the cursor.

	51 * @param theOutput text that will replace input text when the

	52 * replace() method is called. May contain stand-in characters

	53 * that represent nested replacers.

	54 * @param theData transliterator context object that translates

	55 * stand-in characters to UnicodeReplacer objects

	56 */

	57 StringReplacer::StringReplacer(const UnicodeString& theOutput,

	58 const TransliterationRuleData* theData) {

	59 output = theOutput;

	60 cursorPos = 0;

	61 hasCursor = FALSE;

	62 data = theData;

	63 isComplex = TRUE;

	64 }

	65

	66 /**

	67 * Copy constructor.

	68 */

	69 StringReplacer::StringReplacer(const StringReplacer& other) :

	70 UnicodeFunctor(other),

	71 UnicodeReplacer(other)

	72 {

	73 output = other.output;

	74 cursorPos = other.cursorPos;

	75 hasCursor = other.hasCursor;

	76 data = other.data;

	77 isComplex = other.isComplex;

	78 }

	79

	80 /**

	81 * Destructor

	82 */

	83 StringReplacer::~StringReplacer() {

	84 }

	85

	86 /**

	87 * Implement UnicodeFunctor

	88 */

	89 UnicodeFunctor* StringReplacer::clone() const {

	90 return new StringReplacer(*this);

	91 }

	92

	93 /**

	94 * Implement UnicodeFunctor

	95 */

	96 UnicodeReplacer* StringReplacer::toReplacer() const {

	97 return (UnicodeReplacer*) this;

	98 }

	99

	100 /**

	101 * UnicodeReplacer API

	102 */

	103 int32_t StringReplacer::replace(Replaceable& text,

	104 int32_t start,

	105 int32_t limit,

	106 int32_t& cursor) {

	107 int32_t outLen;

	108 int32_t newStart = 0;

	109

	110 // NOTE: It should be possible to _always_ run the complex

	111 // processing code; just slower. If not, then there is a bug

	112 // in the complex processing code.

	113

	114 // Simple (no nested replacers) Processing Code :

	115 if (!isComplex) {

	116 text.handleReplaceBetween(start, limit, output);

	117 outLen = output.length();

	118

	119 // Setup default cursor position (for cursorPos within output)

	120 newStart = cursorPos;

	121 }

	122

	123 // Complex (nested replacers) Processing Code :

	124 else {

	125 /* When there are segments to be copied, use the Replaceable.copy()

	126 * API in order to retain out-of-band data. Copy everything to the

	127 * end of the string, then copy them back over the key. This preserves

	128 * the integrity of indices into the key and surrounding context while

	129 * generating the output text.

	130 */

	131 UnicodeString buf;

	132 int32_t oOutput; // offset into 'output'

	133 isComplex = FALSE;

	134

	135 // The temporary buffer starts at tempStart, and extends

	136 // to destLimit. The start of the buffer has a single

	137 // character from before the key. This provides style

	138 // data when addition characters are filled into the

	139 // temporary buffer. If there is nothing to the left, use

	140 // the non-character U+FFFF, which Replaceable subclasses

	141 // should treat specially as a "no-style character."

	142 // destStart points to the point after the style context

	143 // character, so it is tempStart+1 or tempStart+2.

	144 int32_t tempStart = text.length(); // start of temp buffer

	145 int32_t destStart = tempStart; // copy new text to here

	146 if (start > 0) {

	147 int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));

	148 text.copy(start-len, start, tempStart);

	149 destStart += len;

	150 } else {

	151 UnicodeString str((UChar) 0xFFFF);

	152 text.handleReplaceBetween(tempStart, tempStart, str);

	153 destStart++;

	154 }

	155 int32_t destLimit = destStart;

	156

	157 for (oOutput=0; oOutput<output.length(); ) {

	158 if (oOutput == cursorPos) {

	159 // Record the position of the cursor

	160 newStart = destLimit - destStart; // relative to start

	161 }

	162 UChar32 c = output.char32At(oOutput);

	163 UnicodeReplacer* r = data->lookupReplacer(c);

	164 if (r == NULL) {

	165 // Accumulate straight (non-segment) text.

	166 buf.append(c);

	167 } else {

	168 isComplex = TRUE;

	169

	170 // Insert any accumulated straight text.

	171 if (buf.length() > 0) {

	172 text.handleReplaceBetween(destLimit, destLimit, buf);

	173 destLimit += buf.length();

	174 buf.truncate(0);

	175 }

	176

	177 // Delegate output generation to replacer object

	178 int32_t len = r->replace(text, destLimit, destLimit, cursor);

	179 destLimit += len;

	180 }

	181 oOutput += UTF_CHAR_LENGTH(c);

	182 }

	183 // Insert any accumulated straight text.

	184 if (buf.length() > 0) {

	185 text.handleReplaceBetween(destLimit, destLimit, buf);

	186 destLimit += buf.length();

	187 }

	188 if (oOutput == cursorPos) {

	189 // Record the position of the cursor

	190 newStart = destLimit - destStart; // relative to start

	191 }

	192

	193 outLen = destLimit - destStart;

	194

	195 // Copy new text to start, and delete it

	196 text.copy(destStart, destLimit, start);

	197 text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, EMPTY) ;

	198

	199 // Delete the old text (the key)

	200 text.handleReplaceBetween(start + outLen, limit + outLen, EMPTY);

	201 }

	202

	203 if (hasCursor) {

	204 // Adjust the cursor for positions outside the key. These

	205 // refer to code points rather than code units. If cursorPos

	206 // is within the output string, then use newStart, which has

	207 // already been set above.

	208 if (cursorPos < 0) {

	209 newStart = start;

	210 int32_t n = cursorPos;

	211 // Outside the output string, cursorPos counts code points

	212 while (n < 0 && newStart > 0) {

	213 newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));

	214 ++n;

	215 }

	216 newStart += n;

	217 } else if (cursorPos > output.length()) {

	218 newStart = start + outLen;

	219 int32_t n = cursorPos - output.length();

	220 // Outside the output string, cursorPos counts code points

	221 while (n > 0 && newStart < text.length()) {

	222 newStart += UTF_CHAR_LENGTH(text.char32At(newStart));

	223 --n;

	224 }

	225 newStart += n;

	226 } else {

	227 // Cursor is within output string. It has been set up above

	228 // to be relative to start.

	229 newStart += start;

	230 }

	231

	232 cursor = newStart;

	233 }

	234

	235 return outLen;

	236 }

	237

	238 /**

	239 * UnicodeReplacer API

	240 */

	241 UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,

	242 UBool escapeUnprintable) const {

	243 rule.truncate(0);

	244 UnicodeString quoteBuf;

	245

	246 int32_t cursor = cursorPos;

	247

	248 // Handle a cursor preceding the output

	249 if (hasCursor && cursor < 0) {

	250 while (cursor++ < 0) {

	251 ICU_Utility::appendToRule(rule, (UChar)0x0040 /@/, TRUE, escapeUnp rintable, quoteBuf);

	252 }

	253 // Fall through and append '\|' below

	254 }

	255

	256 for (int32_t i=0; i<output.length(); ++i) {

	257 if (hasCursor && i == cursor) {

	258 ICU_Utility::appendToRule(rule, (UChar)0x007C /\|/, TRUE, escapeUnp rintable, quoteBuf);

	259 }

	260 UChar c = output.charAt(i); // Ok to use 16-bits here

	261

	262 UnicodeReplacer* r = data->lookupReplacer(c);

	263 if (r == NULL) {

	264 ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBu f);

	265 } else {

	266 UnicodeString buf;

	267 r->toReplacerPattern(buf, escapeUnprintable);

	268 buf.insert(0, (UChar)0x20);

	269 buf.append((UChar)0x20);

	270 ICU_Utility::appendToRule(rule, buf,

	271 TRUE, escapeUnprintable, quoteBuf);

	272 }

	273 }

	274

	275 // Handle a cursor after the output. Use > rather than >= because

	276 // if cursor == output.length() it is at the end of the output,

	277 // which is the default position, so we need not emit it.

	278 if (hasCursor && cursor > output.length()) {

	279 cursor -= output.length();

	280 while (cursor-- > 0) {

	281 ICU_Utility::appendToRule(rule, (UChar)0x0040 /@/, TRUE, escapeUnp rintable, quoteBuf);

	282 }

	283 ICU_Utility::appendToRule(rule, (UChar)0x007C /\|/, TRUE, escapeUnprint able, quoteBuf);

	284 }

	285 // Flush quoteBuf out to result

	286 ICU_Utility::appendToRule(rule, -1,

	287 TRUE, escapeUnprintable, quoteBuf);

	288

	289 return rule;

	290 }

	291

	292 /**

	293 * Implement UnicodeReplacer

	294 */

	295 void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {

	296 UChar32 ch;

	297 for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {

	298 ch = output.char32At(i);

	299 UnicodeReplacer* r = data->lookupReplacer(ch);

	300 if (r == NULL) {

	301 toUnionTo.add(ch);

	302 } else {

	303 r->addReplacementSetTo(toUnionTo);

	304 }

	305 }

	306 }

	307

	308 /**

	309 * UnicodeFunctor API

	310 */

	311 void StringReplacer::setData(const TransliterationRuleData* d) {

	312 data = d;

	313 int32_t i = 0;

	314 while (i<output.length()) {

	315 UChar32 c = output.char32At(i);

	316 UnicodeFunctor* f = data->lookup(c);

	317 if (f != NULL) {

	318 f->setData(data);

	319 }

	320 i += UTF_CHAR_LENGTH(c);

	321 }

	322 }

	323

	324 U_NAMESPACE_END

	325

	326 #endif /* #if !UCONFIG_NO_TRANSLITERATION */

	327

	328 //eof

OLD	NEW

« no previous file with comments | « icu46/source/i18n/strrepl.h ('k') | icu46/source/i18n/stsearch.cpp » ('j') | no next file with comments »