| Index: icu46/source/i18n/strrepl.cpp
|
| ===================================================================
|
| --- icu46/source/i18n/strrepl.cpp (revision 0)
|
| +++ icu46/source/i18n/strrepl.cpp (revision 0)
|
| @@ -0,0 +1,328 @@
|
| +/*
|
| +**********************************************************************
|
| +* Copyright (c) 2002-2004, International Business Machines Corporation
|
| +* and others. All Rights Reserved.
|
| +**********************************************************************
|
| +* Date Name Description
|
| +* 01/21/2002 aliu Creation.
|
| +**********************************************************************
|
| +*/
|
| +
|
| +#include "unicode/utypes.h"
|
| +
|
| +#if !UCONFIG_NO_TRANSLITERATION
|
| +
|
| +#include "strrepl.h"
|
| +#include "rbt_data.h"
|
| +#include "util.h"
|
| +#include "unicode/uniset.h"
|
| +
|
| +U_NAMESPACE_BEGIN
|
| +
|
| +static const UChar EMPTY[] = { 0 }; // empty string: ""; passed as the replacement text when a range is only being deleted
|
| +
|
| +UnicodeReplacer::~UnicodeReplacer() {} // out-of-line definition of the base-class destructor; the body is empty
|
| +UOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer) // NOTE(review): presumably expands to the RTTI boilerplate for StringReplacer - confirm against the macro definition
|
| +
|
| +/**
|
| + * Construct a StringReplacer that emits the given output
|
| + * text and sets the cursor to the given position.
|
| + * @param theOutput text that will replace input text when the
|
| + * replace() method is called. May contain stand-in characters
|
| + * that represent nested replacers.
|
| + * @param theCursorPos cursor offset relative to the output text; may be
|
| + * negative or exceed the output length. Applied by the replace() method.
|
| + * @param theData transliterator context object that translates
|
| + * stand-in characters to UnicodeReplacer objects; the pointer is stored, not copied
|
| + */
|
| +StringReplacer::StringReplacer(const UnicodeString& theOutput,
|
| + int32_t theCursorPos,
|
| + const TransliterationRuleData* theData) {
|
| + output = theOutput;
|
| + cursorPos = theCursorPos;
|
| + hasCursor = TRUE; // replace() will write a new value into its cursor argument
|
| + data = theData;
|
| + isComplex = TRUE; // start on the nested-replacer path; replace() clears this when it finds no stand-ins
|
| +}
|
| +
|
| +/**
|
| + * Construct a StringReplacer that emits the given output
|
| + * text and does not modify the cursor.
|
| + * @param theOutput text that will replace input text when the
|
| + * replace() method is called. May contain stand-in characters
|
| + * that represent nested replacers.
|
| + * @param theData transliterator context object that translates
|
| + * stand-in characters to UnicodeReplacer objects; the pointer is stored, not copied
|
| + */
|
| +StringReplacer::StringReplacer(const UnicodeString& theOutput,
|
| + const TransliterationRuleData* theData) {
|
| + output = theOutput;
|
| + cursorPos = 0; // not consulted for cursor adjustment while hasCursor is FALSE
|
| + hasCursor = FALSE; // replace() leaves its cursor argument untouched
|
| + data = theData;
|
| + isComplex = TRUE; // start on the nested-replacer path; replace() clears this when it finds no stand-ins
|
| +}
|
| +
|
| +/**
|
| + * Copy constructor. Copies every field of other; the data pointer is copied as-is, so both objects refer to the same data.
|
| + */
|
| +StringReplacer::StringReplacer(const StringReplacer& other) :
|
| + UnicodeFunctor(other),
|
| + UnicodeReplacer(other)
|
| +{
|
| + output = other.output;
|
| + cursorPos = other.cursorPos;
|
| + hasCursor = other.hasCursor;
|
| + data = other.data; // pointer copy only
|
| + isComplex = other.isComplex;
|
| +}
|
| +
|
| +/**
|
| + * Destructor. The body is empty: the object pointed to by data is not deleted here.
|
| + */
|
| +StringReplacer::~StringReplacer() {
|
| +}
|
| +
|
| +/**
|
| + * Implement UnicodeFunctor: return a newly allocated copy of this object, made with the copy constructor.
|
| + */
|
| +UnicodeFunctor* StringReplacer::clone() const {
|
| + return new StringReplacer(*this);
|
| +}
|
| +
|
| +/**
|
| + * Implement UnicodeFunctor: return this same object as a UnicodeReplacer pointer (no new object is created).
|
| + */
|
| +UnicodeReplacer* StringReplacer::toReplacer() const {
|
| + return (UnicodeReplacer*) this; // the C-style cast also drops the const that this carries in a const method
|
| +}
|
| +
|
| +/**
|
| + * UnicodeReplacer API: replace text in [start, limit) with the output (expanding nested replacers), set cursor only if hasCursor, and return the length of the text written.
|
| + */
|
| +int32_t StringReplacer::replace(Replaceable& text,
|
| + int32_t start,
|
| + int32_t limit,
|
| + int32_t& cursor) {
|
| + int32_t outLen; // length of the text written in place of the key; this is the return value
|
| + int32_t newStart = 0; // cursor candidate; relative to start until adjusted in the hasCursor section below
|
| +
|
| + // NOTE: It should be possible to _always_ run the complex
|
| + // processing code; just slower. If not, then there is a bug
|
| + // in the complex processing code.
|
| +
|
| + // Simple (no nested replacers) Processing Code :
|
| + if (!isComplex) {
|
| + text.handleReplaceBetween(start, limit, output);
|
| + outLen = output.length();
|
| +
|
| + // Set up default cursor position (for cursorPos within output)
|
| + newStart = cursorPos;
|
| + }
|
| +
|
| + // Complex (nested replacers) Processing Code :
|
| + else {
|
| + /* When there are segments to be copied, use the Replaceable.copy()
|
| + * API in order to retain out-of-band data. Copy everything to the
|
| + * end of the string, then copy them back over the key. This preserves
|
| + * the integrity of indices into the key and surrounding context while
|
| + * generating the output text.
|
| + */
|
| + UnicodeString buf;
|
| + int32_t oOutput; // offset into 'output'
|
| + isComplex = FALSE; // assume no nested replacers; set back to TRUE below if lookupReplacer() finds one
|
| +
|
| + // The temporary buffer starts at tempStart, and extends
|
| + // to destLimit. The start of the buffer has a single
|
| + // character from before the key. This provides style
|
| + // data when additional characters are filled into the
|
| + // temporary buffer. If there is nothing to the left, use
|
| + // the non-character U+FFFF, which Replaceable subclasses
|
| + // should treat specially as a "no-style character."
|
| + // destStart points to the point after the style context
|
| + // character, so it is tempStart+1 or tempStart+2.
|
| + int32_t tempStart = text.length(); // start of temp buffer
|
| + int32_t destStart = tempStart; // copy new text to here
|
| + if (start > 0) {
|
| + int32_t len = UTF_CHAR_LENGTH(text.char32At(start-1));
|
| + text.copy(start-len, start, tempStart);
|
| + destStart += len;
|
| + } else {
|
| + UnicodeString str((UChar) 0xFFFF);
|
| + text.handleReplaceBetween(tempStart, tempStart, str);
|
| + destStart++;
|
| + }
|
| + int32_t destLimit = destStart;
|
| +
|
| + for (oOutput=0; oOutput<output.length(); ) {
|
| + if (oOutput == cursorPos) {
|
| + // Record the position of the cursor
|
| + newStart = destLimit - destStart; // relative to start
|
| + }
|
| + UChar32 c = output.char32At(oOutput);
|
| + UnicodeReplacer* r = data->lookupReplacer(c);
|
| + if (r == NULL) {
|
| + // Accumulate straight (non-segment) text.
|
| + buf.append(c);
|
| + } else {
|
| + isComplex = TRUE;
|
| +
|
| + // Insert any accumulated straight text.
|
| + if (buf.length() > 0) {
|
| + text.handleReplaceBetween(destLimit, destLimit, buf);
|
| + destLimit += buf.length();
|
| + buf.truncate(0);
|
| + }
|
| +
|
| + // Delegate output generation to replacer object
|
| + int32_t len = r->replace(text, destLimit, destLimit, cursor);
|
| + destLimit += len;
|
| + }
|
| + oOutput += UTF_CHAR_LENGTH(c);
|
| + }
|
| + // Insert any accumulated straight text.
|
| + if (buf.length() > 0) {
|
| + text.handleReplaceBetween(destLimit, destLimit, buf);
|
| + destLimit += buf.length();
|
| + }
|
| + if (oOutput == cursorPos) {
|
| + // Record the position of the cursor
|
| + newStart = destLimit - destStart; // relative to start
|
| + }
|
| +
|
| + outLen = destLimit - destStart;
|
| +
|
| + // Copy new text to start, then delete the temp buffer (its indices are now shifted right by outLen)
|
| + text.copy(destStart, destLimit, start);
|
| + text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, EMPTY);
|
| +
|
| + // Delete the old text (the key)
|
| + text.handleReplaceBetween(start + outLen, limit + outLen, EMPTY);
|
| + }
|
| +
|
| + if (hasCursor) {
|
| + // Adjust the cursor for positions outside the key. These
|
| + // refer to code points rather than code units. If cursorPos
|
| + // is within the output string, then use newStart, which has
|
| + // already been set above.
|
| + if (cursorPos < 0) {
|
| + newStart = start;
|
| + int32_t n = cursorPos;
|
| + // Outside the output string, cursorPos counts code points
|
| + while (n < 0 && newStart > 0) {
|
| + newStart -= UTF_CHAR_LENGTH(text.char32At(newStart-1));
|
| + ++n;
|
| + }
|
| + newStart += n; // n is still negative if index 0 was reached first; the remainder is applied directly to the index
|
| + } else if (cursorPos > output.length()) {
|
| + newStart = start + outLen;
|
| + int32_t n = cursorPos - output.length();
|
| + // Outside the output string, cursorPos counts code points
|
| + while (n > 0 && newStart < text.length()) {
|
| + newStart += UTF_CHAR_LENGTH(text.char32At(newStart));
|
| + --n;
|
| + }
|
| + newStart += n; // n is still positive if the end of text was reached first; the remainder is applied directly to the index
|
| + } else {
|
| + // Cursor is within output string. It has been set up above
|
| + // to be relative to start.
|
| + newStart += start;
|
| + }
|
| +
|
| + cursor = newStart;
|
| + }
|
| +
|
| + return outLen;
|
| +}
|
| +
|
| +/**
|
| + * UnicodeReplacer API: rebuild the rule-pattern text of this replacer in 'rule' (its previous contents are discarded) and return 'rule'.
|
| + */
|
| +UnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
|
| +                                                 UBool escapeUnprintable) const {
|
| +    rule.truncate(0);
|
| +    UnicodeString quoteBuf; // scratch buffer handed to every appendToRule() call; flushed at the end
|
| +
|
| +    int32_t cursor = cursorPos;
|
| +
|
| +    // Cursor before the output: emit '|' first, then one '@' per position. NOTE(review): '|'-before-'@' is the rule syntax as I recall it - confirm against the rule parser.
|
| +    if (hasCursor && cursor < 0) {
|
| +        ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
|
| +        for (int32_t n = cursor; n < 0; ++n) { // separate counter: 'cursor' stays negative so no later branch emits a second '|'
|
| +            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);
|
| +        }
|
| +    } // (the former 'while (cursor++ < 0)' left cursor at 1, so '|' landed after the first output unit or was lost)
|
| +
|
| +    for (int32_t i=0; i<output.length(); ++i) {
|
| +        if (hasCursor && i == cursor) {
|
| +            ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
|
| +        }
|
| +        UChar c = output.charAt(i); // Ok to use 16-bits here
|
| +
|
| +        UnicodeReplacer* r = data->lookupReplacer(c);
|
| +        if (r == NULL) {
|
| +            ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);
|
| +        } else {
|
| +            UnicodeString buf;
|
| +            r->toReplacerPattern(buf, escapeUnprintable);
|
| +            buf.insert(0, (UChar)0x20); // pad the nested pattern with a space on each side
|
| +            buf.append((UChar)0x20);
|
| +            ICU_Utility::appendToRule(rule, buf,
|
| +                                      TRUE, escapeUnprintable, quoteBuf);
|
| +        }
|
| +    }
|
| +
|
| +    // Handle a cursor after the output. Use > rather than >= because
|
| +    // if cursor == output.length() it is at the end of the output,
|
| +    // which is the default position, so we need not emit it.
|
| +    if (hasCursor && cursor > output.length()) {
|
| +        cursor -= output.length();
|
| +        while (cursor-- > 0) {
|
| +            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);
|
| +        }
|
| +        ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
|
| +    }
|
| +    // Flush quoteBuf out to result
|
| +    ICU_Utility::appendToRule(rule, -1,
|
| +                              TRUE, escapeUnprintable, quoteBuf);
|
| +
|
| +    return rule;
|
| +}
|
| +
|
| +/**
|
| + * Implement UnicodeReplacer: add to toUnionTo every literal code point of output, and let each nested replacer add its own set.
|
| + */
|
| +void StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
|
| + UChar32 ch; // declared outside the loop because the increment expression reads it
|
| + for (int32_t i=0; i<output.length(); i+=UTF_CHAR_LENGTH(ch)) {
|
| + ch = output.char32At(i);
|
| + UnicodeReplacer* r = data->lookupReplacer(ch);
|
| + if (r == NULL) {
|
| + toUnionTo.add(ch); // no replacer registered for ch: it is literal output
|
| + } else {
|
| + r->addReplacementSetTo(toUnionTo); // ch is a stand-in: delegate to the nested replacer
|
| + }
|
| + }
|
| +}
|
| +
|
| +/**
|
| + * UnicodeFunctor API: store d as the data object, then call setData() with it on every functor that data->lookup() returns for a code point of output.
|
| + */
|
| +void StringReplacer::setData(const TransliterationRuleData* d) {
|
| + data = d; // pointer is stored as-is
|
| + int32_t i = 0;
|
| + while (i<output.length()) {
|
| + UChar32 c = output.char32At(i);
|
| + UnicodeFunctor* f = data->lookup(c); // NULL when c has no functor registered in data
|
| + if (f != NULL) {
|
| + f->setData(data);
|
| + }
|
| + i += UTF_CHAR_LENGTH(c); // advance by the width of the code point just read
|
| + }
|
| +}
|
| +
|
| +U_NAMESPACE_END
|
| +
|
| +#endif /* #if !UCONFIG_NO_TRANSLITERATION */
|
| +
|
| +//eof
|
|
|
| Property changes on: icu46/source/i18n/strrepl.cpp
|
| ___________________________________________________________________
|
| Added: svn:eol-style
|
| + LF
|
|
|
|
|