source/i18n/affixpatternparser.cpp - Issue 1621843002: ICU 56 update step 1

Unified Diff: source/i18n/affixpatternparser.cpp

Issue 1621843002: ICU 56 update step 1 (Closed) Base URL: https://chromium.googlesource.com/chromium/deps/icu.git@561

Patch Set: Created 4 years, 11 months ago

Use n/p to move between diff chunks; N/P to move between comments. Draft comments are only viewable by you.

Jump to:

View side-by-side diff with in-line comments

Download patch

Index: source/i18n/affixpatternparser.cpp

diff --git a/source/i18n/affixpatternparser.cpp b/source/i18n/affixpatternparser.cpp

new file mode 100644

index 0000000000000000000000000000000000000000..41eadef2b3a88a8e22f4b3faa310ce175b1891cc

--- /dev/null

+++ b/source/i18n/affixpatternparser.cpp

@@ -0,0 +1,692 @@

+/*

+ *

+ * file name: affixpatternparser.cpp

+ */

+#include "unicode/utypes.h"

+#if !UCONFIG_NO_FORMATTING

+#include "unicode/dcfmtsym.h"

+#include "unicode/plurrule.h"

+#include "unicode/ucurr.h"

+#include "affixpatternparser.h"

+#include "charstr.h"

+#include "precision.h"

+#include "uassert.h"

+#include "unistrappender.h"

+ static UChar gDefaultSymbols[] = {0xa4, 0xa4, 0xa4};  // three U+00A4 units; prefixes of length 1, 2 and 3 are used below as the default symbol, ISO and long currency text

+static UChar gPercent = 0x25;  // default percent sign installed by AffixPatternParser()

+static UChar gPerMill = 0x2030;  // default per-mille sign installed by AffixPatternParser()

+static UChar gNegative = 0x2D;  // default minus sign installed by AffixPatternParser()

+static UChar gPositive = 0x2B;  // default plus sign installed by AffixPatternParser()

+#define PACK_TOKEN_AND_LENGTH(t, l) ((UChar) (((t) << 8) | (l & 0xFF)))  // token type goes above bit 8, length in the low 8 bits; NOTE(review): l is not parenthesized in the expansion

+#define UNPACK_TOKEN(c) ((AffixPattern::ETokenType) (((c) >> 8) & 0x7F))  // token type without the 0x80 continuation flag

+#define UNPACK_LONG(c) (((c) >> 8) & 0x80)  // non-zero when the 0x80 flag is set, i.e. the unit was packed with kLiteral | 0x80 by addLiteral

+#define UNPACK_LENGTH(c) ((c) & 0xFF)  // low 8 bits: a count, or one byte of a literal length

+U_NAMESPACE_BEGIN

+static int32_t  // returns the number of UChars the token at idx occupies (1, 2, or up to 4 for currency)

+nextToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {  // reads one token of an apostrophe-marked affix string and stores its character in *token

+ if (buffer[idx] != 0x27 || idx + 1 == len) {  // not an apostrophe, or an apostrophe that is the last UChar: plain one-unit token

+ *token = buffer[idx];

+ return 1;

+ }

+ *token = buffer[idx + 1];  // apostrophe followed by something: the token is the UChar after the apostrophe

+ if (buffer[idx + 1] == 0xA4) {

+ int32_t i = 2;

+ for (; idx + i < len && i < 4 && buffer[idx + i] == buffer[idx + 1]; ++i);  // extend over repeated 0xA4; stops at 4, i.e. the apostrophe plus at most three currency signs

+ return i;

+ }

+ return 2;  // apostrophe plus one UChar

+static int32_t  // returns the length of the run of identical UChars consumed, capped per character kind

+nextUserToken(const UChar *buffer, int32_t idx, int32_t len, UChar *token) {  // reads one token of a user affix string and stores buffer[idx] in *token

+ *token = buffer[idx];

+ int32_t max;  // longest run of this character that is treated as a single token

+ switch (buffer[idx]) {

+ case 0x27:

+ max = 2;  // apostrophes are grouped at most two at a time

+ break;

+ case 0xA4:

+ max = 3;  // currency signs are grouped at most three at a time

+ break;

+ default:

+ max = 1;  // every other UChar is a token on its own

+ break;

+ }

+ int32_t i = 1;

+ for (; idx + i < len && i < max && buffer[idx + i] == buffer[idx]; ++i);  // count repeats of buffer[idx] without passing len or max

+ return i;

+CurrencyAffixInfo::CurrencyAffixInfo()  // starts out holding the placeholder currency text taken from gDefaultSymbols

+ : fSymbol(gDefaultSymbols, 1),  // one U+00A4

+ fISO(gDefaultSymbols, 2),  // two U+00A4

+ fLong(DigitAffix(gDefaultSymbols, 3)),  // three U+00A4

+ fIsDefault(TRUE) {

+void

+CurrencyAffixInfo::set(  // loads symbol, ISO code and per-plural long names for currency, or restores the placeholders when currency is NULL

+ const char *locale,

+ const PluralRules *rules,

+ const UChar *currency,

+ UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return;

+ }

+ fIsDefault = FALSE;  // cleared up front; only the NULL-currency branch below sets it back to TRUE

+ if (currency == NULL) {

+ fSymbol.setTo(gDefaultSymbols, 1);

+ fISO.setTo(gDefaultSymbols, 2);

+ fLong.remove();

+ fLong.append(gDefaultSymbols, 3);

+ fIsDefault = TRUE;

+ return;

+ }

+ int32_t len;

+ UBool unusedIsChoice;

+ const UChar *symbol = ucurr_getName(

+ currency, locale, UCURR_SYMBOL_NAME, &unusedIsChoice,

+ &len, &status);

+ if (U_FAILURE(status)) {

+ return;  // leaves fIsDefault FALSE and the text fields untouched

+ }

+ fSymbol.setTo(symbol, len);

+ fISO.setTo(currency, u_strlen(currency));

+ fLong.remove();

+ StringEnumeration* keywords = rules->getKeywords(status);  // NOTE(review): rules is dereferenced without a NULL check; confirm callers always pass one with a non-NULL currency

+ if (U_FAILURE(status)) {

+ return;  // NOTE(review): returns without deleting keywords; confirm getKeywords cannot hand back a non-NULL enumeration together with a failure status

+ }

+ const UnicodeString* pluralCount;

+ while ((pluralCount = keywords->snext(status)) != NULL) {  // one long name per plural keyword

+ CharString pCount;

+ pCount.appendInvariantChars(*pluralCount, status);

+ const UChar *pluralName = ucurr_getPluralName(

+ currency, locale, &unusedIsChoice, pCount.data(),

+ &len, &status);

+ fLong.setVariant(pCount.data(), UnicodeString(pluralName, len), status);

+ }

+ delete keywords;

+void

+CurrencyAffixInfo::adjustPrecision(  // applies the fraction digit count and rounding increment that ucurr reports for currency and usage to precision

+ const UChar *currency, const UCurrencyUsage usage,

+ FixedPrecision &precision, UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return;

+ }

+ int32_t digitCount = ucurr_getDefaultFractionDigitsForUsage(

+ currency, usage, &status);  // NOTE(review): status is not re-checked before digitCount is used below

+ precision.fMin.setFracDigitCount(digitCount);  // min and max both get the same count

+ precision.fMax.setFracDigitCount(digitCount);

+ double increment = ucurr_getRoundingIncrementForUsage(

+ currency, usage, &status);

+ if (increment == 0.0) {

+ precision.fRoundingIncrement.clear();  // a zero increment clears any previously set rounding increment

+ } else {

+ precision.fRoundingIncrement.set(increment);

+ // guard against round-off error

+ precision.fRoundingIncrement.round(6);

+ }

+void

+AffixPattern::addLiteral(  // appends len UChars of literal text and merges them into a trailing literal token if one exists

+ const UChar *literal, int32_t start, int32_t len) {

+ char32Count += u_countChar32(literal + start, len);  // char32Count is kept in code points, not UChars

+ literals.append(literal, start, len);

+ int32_t tlen = tokens.length();

+ // Takes 4 UChars to encode maximum literal length.

+ UChar *tokenChars = tokens.getBuffer(tlen + 4);  // NOTE(review): the returned pointer is used without a NULL check

+ // find start of literal size. May be tlen if there is no literal.

+ // While finding start of literal size, compute literal length

+ int32_t literalLength = 0;

+ int32_t tLiteralStart = tlen;

+ while (tLiteralStart > 0 && UNPACK_TOKEN(tokenChars[tLiteralStart - 1]) == kLiteral) {  // UNPACK_TOKEN masks off the 0x80 flag, so continuation units match kLiteral too

+ tLiteralStart--;

+ literalLength <<= 8;  // units are visited last to first, so the most significant length byte is read first

+ literalLength |= UNPACK_LENGTH(tokenChars[tLiteralStart]);

+ }

+ // Add number of chars we just added to literal

+ literalLength += len;

+ // Now encode the new length starting at tLiteralStart

+ tlen = tLiteralStart;

+ tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral, literalLength & 0xFF);  // first unit: lowest 8 bits, no continuation flag

+ literalLength >>= 8;

+ while (literalLength) {

+ tokenChars[tlen++] = PACK_TOKEN_AND_LENGTH(kLiteral | 0x80, literalLength & 0xFF);  // each further unit carries the next 8 bits and the 0x80 flag

+ literalLength >>= 8;

+ }

+ tokens.releaseBuffer(tlen);  // tlen is now the token string length including the re-encoded literal length

+void

+AffixPattern::add(ETokenType t) {  // convenience overload: adds token t with a count of 1

+ add(t, 1);

+void

+AffixPattern::addCurrency(uint8_t count) {  // adds a kCurrency token whose length field is count

+ add(kCurrency, count);

+void

+AffixPattern::add(ETokenType t, uint8_t count) {  // appends a non-literal token with the given count and records which special tokens the pattern contains

+ U_ASSERT(t != kLiteral);  // this overload is not for literals

+ char32Count += count;

+ switch (t) {

+ case kCurrency:

+ hasCurrencyToken = TRUE;

+ break;

+ case kPercent:

+ hasPercentToken = TRUE;

+ break;

+ case kPerMill:

+ hasPermillToken = TRUE;

+ break;

+ default:

+ // Do nothing

+ break;

+ }

+ tokens.append(PACK_TOKEN_AND_LENGTH(t, count));  // one UChar per token: type above bit 8, count in the low 8 bits

+AffixPattern &

+AffixPattern::append(const AffixPattern &other) {  // replays every token of other onto this pattern and returns *this

+ AffixPatternIterator iter;

+ other.iterator(iter);

+ UnicodeString literal;

+ while (iter.nextToken()) {

+ switch (iter.getTokenType()) {

+ case kLiteral:

+ iter.getLiteral(literal);

+ addLiteral(literal.getBuffer(), 0, literal.length());  // addLiteral merges with a trailing literal token if there is one

+ break;

+ case kCurrency:

+ addCurrency(iter.getTokenLength());  // currency keeps its stored length

+ break;

+ default:

+ add(iter.getTokenType());  // single-argument add uses a count of 1

+ break;

+ }

+ return *this;

+void

+AffixPattern::remove() {  // empties the pattern: clears tokens and literals and resets the flags and the code point count

+ tokens.remove();

+ literals.remove();

+ hasCurrencyToken = FALSE;

+ hasPercentToken = FALSE;

+ hasPermillToken = FALSE;

+ char32Count = 0;

+// escapes literals for strings where special characters are NOT escaped

+// except for apostrophe.

+static void escapeApostropheInLiteral(  // copies literal to appender one UChar at a time; used by AffixPattern::toString for kLiteral tokens

+ const UnicodeString &literal, UnicodeStringAppender &appender) {

+ int32_t len = literal.length();

+ const UChar *buffer = literal.getBuffer();

+ for (int32_t i = 0; i < len; ++i) {

+ UChar ch = buffer[i];

+ switch (ch) {

+ case 0x27:

+ appender.append((UChar) 0x27);  // NOTE(review): as shown this appends the same single UChar as the default case; a repeated line may have been lost in extraction, confirm against upstream whether the apostrophe is doubled

+ break;

+ default:

+ appender.append(ch);

+ break;

+ }

+// escapes literals for user strings where special characters in literals

+// are escaped with apostrophe.

+static void escapeLiteral(  // copies literal to appender, wrapping each pattern-significant UChar in apostrophes; used by AffixPattern::toUserString

+ const UnicodeString &literal, UnicodeStringAppender &appender) {

+ int32_t len = literal.length();

+ const UChar *buffer = literal.getBuffer();

+ for (int32_t i = 0; i < len; ++i) {

+ UChar ch = buffer[i];

+ switch (ch) {

+ case 0x27:

+ appender.append((UChar) 0x27);  // NOTE(review): as shown only one apostrophe is emitted; a repeated line may have been lost in extraction, confirm against upstream

+ break;

+ case 0x25:  // percent sign: emitted between two apostrophes

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x25);

+ appender.append((UChar) 0x27);

+ break;

+ case 0x2030:  // per-mille sign: emitted between two apostrophes

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2030);

+ appender.append((UChar) 0x27);

+ break;

+ case 0xA4:  // currency sign: emitted between two apostrophes

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0xA4);

+ appender.append((UChar) 0x27);

+ break;

+ case 0x2D:  // minus sign: emitted between two apostrophes

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2D);

+ appender.append((UChar) 0x27);

+ break;

+ case 0x2B:  // plus sign: emitted between two apostrophes

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2B);

+ appender.append((UChar) 0x27);

+ break;

+ default:

+ appender.append(ch);  // everything else is copied unchanged

+ break;

+ }

+UnicodeString &

+AffixPattern::toString(UnicodeString &appendTo) const {  // appends the form in which each special token is written as an apostrophe followed by its symbol; returns appendTo

+ AffixPatternIterator iter;

+ iterator(iter);

+ UnicodeStringAppender appender(appendTo);

+ UnicodeString literal;

+ while (iter.nextToken()) {

+ switch (iter.getTokenType()) {

+ case kLiteral:

+ escapeApostropheInLiteral(iter.getLiteral(literal), appender);  // literal text goes through the apostrophe-only escaper

+ break;

+ case kPercent:

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x25);

+ break;

+ case kPerMill:

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2030);

+ break;

+ case kCurrency:

+ {

+ appender.append((UChar) 0x27);  // a single apostrophe precedes the whole run of currency signs

+ int32_t cl = iter.getTokenLength();

+ for (int32_t i = 0; i < cl; ++i) {

+ appender.append((UChar) 0xA4);

+ }

+ break;

+ case kNegative:

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2D);

+ break;

+ case kPositive:

+ appender.append((UChar) 0x27);

+ appender.append((UChar) 0x2B);

+ break;

+ default:

+ U_ASSERT(FALSE);  // any other token type is unexpected here

+ break;

+ }

+ return appendTo;

+UnicodeString &

+AffixPattern::toUserString(UnicodeString &appendTo) const {  // appends the user form: special tokens as their bare symbols, literal text escaped by escapeLiteral; returns appendTo

+ AffixPatternIterator iter;

+ iterator(iter);

+ UnicodeStringAppender appender(appendTo);

+ UnicodeString literal;

+ while (iter.nextToken()) {

+ switch (iter.getTokenType()) {

+ case kLiteral:

+ escapeLiteral(iter.getLiteral(literal), appender);  // special characters inside literal text get wrapped in apostrophes

+ break;

+ case kPercent:

+ appender.append((UChar) 0x25);

+ break;

+ case kPerMill:

+ appender.append((UChar) 0x2030);

+ break;

+ case kCurrency:

+ {

+ int32_t cl = iter.getTokenLength();  // number of currency signs stored in the token

+ for (int32_t i = 0; i < cl; ++i) {

+ appender.append((UChar) 0xA4);

+ }

+ break;

+ case kNegative:

+ appender.append((UChar) 0x2D);

+ break;

+ case kPositive:

+ appender.append((UChar) 0x2B);

+ break;

+ default:

+ U_ASSERT(FALSE);  // any other token type is unexpected here

+ break;

+ }

+ return appendTo;

+class AffixPatternAppender : public UMemory {  // collects literal UChars in a small fixed buffer and hands them to AffixPattern::addLiteral in batches

+public:

+ AffixPatternAppender(AffixPattern &dest) : fDest(&dest), fIdx(0) { }  // dest is stored by pointer and receives every flushed batch

+ inline void append(UChar x) {

+ if (fIdx == UPRV_LENGTHOF(fBuffer)) {  // buffer full: hand it over before storing x

+ fDest->addLiteral(fBuffer, 0, fIdx);

+ fIdx = 0;

+ }

+ fBuffer[fIdx++] = x;

+ }

+ inline void append(UChar32 x) {

+ if (fIdx >= UPRV_LENGTHOF(fBuffer) - 1) {  // flush when fewer than two slots remain; presumably because U16_APPEND_UNSAFE may write two UChars, TODO confirm

+ fDest->addLiteral(fBuffer, 0, fIdx);

+ fIdx = 0;

+ }

+ U16_APPEND_UNSAFE(fBuffer, fIdx, x);

+ }

+ inline void flush() {

+ if (fIdx) {  // nothing is sent when the buffer is empty

+ fDest->addLiteral(fBuffer, 0, fIdx);

+ }

+ fIdx = 0;

+ }

+ /**

+ * flush the buffer when we go out of scope.

+ */

+ ~AffixPatternAppender() {

+ flush();

+ }

+private:

+ AffixPattern *fDest;  // pattern that receives flushed literal text

+ int32_t fIdx;  // number of UChars currently held in fBuffer

+ UChar fBuffer[32];

+ AffixPatternAppender(const AffixPatternAppender &other);  // declared private; no definition appears in this file

+ AffixPatternAppender &operator=(const AffixPatternAppender &other);  // declared private; no definition appears in this file

+};

+AffixPattern &

+AffixPattern::parseUserAffixString(  // parses a user affix string, where apostrophes quote literal text, and adds the resulting tokens to appendTo

+ const UnicodeString &affixStr,

+ AffixPattern &appendTo,

+ UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return appendTo;

+ }

+ int32_t len = affixStr.length();

+ const UChar *buffer = affixStr.getBuffer();

+ // 0 = not quoted; 1 = quoted.

+ int32_t state = 0;

+ AffixPatternAppender appender(appendTo);  // buffers literal text; its destructor flushes whatever is still pending

+ for (int32_t i = 0; i < len; ) {

+ UChar token;

+ int32_t tokenSize = nextUserToken(buffer, i, len, &token);

+ i += tokenSize;

+ if (token == 0x27 && tokenSize == 1) { // quote

+ state = 1 - state;  // a lone apostrophe toggles quoting and emits nothing

+ continue;

+ }

+ if (state == 0) {

+ switch (token) {

+ case 0x25:

+ appender.flush();  // pending literal text must be added before the special token to keep order

+ appendTo.add(kPercent, 1);

+ break;

+ case 0x27: // double quote

+ appender.append((UChar) 0x27);

+ break;

+ case 0x2030:

+ appender.flush();

+ appendTo.add(kPerMill, 1);

+ break;

+ case 0x2D:

+ appender.flush();

+ appendTo.add(kNegative, 1);

+ break;

+ case 0x2B:

+ appender.flush();

+ appendTo.add(kPositive, 1);

+ break;

+ case 0xA4:

+ appender.flush();

+ appendTo.add(kCurrency, tokenSize);  // tokenSize is the run length of currency signs, 1 to 3 per nextUserToken

+ break;

+ default:

+ appender.append(token);

+ break;

+ }

+ } else {

+ switch (token) {

+ case 0x27: // double quote

+ appender.append((UChar) 0x27);

+ break;

+ case 0xA4: // included b/c tokenSize can be > 1

+ for (int32_t j = 0; j < tokenSize; ++j) {

+ appender.append((UChar) 0xA4);

+ }

+ break;

+ default:

+ appender.append(token);  // inside quotes every other character is literal text

+ break;

+ }

+ return appendTo;

+AffixPattern &

+AffixPattern::parseAffixString(  // parses an affix string in which an apostrophe marks the following character as a special token, and adds the tokens to appendTo

+ const UnicodeString &affixStr,

+ AffixPattern &appendTo,

+ UErrorCode &status) {

+ if (U_FAILURE(status)) {

+ return appendTo;

+ }

+ int32_t len = affixStr.length();

+ const UChar *buffer = affixStr.getBuffer();

+ for (int32_t i = 0; i < len; ) {

+ UChar token;

+ int32_t tokenSize = nextToken(buffer, i, len, &token);

+ if (tokenSize == 1) {  // a size of 1 means plain text; gather the whole run into one literal

+ int32_t literalStart = i;

+ ++i;

+ while (i < len && (tokenSize = nextToken(buffer, i, len, &token)) == 1) {

+ ++i;

+ }

+ appendTo.addLiteral(buffer, literalStart, i - literalStart);

+ // If we reached end of string, we are done

+ if (i == len) {

+ return appendTo;

+ }

+ i += tokenSize;  // skip the apostrophe and the character(s) it marks

+ switch (token) {

+ case 0x25:

+ appendTo.add(kPercent, 1);

+ break;

+ case 0x2030:

+ appendTo.add(kPerMill, 1);

+ break;

+ case 0x2D:

+ appendTo.add(kNegative, 1);

+ break;

+ case 0x2B:

+ appendTo.add(kPositive, 1);

+ break;

+ case 0xA4:

+ {

+ if (tokenSize - 1 > 3) {  // NOTE(review): nextToken stops counting at 4, so tokenSize - 1 cannot exceed 3 with the helper as shown

+ status = U_PARSE_ERROR;

+ return appendTo;

+ }

+ appendTo.add(kCurrency, tokenSize - 1);  // minus one for the leading apostrophe

+ }

+ break;

+ default:

+ appendTo.addLiteral(&token, 0, 1);  // apostrophe before a non-special character: keep just that character as literal text

+ break;

+ }

+ return appendTo;

+AffixPatternIterator &

+AffixPattern::iterator(AffixPatternIterator &result) const {  // rewinds result to the start of this pattern and returns it

+ result.nextLiteralIndex = 0;

+ result.lastLiteralLength = 0;

+ result.nextTokenIndex = 0;

+ result.tokens = &tokens;  // the iterator keeps pointers into this pattern rather than copies

+ result.literals = &literals;

+ return result;

+UBool

+AffixPatternIterator::nextToken() {  // advances to the next token; returns FALSE once every token unit has been consumed

+ int32_t tlen = tokens->length();

+ if (nextTokenIndex == tlen) {

+ return FALSE;

+ }

+ ++nextTokenIndex;

+ const UChar *tokenBuffer = tokens->getBuffer();

+ if (UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]) ==

+ AffixPattern::kLiteral) {

+ while (nextTokenIndex < tlen &&

+ UNPACK_LONG(tokenBuffer[nextTokenIndex])) {  // swallow the continuation units that carry the higher bytes of the literal length

+ ++nextTokenIndex;

+ }

+ lastLiteralLength = 0;

+ int32_t i = nextTokenIndex - 1;

+ for (; UNPACK_LONG(tokenBuffer[i]); --i) {  // walk back from the last continuation unit, most significant byte first

+ lastLiteralLength <<= 8;

+ lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]);

+ }

+ lastLiteralLength <<= 8;  // finally fold in the first unit, which holds the lowest 8 bits

+ lastLiteralLength |= UNPACK_LENGTH(tokenBuffer[i]);

+ nextLiteralIndex += lastLiteralLength;  // nextLiteralIndex now points just past this literal text

+ }

+ return TRUE;

+AffixPattern::ETokenType

+AffixPatternIterator::getTokenType() const {  // type of the token most recently reached by nextToken()

+ return UNPACK_TOKEN(tokens->charAt(nextTokenIndex - 1));  // UNPACK_TOKEN drops the 0x80 flag, so a literal continuation unit still reads as kLiteral

+UnicodeString &

+AffixPatternIterator::getLiteral(UnicodeString &result) const {  // sets result to the text of the most recently decoded literal and returns it

+ const UChar *buffer = literals->getBuffer();

+ result.setTo(buffer + (nextLiteralIndex - lastLiteralLength), lastLiteralLength);  // the literal ends at nextLiteralIndex and is lastLiteralLength UChars long

+ return result;

+int32_t

+AffixPatternIterator::getTokenLength() const {  // length of the current token: decoded literal length for literals, otherwise the low 8 bits of the token unit

+ const UChar *tokenBuffer = tokens->getBuffer();

+ AffixPattern::ETokenType type = UNPACK_TOKEN(tokenBuffer[nextTokenIndex - 1]);

+ return type == AffixPattern::kLiteral ? lastLiteralLength : UNPACK_LENGTH(tokenBuffer[nextTokenIndex - 1]);

+AffixPatternParser::AffixPatternParser()  // default parser: uses the file-level default percent, per-mille, minus and plus characters

+ : fPercent(gPercent), fPermill(gPerMill), fNegative(gNegative), fPositive(gPositive) {

+AffixPatternParser::AffixPatternParser(  // builds a parser whose four symbols come from symbols

+ const DecimalFormatSymbols &symbols) {

+ setDecimalFormatSymbols(symbols);

+void

+AffixPatternParser::setDecimalFormatSymbols(  // replaces the percent, per-mille, minus and plus strings with those held by symbols

+ const DecimalFormatSymbols &symbols) {

+ fPercent = symbols.getConstSymbol(DecimalFormatSymbols::kPercentSymbol);

+ fPermill = symbols.getConstSymbol(DecimalFormatSymbols::kPerMillSymbol);

+ fNegative = symbols.getConstSymbol(DecimalFormatSymbols::kMinusSignSymbol);

+ fPositive = symbols.getConstSymbol(DecimalFormatSymbols::kPlusSignSymbol);

+PluralAffix &

+AffixPatternParser::parse(  // expands affixPattern into appendTo, substituting this parser's symbols and the given currency text for the special tokens

+ const AffixPattern &affixPattern,

+ const CurrencyAffixInfo &currencyAffixInfo,

+ PluralAffix &appendTo,

+ UErrorCode &status) const {

+ if (U_FAILURE(status)) {

+ return appendTo;

+ }

+ AffixPatternIterator iter;

+ affixPattern.iterator(iter);

+ UnicodeString literal;

+ while (iter.nextToken()) {

+ switch (iter.getTokenType()) {

+ case AffixPattern::kPercent:

+ appendTo.append(fPercent, UNUM_PERCENT_FIELD);

+ break;

+ case AffixPattern::kPerMill:

+ appendTo.append(fPermill, UNUM_PERMILL_FIELD);

+ break;

+ case AffixPattern::kNegative:

+ appendTo.append(fNegative, UNUM_SIGN_FIELD);  // minus and plus both use the sign field

+ break;

+ case AffixPattern::kPositive:

+ appendTo.append(fPositive, UNUM_SIGN_FIELD);

+ break;

+ case AffixPattern::kCurrency:

+ switch (iter.getTokenLength()) {  // the number of currency signs selects which currency text is used

+ case 1:

+ appendTo.append(

+ currencyAffixInfo.getSymbol(), UNUM_CURRENCY_FIELD);

+ break;

+ case 2:

+ appendTo.append(

+ currencyAffixInfo.getISO(), UNUM_CURRENCY_FIELD);

+ break;

+ case 3:

+ appendTo.append(

+ currencyAffixInfo.getLong(), UNUM_CURRENCY_FIELD, status);  // only this overload takes status

+ break;

+ default:

+ U_ASSERT(FALSE);  // currency tokens are expected to have length 1, 2 or 3

+ break;

+ }

+ break;

+ case AffixPattern::kLiteral:

+ appendTo.append(iter.getLiteral(literal));  // literal text is appended without a field id

+ break;

+ default:

+ U_ASSERT(FALSE);

+ break;

+ }

+ return appendTo;

+U_NAMESPACE_END

+#endif /* #if !UCONFIG_NO_FORMATTING */

« no previous file with comments | « source/i18n/affixpatternparser.h ('k') | source/i18n/alphaindex.cpp » ('j') | no next file with comments »